Read the structured invoice XML embedded inside a ZUGFeRD or Factur-X PDF. Parse it for automated accounting import.
use pdfluent::PdfDocument;
/// Opens `invoice.pdf` and, when it carries an embedded e-invoice, prints the
/// invoice profile and XML size and saves the XML to `extracted-invoice.xml`.
/// Otherwise it reports that no e-invoice XML was found.
fn main() -> pdfluent::Result<()> {
    let doc = PdfDocument::open("invoice.pdf")?;
    match doc.extract_einvoice()? {
        Some(einvoice) => {
            // Fetch the XML once and reuse it for both the report and the file.
            let xml = einvoice.xml();
            println!("Profile: {:?}", einvoice.profile());
            println!("XML length: {} bytes", xml.len());
            std::fs::write("extracted-invoice.xml", xml)?;
        }
        None => println!("No e-invoice XML found in this PDF."),
    }
    Ok(())
}
XML extraction works with the base crate. The einvoice feature adds profile detection and validation helpers.
# Cargo.toml
[dependencies]
pdfluent = { version = "0.9", features = ["einvoice"] }
Use `has_einvoice()` to check quickly whether a PDF carries an e-invoice before attempting extraction. This reads only the PDF attachment table.
use pdfluent::PdfDocument;
let doc = PdfDocument::open("invoice.pdf")?;
// Only ask whether an e-invoice is present; nothing is extracted here.
let status = if doc.has_einvoice() {
    "E-invoice XML found."
} else {
    "No e-invoice embedded."
};
println!("{}", status);
extract_einvoice() returns an EInvoiceData struct. The profile() method identifies ZUGFeRD MINIMUM, BASIC, EN16931, or EXTENDED.
use pdfluent::EInvoiceProfile;
// Pull the embedded invoice out of `doc`. A PDF without one yields `None`, so
// report it and stop instead of panicking through `unwrap()`.
// NOTE(review): the early return assumes the enclosing function returns
// `Result<(), _>`, like `main` in the first example — adjust if yours differs.
let einvoice = match doc.extract_einvoice()? {
    Some(einvoice) => einvoice,
    None => {
        println!("No e-invoice XML found in this PDF.");
        return Ok(());
    }
};
println!("Profile: {:?}", einvoice.profile());
match einvoice.profile() {
    EInvoiceProfile::Minimum => println!("Basic routing data only"),
    // BASIC WL stands for "without lines": document-level data, no line items.
    EInvoiceProfile::BasicWl => println!("Document-level data, no line items"),
    EInvoiceProfile::EN16931 => println!("Full invoice, EU compliant"),
    EInvoiceProfile::Extended => println!("Extended German profile"),
    EInvoiceProfile::XRechnung => println!("German public sector"),
    // Any profile not listed above is deliberately ignored.
    _ => {}
}
The extracted XML is a standard Rust String. Use any XML parser to read the invoice fields.
use roxmltree::Document as XmlDoc;
let xml_str = einvoice.xml();
let xml = XmlDoc::parse(&xml_str)?;
// Invoice number: the first `ID` element (in document order) whose direct
// parent is an `ExchangedDocument` element; `None` if there is no such element
// or it has no text.
let invoice_id = xml
    .descendants()
    .find(|node| {
        node.has_tag_name("ID")
            && matches!(node.parent(), Some(parent) if parent.has_tag_name("ExchangedDocument"))
    })
    .and_then(|node| node.text());
println!("Invoice ID: {:?}", invoice_id);
Combine with the batch processing pattern to extract XML from many invoices at once.
use std::fs;
use pdfluent::PdfDocument;
// Walk ./invoices and, for every PDF that embeds an e-invoice, write the XML
// next to it with the extension replaced by `.xml`.
let dir = fs::read_dir("./invoices")?;
// Directory entries that cannot be read are skipped (best effort).
for entry in dir.filter_map(|e| e.ok()) {
    let path = entry.path();
    // Compare the extension case-insensitively so `INVOICE.PDF` is not skipped.
    let is_pdf = path
        .extension()
        .map_or(false, |ext| ext.eq_ignore_ascii_case("pdf"));
    if !is_pdf {
        continue;
    }
    // NOTE: `?` ends the whole batch at the first PDF whose open, extraction,
    // or write returns an error.
    let doc = PdfDocument::open(&path)?;
    if let Some(inv) = doc.extract_einvoice()? {
        let xml_path = path.with_extension("xml");
        fs::write(&xml_path, inv.xml())?;
        println!("Extracted: {}", xml_path.display());
    }
}
No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.