List and extract all embedded file streams from a PDF document. Save attachments to disk or read them directly as byte buffers.
use pdfluent::PdfDocument;
use std::fs;
/// Extracts every attachment embedded in `invoice_with_attachment.pdf` into the
/// `output/` directory and prints the name and size of each extracted file.
///
/// # Errors
///
/// Returns an error if the document cannot be opened, the output directory cannot
/// be created, an attachment cannot be read, or a file cannot be written.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let doc = PdfDocument::open("invoice_with_attachment.pdf")?;

    // `fs::write` does not create missing parent directories, so make sure the
    // target folder exists before the first attachment is written.
    let output_dir = std::path::Path::new("output");
    fs::create_dir_all(output_dir)?;

    for attachment in doc.attachments() {
        let filename = attachment.filename();

        // The attachment name is stored inside the PDF and is therefore untrusted.
        // Keep only its final path component so names such as "../../x" or an
        // absolute path cannot place the file outside `output/`.
        let safe_name = match std::path::Path::new(&filename).file_name() {
            Some(name) => name,
            None => {
                eprintln!("Skipping attachment with unusable name: {}", filename);
                continue;
            }
        };

        let data = attachment.read_data()?;
        fs::write(output_dir.join(safe_name), &data)?;
        println!("Extracted {} ({} bytes)", filename, data.len());
    }
    Ok(())
}
Add the pdfluent crate to Cargo.toml.
[dependencies]
pdfluent = "0.9"
Open the document. A read-only borrow is enough for reading attachments.
use pdfluent::PdfDocument;
let doc = PdfDocument::open("package.pdf")?;
Call doc.attachments() to get attachment metadata. No file data is read at this point.
// Fetch the attachment list once and reuse it for both the count and the listing.
let attachments = doc.attachments();
let total = attachments.len();
println!("Found {} attachment(s):", total);

for entry in &attachments {
    // Fall back to "unknown" when the attachment reports no MIME type.
    let name = entry.filename();
    let mime = entry.mime_type().unwrap_or("unknown");
    let size = entry.size();
    println!(" {} - {} - {} bytes", name, mime, size);
}
Find the attachment you want by filename and extract its bytes.
// Pick the first attachment whose filename ends in ".xml" (case-sensitive match).
let xml_att = doc
    .attachments()
    .into_iter()
    .find(|candidate| candidate.filename().ends_with(".xml"));

match xml_att {
    // Found one: read its bytes and write them to a fixed output file.
    Some(att) => {
        let bytes = att.read_data()?;
        std::fs::write("extracted_invoice.xml", &bytes)?;
        println!("Extracted: {} bytes", bytes.len());
    }
    None => println!("No XML attachment found."),
}
Loop over all attachments and save each to a target folder.
use std::fs;
use std::path::Path;
// Create the target folder up front; `fs::write` does not create missing directories.
let output_dir = Path::new("extracted_files");
fs::create_dir_all(output_dir)?;

for att in doc.attachments() {
    let name = att.filename();

    // The attachment name comes from the PDF and is untrusted. `Path::join` with an
    // absolute path discards `output_dir`, and ".." components can climb out of it,
    // so only the final path component of the name is used for the destination.
    let dest = match Path::new(&name).file_name() {
        Some(base) => output_dir.join(base),
        None => {
            eprintln!("Skipped attachment with unusable name: {}", name);
            continue;
        }
    };

    let data = att.read_data()?;
    fs::write(&dest, &data)?;
    println!("Saved: {}", dest.display());
}
No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.