Pull JPEG, PNG, and JBIG2 images out of a PDF without re-encoding. Preserves original compression and quality.
use pdfluent::PdfDocument;
/// Saves every image found in `document.pdf` into the current directory.
///
/// Files are named `page{P}_img{I}.{ext}`, where `P` and `I` are the 1-based
/// page and image numbers and `ext` comes from the image's own format.
/// After each save, the file name and the image's pixel dimensions are printed.
///
/// # Errors
///
/// Propagates any error from opening the document or saving an image.
fn main() -> pdfluent::Result<()> {
    let doc = PdfDocument::open("document.pdf")?;

    for (page_idx, page) in doc.pages().enumerate() {
        // Human-facing numbering starts at 1 rather than 0.
        let page_number = page_idx + 1;

        for (img_idx, image) in page.images().enumerate() {
            let image_number = img_idx + 1;
            let extension = image.format().extension();
            let filename = format!("page{}_img{}.{}", page_number, image_number, extension);

            image.save(&filename)?;

            let (width, height) = (image.width(), image.height());
            println!("Saved {} ({}x{})", filename, width, height);
        }
    }

    Ok(())
}
Image extraction is part of the base crate. No extra features are required.
# Cargo.toml
[dependencies]
pdfluent = "0.9"
page.images() returns an iterator over every image on the page, covering XObject images as well as inline images.
use pdfluent::PdfDocument;
// Print how many images each page of `document.pdf` contains.
let doc = PdfDocument::open("document.pdf")?;
doc.pages().enumerate().for_each(|(index, page)| {
    // Pages are reported with 1-based numbers.
    let page_number = index + 1;
    let image_count = page.images().count();
    println!("Page {}: {} image(s)", page_number, image_count);
});
image.save() writes the image bytes to a file without re-encoding. JPEG images stay JPEG, preserving the original quality.
for (i, page) in doc.pages().enumerate() {
for (j, image) in page.images().enumerate() {
let ext = image.format().extension(); // "jpg", "png", "jbig2"
let path = format!("output/p{}_i{}.{}", i + 1, j + 1, ext);
image.save(&path)?;
}
}Call image.to_png_bytes() to decode the image and re-encode as PNG, regardless of its original format.
use std::fs;
for (i, page) in doc.pages().enumerate() {
for (j, image) in page.images().enumerate() {
let png_bytes = image.to_png_bytes()?;
let path = format!("output/p{}_i{}.png", i + 1, j + 1);
fs::write(&path, &png_bytes)?;
println!("Wrote PNG: {}", path);
}
}Skip thumbnails and decorative images by checking pixel dimensions before saving.
for (i, page) in doc.pages().enumerate() {
for (j, image) in page.images()
.filter(|img| img.width() >= 100 && img.height() >= 100)
.enumerate()
{
let path = format!("output/p{}_i{}.jpg", i + 1, j + 1);
image.save(&path)?;
println!(
"Saved {}x{} image: {}",
image.width(), image.height(), path
);
}
}No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.