Read the text content of each page as a plain string or as structured spans with font and position data.
use pdfluent::Document;
/// Opens `input.pdf` and prints the extracted text of every page,
/// each preceded by a `=== Page N ===` header (pages are numbered from 1).
///
/// Any error from opening the document or extracting a page's text is
/// propagated to the caller via `?`.
fn main() -> pdfluent::Result<()> {
    let document = Document::open("input.pdf")?;

    // Pair each page with its 1-based number instead of adjusting a 0-based index.
    for (page_number, page) in (1usize..).zip(document.pages()) {
        // Extract first, so a failing page produces no header.
        let contents = page.extract_text()?;
        println!("=== Page {} ===", page_number);
        println!("{}", contents);
    }

    Ok(())
}

Text extraction only requires read access.
let doc = Document::open("input.pdf")?;

extract_text() reconstructs reading order using glyph positions and returns a plain String. Words are separated by spaces; paragraphs by newlines.
let page = doc.page(0)?;
let text = page.extract_text()?;
println!("{}", text);

extract_spans() returns each text run as a TextSpan with font name, font size, and bounding Rect. Useful for layout analysis.
// Print every text run on the page together with its font name, font size,
// and bounding rectangle. The span text is wrapped in literal double quotes,
// which must be backslash-escaped inside the format string.
for span in page.extract_spans()? {
    println!(
        "\"{}\" font={} size={:.1}pt at {:?}",
        span.text,
        span.font_name,
        span.font_size,
        span.rect,
    );
}

Iterate over all pages and collect text per page into a Vec.
let pages_text: Vec<String> = doc
.pages()
.map(|p| p.extract_text().unwrap_or_default())
.collect();

Save each page as a separate text file, or join them with newlines for a single output file.
for (i, text) in pages_text.iter().enumerate() {
std::fs::write(format!("page_{:03}.txt", i + 1), text)?;
}

No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.