Find all occurrences of a string in a PDF and retrieve the bounding box of each match on each page.
use pdfluent::Document;
/// Searches `input.pdf` for the phrase "invoice number" and prints every match.
///
/// For each match, prints the 1-based page number, the bounding rectangle
/// (via its `Debug` representation), and the matched text in double quotes,
/// followed by a final line with the total number of matches.
///
/// # Errors
///
/// Propagates any error returned by `Document::open` or `Document::search`.
fn main() -> pdfluent::Result<()> {
    let doc = Document::open("input.pdf")?;
    let matches = doc.search("invoice number")?;

    for m in &matches {
        // `m.page` is a zero-based index, so add 1 for a human-readable page number.
        // The inner double quotes must be escaped; unescaped they terminate the
        // format string early and the program does not compile.
        println!("Page {}: {:?} -> \"{}\"", m.page + 1, m.rect, m.text);
    }

    println!("{} match(es) found", matches.len());
    Ok(())
}
A read-only Document is sufficient for text search.
let doc = Document::open("input.pdf")?;
`doc.search()` performs a case-insensitive, Unicode-normalized search across all pages and returns a `Vec` of `TextMatch`.
let matches = doc.search("invoice number")?;
Each `TextMatch` carries the zero-based page index, the bounding `Rect` in page coordinates, and the matched text fragment.
for m in &matches {
println!(
"page={} x1={:.1} y1={:.1} x2={:.1} y2={:.1}",
m.page + 1,
m.rect.x_min, m.rect.y_min,
m.rect.x_max, m.rect.y_max,
);
}
Use `SearchOptions` to enable case-sensitive matching, whole-word matching, or regex search.
use pdfluent::text::SearchOptions;
let opts = SearchOptions::new()
.case_sensitive(true)
.whole_word(true);
let matches = doc.search_with("Total", opts)?;
For large documents, searching page by page avoids loading the full text index at once.
let page = doc.page(0)?;
let matches = page.search("signature")?;
for m in &matches {
println!("Found at {:?}", m.rect);
}
No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.