Extract and diff the text of two PDF documents page by page to find additions, deletions, and changes.
use pdfluent::{Document, diff::TextDiff};
/// Opens `version_a.pdf` and `version_b.pdf`, diffs their text, and prints
/// either a "text-identical" notice or one line per reported change.
///
/// Any error from opening a document or computing the diff is propagated
/// to the caller through `?`.
fn main() -> pdfluent::Result<()> {
    let original = Document::open("version_a.pdf")?;
    let revised = Document::open("version_b.pdf")?;
    let report = TextDiff::compare(&original, &revised)?;

    // Nothing to list when the two texts match; finish early.
    if report.is_identical() {
        println!("Documents are text-identical.");
        return Ok(());
    }

    // NOTE(review): `page` is presumably a zero-based index, hence the +1
    // when showing it to the reader; confirm against the library docs.
    for entry in report.changes() {
        println!("Page {}: {:?} {:?}", entry.page + 1, entry.kind, entry.text);
    }

    Ok(())
}

Open the two PDF files you want to compare as read-only Documents.
let doc_a = Document::open("original.pdf")?;
let doc_b = Document::open("revised.pdf")?;

TextDiff::compare extracts the plain text from each page and computes a line-level diff using the longest common subsequence algorithm.
use pdfluent::diff::TextDiff;
let diff = TextDiff::compare(&doc_a, &doc_b)?;

is_identical() is a quick check before iterating individual changes.
if diff.is_identical() {
println!("No text differences found.");
return Ok(());
}

Each DiffChange carries the page index, change kind (Added, Removed, or Changed), and the text content.
use pdfluent::diff::ChangeKind;
for change in diff.changes() {
let marker = match change.kind {
ChangeKind::Added => "+",
ChangeKind::Removed => "-",
ChangeKind::Changed => "~",
};
println!(
"Page {:>3} {} {}",
change.page + 1,
marker,
change.text.trim(),
);
}

If the documents have different page counts, pages that exist only in one document are reported as whole-page additions or deletions.
println!("Pages in A: {}", doc_a.page_count());
println!("Pages in B: {}", doc_b.page_count());
println!("Total changes: {}", diff.change_count());
println!("Pages with changes: {}", diff.changed_page_count());

No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.