Use regular expressions to find and permanently remove credit card numbers, SSNs, email addresses, or any other structured data from a PDF.
use pdfluent::{PdfDocument, RedactOptions};
/// Opens `report.pdf`, marks every match of the US Social Security Number
/// pattern with `redact_pattern`, calls `apply_redactions`, and saves the
/// result as `report_redacted.pdf`.
///
/// Any error from opening, marking, applying, or saving is returned to the
/// caller via `?`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // NNN-NN-NNNN with a word boundary on each side.
    const SSN_REGEX: &str = r"\b\d{3}-\d{2}-\d{4}\b";

    let mut document = PdfDocument::open("report.pdf")?;
    document.redact_pattern(SSN_REGEX, RedactOptions::default())?;
    document.apply_redactions()?;
    document.save("report_redacted.pdf")?;
    Ok(())
}
Add the pdfluent crate to Cargo.toml.
[dependencies]
pdfluent = "0.9"
Load the file from disk or from an in-memory buffer.
// Bring the document type into scope.
use pdfluent::PdfDocument;
// Open the input PDF by path; `?` returns early if opening fails.
let mut doc = PdfDocument::open("customer_data.pdf")?;
Write patterns for the data types you want to remove. PDFluent uses the Rust regex crate syntax.
// Credit card: Visa, Mastercard, Amex formats.
// NOTE: this matches contiguous digits only. Numbers written with spaces or
// hyphens between digit groups are not matched by this pattern.
let cc_pattern = r"\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})\b";
// Email addresses
let email_pattern = r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}";
// US phone numbers, with or without parentheses around the area code.
// The opening parenthesis is an alternative to `\b` instead of following it:
// no word boundary exists between whitespace (or start of text) and `(`, so
// `\b\(?` would begin the match after the `(` and leave it unredacted.
let phone_pattern = r"(?:\(|\b)\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4}\b";
Call redact_pattern() for each regex. You can chain multiple patterns before calling apply_redactions().
use pdfluent::RedactOptions;
// One set of options shared by every pattern: black fill with the
// overlay text "REDACTED".
let opts = RedactOptions::default()
    .fill_color(pdfluent::Color::black())
    .overlay_text("REDACTED");
// Mark matches for each pattern in turn, in the same order as before:
// credit cards, then emails, then phone numbers.
for pattern in [cc_pattern, email_pattern, phone_pattern] {
    doc.redact_pattern(pattern, opts.clone())?;
}
apply_redactions() permanently removes all matched text from the content stream.
// Apply the redactions; `?` returns early on failure.
doc.apply_redactions()?;
// Save under a new file name; `?` returns early if the write fails.
doc.save("customer_data_clean.pdf")?;
// Printed only after both calls above have succeeded.
println!("Pattern redaction complete.");
No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.