Use rayon and PDFluent together to process a folder of PDFs across all CPU cores. No thread pool setup required.
use rayon::prelude::*;
use std::fs;
use pdfluent::PdfDocument;
/// Processes every PDF found directly inside `./input` in parallel and
/// prints one status line per file.
///
/// The extension check ignores ASCII case, so `REPORT.PDF` is picked up just
/// like `report.pdf`. Directory entries that cannot be read are skipped.
/// A failure on one file is logged to stderr and does not stop the others.
///
/// # Errors
///
/// Returns an error if `./input` cannot be opened for listing. Per-file
/// failures are reported on stderr only and do not affect the return value.
fn main() -> anyhow::Result<()> {
    let files: Vec<_> = fs::read_dir("./input")?
        .filter_map(|entry| entry.ok())
        .map(|entry| entry.path())
        .filter(|p| {
            p.extension()
                .map_or(false, |ext| ext.eq_ignore_ascii_case("pdf"))
        })
        .collect();

    files.par_iter().for_each(|path| {
        // Fall back to the whole path instead of panicking inside a worker
        // if a path ever lacks a final component.
        let name = path.file_name().unwrap_or(path.as_os_str());
        match process_file(path) {
            Ok(_) => println!("OK {:?}", name),
            Err(e) => eprintln!("ERR {:?}: {}", name, e),
        }
    });
    Ok(())
}
/// Extracts the text of the PDF at `path` and writes it next to the source
/// file, using the same name with a `txt` extension.
///
/// # Errors
///
/// Propagates any error from opening the document, extracting its text, or
/// writing the output file.
fn process_file(path: &std::path::Path) -> pdfluent::Result<()> {
    // Destination: same location and stem as the input, extension swapped.
    let target = path.with_extension("txt");

    let document = PdfDocument::open(path)?;
    let contents = document.extract_text()?;

    fs::write(target, contents)?;
    Ok(())
}
rayon provides a parallel iterator that distributes work across all available CPU cores automatically.
# Cargo.toml
[dependencies]
pdfluent = "0.9"
rayon = "1.10"
anyhow = "1"
Use std::fs::read_dir to walk the input directory. Filter by extension to skip non-PDF files.
use std::fs;
use std::path::PathBuf;
// Collect the paths of all PDF files directly inside ./input.
// Entries that cannot be read are skipped. The extension comparison ignores
// ASCII case, so `SCAN.PDF` is kept as well as `scan.pdf`.
let files: Vec<PathBuf> = fs::read_dir("./input")?
    .filter_map(|entry| entry.ok())
    .map(|entry| entry.path())
    .filter(|p| {
        p.extension()
            .map_or(false, |ext| ext.eq_ignore_ascii_case("pdf"))
    })
    .collect();
println!("Found {} PDF files", files.len());
Replace .iter() with .par_iter() to run each file on a separate thread. PdfDocument::open is Send, so it works safely across rayon threads.
use rayon::prelude::*;
// Hand each path to `process_file` through rayon's parallel iterator.
// Successes are reported on stdout, failures on stderr.
files.par_iter().for_each(|path| {
    if let Err(e) = process_file(path) {
        eprintln!("ERR {}: {}", path.display(), e);
    } else {
        println!("OK {}", path.display());
    }
});
Keep the function focused on one document. Open, process, save. Errors are returned and logged by the caller.
fn process_file(path: &std::path::Path) -> pdfluent::Result<()> {
let mut doc = PdfDocument::open(path)?;
// Example: stamp a watermark on every page
for page in doc.pages_mut() {
page.add_watermark("CONFIDENTIAL")?;
}
let out_path = std::path::Path::new("./output")
.join(path.file_name().unwrap());
doc.save(out_path)?;
Ok(())
}Use par_iter().map() instead of par_iter().for_each() when you need to collect success and error counts.
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
// Map every file to 1 (success) or 0 (failure) and let rayon add the values
// up, so no shared counters or reference counting are needed.
let ok_count: usize = files
    .par_iter()
    .map(|path| match process_file(path) {
        Ok(_) => 1usize,
        Err(e) => {
            eprintln!("ERR {}: {}", path.display(), e);
            0
        }
    })
    .sum();
// Each file either succeeded or failed, so the failures are the remainder.
let err_count = files.len() - ok_count;
println!("Done. {} ok, {} errors", ok_count, err_count);
No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.