Use the PDF outline to split a document into sections automatically. Each top-level bookmark becomes its own output file.
use pdfluent::PdfDocument;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let doc = PdfDocument::open("manual.pdf")?;
let results = doc
.split_by_top_level_bookmarks()
.write_files("{title}.pdf")?;
for r in &results {
println!("{} -> {} pages", r.filename, r.page_count);
}
Ok(())
}

Check that the document has top-level bookmarks before splitting. PDFluent exposes the full outline tree via outline().
use pdfluent::PdfDocument;
let doc = PdfDocument::open("manual.pdf")?;
let outline = doc.outline()?;
println!("Top-level sections: {}", outline.len());
for item in &outline {
println!(" {} -> page {}", item.title, item.destination_page);
}

split_by_top_level_bookmarks() computes the page range for each bookmark automatically. The range ends where the next bookmark starts.
let splitter = doc.split_by_top_level_bookmarks();

Use {title} in the pattern to name each file after its bookmark. PDFluent sanitises the title to produce a valid filename.
splitter.write_files("{title}.pdf")?;
// "Introduction.pdf", "Chapter 1.pdf", "Chapter 2.pdf", ...

To split at second-level bookmarks instead of the top level, set the depth parameter.
use pdfluent::SplitDepth;
doc.split_by_bookmarks(SplitDepth::Level(2))
.write_files("section_{n}.pdf")?;

If you need the split data in memory, use to_vec() to get a Vec of SplitSegment values without writing to disk.
let segments = doc
.split_by_top_level_bookmarks()
.to_vec()?;
for seg in segments {
println!("{}: {} bytes", seg.title, seg.data.len());
// upload seg.data to S3, etc.
}

No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.