Detect and extract structured table data from PDF pages. Get rows and cells as Rust values without writing custom parsing logic.
use pdfluent::PdfDocument;
/// Prints every table found on the first page of `report.pdf`, one row per
/// line, with the cell texts joined by " | ".
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let document = PdfDocument::open("report.pdf")?;
    let first_page = document.page(0)?;

    for table in first_page.extract_tables()? {
        for row in table.rows.iter() {
            // Borrow each cell's text so that no strings are copied.
            let line = row
                .iter()
                .map(|cell| cell.text.as_str())
                .collect::<Vec<&str>>()
                .join(" | ");
            println!("{}", line);
        }
    }

    Ok(())
}
Table extraction works on a per-page basis. Open the document and select the page that contains the table.
use pdfluent::PdfDocument;
let report_path = "financial_report.pdf";
// Page indices are 0-based, so index 1 selects page 2.
let page_index = 1;
let doc = PdfDocument::open(report_path)?;
let page = doc.page(page_index)?;
extract_tables() returns a Vec<Table>. Each Table has a rows field: a Vec<Vec<TableCell>>. Cells span columns if they have a colspan greater than 1.
// Extract every table on the selected page and report how many were found.
let tables = page.extract_tables()?;
let table_count = tables.len();
println!("Found {} table(s) on this page", table_count);
Each TableCell contains the text content and the column span. Iterate rows and cells to process the data.
// Print each table with a 1-based number, then its rows, one per line.
for (table_number, table) in (1..).zip(tables.iter()) {
    let row_count = table.rows.len();
    println!("Table {}: {} rows", table_number, row_count);
    for row in table.rows.iter() {
        // Cell text is trimmed; each cell is shown in brackets.
        row.iter()
            .for_each(|cell| print!("[{}] ", cell.text.trim()));
        println!();
    }
}
Write a simple CSV from the extracted rows. Use the csv crate for proper quoting.
use std::io::Write;
// Write the first table to `table.csv`, one CSV line per table row.
// Every field is wrapped in double quotes and any embedded double quote is
// doubled (`"` becomes `""`), which is the usual CSV escaping convention.
// NOTE: `tables[0]` panics if `tables` is empty; check `tables.is_empty()`
// first when the page may contain no table.
let mut out = std::fs::File::create("table.csv")?;
for row in &tables[0].rows {
    let line = row
        .iter()
        .map(|c| format!("\"{}\"", c.text.replace('"', "\"\"")))
        .collect::<Vec<_>>()
        .join(",");
    writeln!(out, "{}", line)?;
}
Use TableExtractionOptions to adjust the line-merge tolerance and minimum cell size, which helps with tables that have thin or invisible borders.
use pdfluent::TableExtractionOptions;
// Tuning values for pages whose tables have thin or invisible borders.
let line_tolerance = 2.0;
let min_cell_width = 20.0;
let opts = TableExtractionOptions::default()
    .line_tolerance(line_tolerance)
    .min_cell_width(min_cell_width);
let tables = page.extract_tables_with_options(&opts)?;
No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.