Render PDF pages to images with PDFluent, send them to Google Cloud Vision for OCR, and write the results back as an invisible text layer.
use pdfluent::{Sdk, ocr::{OcrLayerOptions, OcrWord}};
use reqwest::Client;
use serde_json::{json, Value};
use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Vision API key; fail fast if it is not configured.
    let api_key = std::env::var("GCP_VISION_API_KEY")?;
    let sdk = Sdk::new()?;
    let doc = sdk.open("scanned_contract.pdf")?;
    let http = Client::new();
    let mut builder = doc.add_ocr_layer();
    // Only OCR pages with no text layer; pages that already have
    // selectable text are skipped.
    for page in doc.pages().filter(|p| p.is_image_only()) {
        // Render the page to PNG bytes at 300 DPI.
        let png_bytes = doc.render_page_to_bytes(page.index(), 300)?;
        let b64 = BASE64.encode(&png_bytes);
        // DOCUMENT_TEXT_DETECTION returns word-level bounding boxes,
        // which the invisible text layer needs.
        let body = json!({
            "requests": [{
                "image": { "content": b64 },
                "features": [{ "type": "DOCUMENT_TEXT_DETECTION" }]
            }]
        });
        let resp: Value = http
            .post(format!(
                "https://vision.googleapis.com/v1/images:annotate?key={api_key}"
            ))
            .json(&body)
            .send()
            .await?
            // Surface HTTP-level failures (bad key, quota) instead of
            // attempting to parse an error page as the success payload.
            .error_for_status()?
            .json()
            .await?;
        // The API reports per-image failures inside a 200 body; check them.
        if let Some(err) = resp["responses"][0].get("error") {
            anyhow::bail!("Vision API error on page {}: {}", page.index(), err);
        }
        let words = extract_words(&resp, page.index())?;
        builder.add_page_words(page.index(), words);
    }
    // Invisible rendering mode: text is searchable/copyable but not drawn.
    let opts = OcrLayerOptions::builder()
        .text_rendering_mode(pdfluent::ocr::TextRenderingMode::Invisible)
        .build();
    let searchable = builder.finish(opts)?;
    searchable.save("contract_searchable.pdf")?;
    println!("Done.");
    Ok(())
}
You need PDFluent, reqwest for the Vision API call, serde_json, base64, and tokio.
# Cargo.toml
[dependencies]
pdfluent = "0.9"
reqwest = { version = "0.12", features = ["json"] }
serde_json = "1"
base64 = "0.22"
tokio = { version = "1", features = ["full"] }
anyhow = "1"The quickest approach for testing is an API key. For production, use a service account with the Cloud Vision API role and the Application Default Credentials flow.
# Option 1: API key (development/testing)
export GCP_VISION_API_KEY=AIzaSy...
# Option 2: Service account (production)
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
# Then use the OAuth2 token endpoint or the google-cloud Rust cratesPDFluent detects pages with no text layer. Pages that already have selectable text are skipped.
// Open the document and count pages that carry only a raster image
// (i.e. no extractable text layer) — these are the ones needing OCR.
let sdk = Sdk::new()?;
let doc = sdk.open("scanned_contract.pdf")?;
let scanned_count = doc.pages().filter(|p| p.is_image_only()).count();
println!("{} of {} pages are scanned", scanned_count, doc.page_count());Use DOCUMENT_TEXT_DETECTION rather than TEXT_DETECTION. The DOCUMENT variant returns symbols grouped into words, lines, and paragraphs, which gives better word-level bounding boxes for the PDFluent overlay.
// Render the page at 300 DPI and base64-encode it for the JSON request body.
let png_bytes = doc.render_page_to_bytes(page.index(), 300)?;
let b64 = BASE64.encode(&png_bytes);
// One annotate request per page, asking for document-level text detection.
let body = json!({
"requests": [{
"image": { "content": b64 },
"features": [{ "type": "DOCUMENT_TEXT_DETECTION" }]
}]
});
// POST to images:annotate, authenticated via the API key query parameter.
let resp: Value = http
.post(format!("https://vision.googleapis.com/v1/images:annotate?key={api_key}"))
.json(&body)
.send()
.await?
.json()
.await?;The DOCUMENT_TEXT_DETECTION response returns a fullTextAnnotation with pages > blocks > paragraphs > words. Each word has a boundingBox with normalizedVertices. PDFluent needs the bounding box as left/top/width/height fractions.
/// Flatten a Vision `fullTextAnnotation` into PDFluent [`OcrWord`]s with
/// bounding boxes expressed as fractions of the page width/height.
///
/// Walks the response hierarchy pages > blocks > paragraphs > words,
/// reconstructing each word's text from its symbols.
fn extract_words(resp: &Value, _page_index: u32) -> anyhow::Result<Vec<OcrWord>> {
    // Iterate a JSON field as an array, yielding nothing when absent.
    // (Note: `let pages = x.as_array().unwrap_or(&vec![])` does not compile —
    // the temporary Vec is dropped at the end of the statement while the
    // binding still borrows it, E0716.)
    fn arr<'a>(v: &'a Value, key: &'static str) -> impl Iterator<Item = &'a Value> + 'a {
        v[key].as_array().into_iter().flatten()
    }

    let mut words = Vec::new();
    let annotation = &resp["responses"][0]["fullTextAnnotation"];
    for page in arr(annotation, "pages") {
        // Page pixel dimensions, used to normalize pixel-space vertices.
        let page_w = page["width"].as_f64().unwrap_or(0.0);
        let page_h = page["height"].as_f64().unwrap_or(0.0);
        for block in arr(page, "blocks") {
            for para in arr(block, "paragraphs") {
                for word in arr(para, "words") {
                    // Reconstruct the word text from its symbols.
                    let text: String = arr(word, "symbols")
                        .filter_map(|s| s["text"].as_str())
                        .collect();
                    if text.is_empty() {
                        continue;
                    }
                    // images:annotate usually returns pixel-space `vertices`;
                    // `normalizedVertices` (already 0..1 fractions) appear in
                    // other entry points. Prefer normalized when present and
                    // otherwise scale pixels by the page dimensions.
                    let bbox = &word["boundingBox"];
                    let (verts, sx, sy) = if bbox["normalizedVertices"].is_array() {
                        (&bbox["normalizedVertices"], 1.0, 1.0)
                    } else if page_w > 0.0 && page_h > 0.0 {
                        (&bbox["vertices"], page_w, page_h)
                    } else {
                        continue; // no usable geometry for this word
                    };
                    // Vertex 0 is the top-left corner, vertex 2 the
                    // bottom-right (clockwise order from top-left).
                    if let (Some(v0), Some(v2)) = (verts.get(0), verts.get(2)) {
                        let left = v0["x"].as_f64().unwrap_or(0.0) / sx;
                        let top = v0["y"].as_f64().unwrap_or(0.0) / sy;
                        let right = v2["x"].as_f64().unwrap_or(0.0) / sx;
                        let bottom = v2["y"].as_f64().unwrap_or(0.0) / sy;
                        words.push(OcrWord {
                            text,
                            left,
                            top,
                            width: right - left,
                            height: bottom - top,
                            confidence: word["confidence"].as_f64(),
                        });
                    }
                }
            }
        }
    }
    Ok(words)
}
Pass the collected words to the layer builder, call finish(), and save. The text is invisible at render time but fully searchable and copyable.
// Attach this page's OCR words to the layer builder.
builder.add_page_words(page.index(), words);
// After processing all pages:
// Invisible rendering: text participates in search/select/copy but is not drawn.
let opts = OcrLayerOptions::builder()
.text_rendering_mode(pdfluent::ocr::TextRenderingMode::Invisible)
.conform_to_pdfa2b(true) // optional: PDF/A-2b for archival
.build();
let searchable = builder.finish(opts)?;
searchable.save("contract_searchable.pdf")?;No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership and borrowing rules eliminate whole classes of memory errors — buffer overflows, use-after-free — in safe code, so malformed PDFs surface as recoverable errors rather than segfaults.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.