Send scanned PDF pages to Azure AI Document Intelligence (formerly Form Recognizer) and write the OCR results back as a searchable text layer using PDFluent.
use pdfluent::{Sdk, ocr::{OcrLayerOptions, OcrWord}};
use reqwest::Client;
use serde_json::Value;
use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let endpoint = std::env::var("AZURE_FORM_RECOGNIZER_ENDPOINT")?;
    let api_key = std::env::var("AZURE_FORM_RECOGNIZER_KEY")?;
    let sdk = Sdk::new()?;
    let doc = sdk.open("scanned_invoice.pdf")?;
    let http = Client::new();
    let mut builder = doc.add_ocr_layer();
    // Only scanned pages (no text content stream) need an OCR layer.
    for page in doc.pages().filter(|p| p.is_image_only()) {
        // Render at 300 DPI — enough detail for reliable OCR without huge uploads.
        let png_bytes = doc.render_page_to_bytes(page.index(), 300)?;
        // Submit the image to the prebuilt-read model.
        // NOTE: no whitespace before the query string — a stray space between
        // ":analyze" and "?api-version" produces a malformed URL and a 404.
        let submit_url = format!(
            "{endpoint}/formrecognizer/documentModels/prebuilt-read:analyze?api-version=2023-07-31"
        );
        let submit_resp = http
            .post(&submit_url)
            .header("Ocp-Apim-Subscription-Key", &api_key)
            .header("Content-Type", "image/png")
            .body(png_bytes)
            .send()
            .await?
            // Fail loudly on 401/403/4xx instead of later reporting a
            // misleading "No operation-location header" error.
            .error_for_status()?;
        // The service replies 202 Accepted; the polling URL is in the
        // Operation-Location header (HTTP header names are case-insensitive).
        let operation_url = submit_resp
            .headers()
            .get("operation-location")
            .and_then(|v| v.to_str().ok())
            .ok_or_else(|| anyhow::anyhow!("No operation-location header"))?
            .to_string();
        // Poll until the analysis is complete.
        let result: Value = loop {
            tokio::time::sleep(std::time::Duration::from_secs(1)).await;
            let poll: Value = http
                .get(&operation_url)
                .header("Ocp-Apim-Subscription-Key", &api_key)
                .send()
                .await?
                .json()
                .await?;
            match poll["status"].as_str() {
                Some("succeeded") => break poll,
                Some("failed") => anyhow::bail!("Azure analysis failed"),
                // "running" / "notStarted" — keep polling.
                _ => continue,
            }
        };
        let words = extract_words(&result, page.index())?;
        builder.add_page_words(page.index(), words);
    }
    // Invisible text rendering: the OCR text is selectable/searchable but not drawn.
    let opts = OcrLayerOptions::builder()
        .text_rendering_mode(pdfluent::ocr::TextRenderingMode::Invisible)
        .build();
    let searchable = builder.finish(opts)?;
    searchable.save("invoice_searchable.pdf")?;
    println!("Done.");
    Ok(())
}
You need PDFluent, reqwest for HTTP calls, serde_json, base64, and tokio.
# Cargo.toml
[dependencies]
pdfluent = "0.9"
reqwest = { version = "0.12", features = ["json"] }
serde_json = "1"
base64 = "0.22"
tokio = { version = "1", features = ["full"] }
anyhow = "1"
Create an Azure AI Document Intelligence resource in the Azure portal. You need the endpoint URL and one of the subscription keys.
export AZURE_FORM_RECOGNIZER_ENDPOINT=https://your-resource.cognitiveservices.azure.com
export AZURE_FORM_RECOGNIZER_KEY=your_subscription_key
PDFluent finds pages with no text content stream. Pages that already have selectable text are left unchanged.
// Open the document and collect the indices of pages with no text layer.
let sdk = Sdk::new()?;
let doc = sdk.open("scanned_invoice.pdf")?;
// `is_image_only()` is PDFluent's test for a scanned page (no text content stream).
let scanned: Vec<u32> = doc.pages()
.filter(|p| p.is_image_only())
.map(|p| p.index())
.collect();
println!("{} pages need OCR", scanned.len());
Render the page to PNG bytes and POST them to the Document Intelligence analyze endpoint. The prebuilt-read model handles printed and handwritten text. The API responds with 202 Accepted and an Operation-Location header for polling.
// Render at 300 DPI and submit the PNG to the prebuilt-read model.
let png_bytes = doc.render_page_to_bytes(page_index, 300)?;
// NOTE: no whitespace before the query string — a stray space between
// ":analyze" and "?api-version" produces a malformed URL and a 404.
let submit_url = format!(
    "{endpoint}/formrecognizer/documentModels/prebuilt-read:analyze?api-version=2023-07-31"
);
let submit_resp = http
    .post(&submit_url)
    .header("Ocp-Apim-Subscription-Key", &api_key)
    .header("Content-Type", "image/png")
    .body(png_bytes)
    .send()
    .await?
    // Surface 401/403/4xx immediately rather than failing later with a
    // confusing "missing header" error.
    .error_for_status()?;
// The polling URL is in the Operation-Location response header.
// Use `ok_or_else` + `?` instead of `unwrap()` so an unexpected reply
// becomes a recoverable error, not a panic.
let operation_url = submit_resp
    .headers()
    .get("operation-location")
    .and_then(|v| v.to_str().ok())
    .ok_or_else(|| anyhow::anyhow!("No operation-location header"))?
    .to_string();
Azure Document Intelligence processes requests asynchronously. Poll the operation URL until status is "succeeded" or "failed". A simple 1-second sleep between polls is sufficient for single-page images.
// Poll the Operation-Location URL once per second until a terminal status.
let result: Value = loop {
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
let poll: Value = http
.get(&operation_url)
// The polling request needs the same subscription key as the submission.
.header("Ocp-Apim-Subscription-Key", &api_key)
.send()
.await?
.json()
.await?;
match poll["status"].as_str() {
// Terminal success: the loop yields the full response body.
Some("succeeded") => break poll,
// Terminal failure: include Azure's error object in the message.
Some("failed") => anyhow::bail!("Azure analysis failed: {:?}", poll["error"]),
_ => {
// "running" or "notStarted" — keep polling
continue;
}
}
};
The result contains pages > words with polygon coordinates. Azure returns polygons as [x0,y0, x1,y1, ...] in the unit reported by each page's "unit" field — pixels for image input, inches for PDF/TIFF input. Normalizing by the page width and height returned in the same response works in either case.
/// Convert Azure's `analyzeResult` JSON into PDFluent `OcrWord`s.
///
/// Coordinates are normalized to 0.0–1.0 fractions of the page, so the caller
/// does not need to know which unit Azure used for this input type. Words with
/// empty text or malformed polygons are skipped; an empty Vec is returned when
/// the payload has no pages.
fn extract_words(result: &Value, _page_index: u32) -> anyhow::Result<Vec<OcrWord>> {
    let mut words = Vec::new();
    // `.unwrap_or(&vec![])` does not compile (E0716: the `vec![]` temporary is
    // dropped at the end of the statement while still borrowed). Borrow a
    // promoted empty slice (`&[]`) instead.
    let pages = result["analyzeResult"]["pages"]
        .as_array()
        .map(Vec::as_slice)
        .unwrap_or(&[]);
    for page in pages {
        // Reject zero/negative sizes so normalization never divides by zero.
        let page_width = page["width"].as_f64().filter(|w| *w > 0.0).unwrap_or(1.0);
        let page_height = page["height"].as_f64().filter(|h| *h > 0.0).unwrap_or(1.0);
        for word in page["words"].as_array().map(Vec::as_slice).unwrap_or(&[]) {
            let text = word["content"].as_str().unwrap_or("").to_string();
            if text.is_empty() {
                continue;
            }
            // polygon is [x0,y0, x1,y1, x2,y2, x3,y3] — four corner points.
            let poly = word["polygon"].as_array().map(Vec::as_slice).unwrap_or(&[]);
            if poly.len() < 8 {
                continue;
            }
            let x_vals: Vec<f64> = poly.iter().step_by(2)
                .filter_map(|v| v.as_f64()).collect();
            let y_vals: Vec<f64> = poly.iter().skip(1).step_by(2)
                .filter_map(|v| v.as_f64()).collect();
            // Non-numeric entries would leave these empty; folding over an
            // empty list yields infinities and NaN extents, so skip the word.
            if x_vals.len() < 4 || y_vals.len() < 4 {
                continue;
            }
            let x_min = x_vals.iter().cloned().fold(f64::INFINITY, f64::min);
            let y_min = y_vals.iter().cloned().fold(f64::INFINITY, f64::min);
            let x_max = x_vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
            let y_max = y_vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
            words.push(OcrWord {
                text,
                // Axis-aligned bounding box of the polygon, as 0.0–1.0 page fractions.
                left: x_min / page_width,
                top: y_min / page_height,
                width: (x_max - x_min) / page_width,
                height: (y_max - y_min) / page_height,
                confidence: word["confidence"].as_f64(),
            });
        }
    }
    Ok(words)
}
Add the words to the layer builder for each page, then finish and save. The resulting PDF has invisible text positioned over each word for search and copy.
// Attach this page's OCR words to the layer builder.
builder.add_page_words(page_index, words);
// After all pages:
// Invisible rendering mode: text is searchable/selectable but never drawn.
let opts = OcrLayerOptions::builder()
.text_rendering_mode(pdfluent::ocr::TextRenderingMode::Invisible)
.conform_to_pdfa2b(true)
.build();
let searchable = builder.finish(opts)?;
searchable.save("invoice_searchable.pdf")?;
No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.