PDFluent has no native dependencies, which makes it ideal for Lambda. Cold start times are under 50 ms for a typical PDF handler.
use lambda_runtime::{service_fn, LambdaEvent, Error};
use aws_sdk_s3::Client as S3Client;
use pdfluent::PdfDocument;
use serde_json::Value;
/// Entry point: wraps `handler` with `service_fn` and runs it on the Lambda runtime.
#[tokio::main]
async fn main() -> Result<(), Error> {
    let service = service_fn(handler);
    lambda_runtime::run(service).await
}
/// Lambda handler: downloads a PDF from S3 and reports its page count and text length.
///
/// Reads the string fields `bucket` and `key` from the JSON event payload, fetches that
/// object with `get_object`, parses the bytes with `PdfDocument::from_bytes`, and returns
/// a JSON object with the keys `pages` and `chars`.
///
/// # Errors
///
/// Returns an error if `bucket` or `key` is missing, not a string, or empty, or if the
/// S3 request, the body download, PDF parsing, or text extraction fails.
async fn handler(event: LambdaEvent<Value>) -> Result<Value, Error> {
    // Reject a malformed event up front instead of sending an empty bucket/key to S3.
    let bucket = event.payload["bucket"]
        .as_str()
        .filter(|name| !name.is_empty())
        .ok_or("event payload must contain a non-empty string field `bucket`")?;
    let key = event.payload["key"]
        .as_str()
        .filter(|name| !name.is_empty())
        .ok_or("event payload must contain a non-empty string field `key`")?;

    let config = aws_config::load_from_env().await;
    let s3 = S3Client::new(&config);

    // Fetch the object and buffer the whole body in memory before parsing.
    let resp = s3.get_object().bucket(bucket).key(key).send().await?;
    let bytes = resp.body.collect().await?.into_bytes();

    let doc = PdfDocument::from_bytes(&bytes)?;
    let page_count = doc.page_count();
    let text = doc.extract_text()?;

    // NOTE(review): if `extract_text` returns a `String`, `len()` is a byte count, not a
    // character count — confirm which one "chars" is meant to report.
    Ok(serde_json::json!({
        "pages": page_count,
        "chars": text.len()
    }))
}
Use the lambda_runtime crate from AWS. Build a binary named bootstrap, which is the required name for Lambda custom runtimes.
# Cargo.toml
[package]
name = "pdf-lambda"
version = "0.1.0"
edition = "2021"
[[bin]]
name = "bootstrap"
path = "src/main.rs"
[dependencies]
pdfluent = "0.9"
lambda_runtime = "0.11"
aws-config = { version = "1", features = ["behavior-version-latest"] }
aws-sdk-s3 = "1"
tokio = { version = "1", features = ["full"] }
serde_json = "1"
Lambda runs on Amazon Linux 2 (x86_64 or arm64). Use cargo-lambda to cross-compile without a Linux machine.
# Install cargo-lambda
cargo install cargo-lambda
# Build for x86_64 Lambda
cargo lambda build --release --target x86_64-unknown-linux-musl
# Or build for arm64 Lambda (Graviton2, cheaper)
cargo lambda build --release --target aarch64-unknown-linux-musl
Read the S3 bucket and key from the event payload. Download the PDF bytes from S3 and pass them to PdfDocument::from_bytes.
/// Lambda handler: downloads a PDF from S3 and returns its page count.
///
/// Reads the string fields `bucket` and `key` from the JSON event payload, fetches that
/// object with `get_object`, parses the bytes with `PdfDocument::from_bytes`, and returns
/// a JSON object with the key `pages`.
///
/// # Errors
///
/// Returns an error if `bucket` or `key` is missing, not a string, or empty, or if the
/// S3 request, the body download, or PDF parsing fails.
async fn handler(event: LambdaEvent<Value>) -> Result<Value, Error> {
    // Reject a malformed event up front instead of sending an empty bucket/key to S3.
    let bucket = event.payload["bucket"]
        .as_str()
        .filter(|name| !name.is_empty())
        .ok_or("event payload must contain a non-empty string field `bucket`")?;
    let key = event.payload["key"]
        .as_str()
        .filter(|name| !name.is_empty())
        .ok_or("event payload must contain a non-empty string field `key`")?;

    let config = aws_config::load_from_env().await;
    let s3 = S3Client::new(&config);

    // Fetch the object and buffer the whole body in memory before parsing.
    let resp = s3
        .get_object()
        .bucket(bucket)
        .key(key)
        .send()
        .await?;
    let bytes = resp.body.collect().await?.into_bytes();

    let doc = PdfDocument::from_bytes(&bytes)?;
    // ... process the document
    Ok(serde_json::json!({ "pages": doc.page_count() }))
}
cargo lambda deploy uploads the binary as a Lambda function with the provided.al2 runtime.
cargo lambda deploy \
--region eu-west-1 \
--memory 512 \
--timeout 30 \
pdf-lambda
The function needs GetObject permission on the S3 bucket. Attach an inline policy or a managed policy to the execution role.
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": ["s3:GetObject"],
"Resource": "arn:aws:s3:::my-pdf-bucket/*"
}
]
}
No JVM, no runtime, no DLL dependencies. Ships as a single native binary or WASM module.
Rust's ownership model prevents buffer overflows and use-after-free. No segfaults in PDF parsing.
Same code runs server-side, in Docker, on AWS Lambda, on Cloudflare Workers, or in the browser via WASM.