import { type Context, complete } from "@mariozechner/pi-ai";
import { Type } from "@sinclair/typebox";
import type { OpenClawConfig } from "../../config/config.js";
import { extractPdfContent, type PdfExtractedContent } from "../../media/pdf-extract.js";
import { resolveUserPath } from "../../utils.js";
import { loadWebMediaRaw } from "../../web/media.js";
import {
  coerceImageModelConfig,
  type ImageModelConfig,
  resolveProviderVisionModelFromConfig,
} from "./image-tool.helpers.js";
import {
  applyImageModelConfigDefaults,
  buildTextToolResult,
  resolveModelFromRegistry,
  resolveMediaToolLocalRoots,
  resolveModelRuntimeApiKey,
  resolvePromptAndModelOverride,
} from "./media-tool-shared.js";
import { hasAuthForProvider, resolveDefaultModelRef } from "./model-config.helpers.js";
import { anthropicAnalyzePdf, geminiAnalyzePdf } from "./pdf-native-providers.js";
import {
  coercePdfAssistantText,
  coercePdfModelConfig,
  parsePageRange,
  providerSupportsNativePdf,
  resolvePdfToolMaxTokens,
} from "./pdf-tool.helpers.js";
import {
  createSandboxBridgeReadFile,
  discoverAuthStorage,
  discoverModels,
  ensureOpenClawModelsJson,
  resolveSandboxedBridgeMediaPath,
  runWithImageModelFallback,
  type AnyAgentTool,
  type SandboxedBridgeMediaPathConfig,
  type SandboxFsBridge,
  type ToolFsPolicy,
} from "./tool-runtime.helpers.js";

/** Prompt used when the tool call does not supply one. */
const DEFAULT_PROMPT = "Analyze this PDF document.";
/** Maximum number of distinct PDFs accepted by a single tool call. */
const DEFAULT_MAX_PDFS = 10;
/** Per-PDF size cap (in MiB) when neither the call nor the config overrides it. */
const DEFAULT_MAX_BYTES_MB = 10;
/** Page cap handed to page-range parsing and extraction when not configured. */
const DEFAULT_MAX_PAGES = 20;
const ANTHROPIC_PDF_PRIMARY = "anthropic/claude-opus-4-6";
const ANTHROPIC_PDF_FALLBACK = "anthropic/claude-opus-4-5";
/** Passed to `extractPdfContent` as `minTextChars`. */
const PDF_MIN_TEXT_CHARS = 200;
/** Passed to `extractPdfContent` as `maxPixels`. */
const PDF_MAX_PIXELS = 4_000_000;

// ---------------------------------------------------------------------------
// Model resolution (mirrors image tool pattern)
// ---------------------------------------------------------------------------

/**
 * Resolve the effective PDF model config.
 *
 * Resolution order:
 * 1. An explicit PDF model config (primary or at least one fallback).
 * 2. An explicit image model config (primary or at least one fallback).
 * 3. Auto-detection based on which providers have auth available.
 *
 * @param params.cfg - Optional application config to read model settings from.
 * @param params.agentDir - Agent directory used for provider auth lookups.
 * @returns The model config to use, or `null` when nothing is configured and
 *   no candidate provider (default provider, anthropic, google, openai) has auth.
 */
export function resolvePdfModelConfigForTool(params: {
  cfg?: OpenClawConfig;
  agentDir: string;
}): ImageModelConfig | null {
  // Check for explicit PDF model config first
  const explicitPdf = coercePdfModelConfig(params.cfg);
  if (explicitPdf.primary?.trim() || (explicitPdf.fallbacks?.length ?? 0) > 0) {
    return explicitPdf;
  }

  // Fall back to the image model config
  const explicitImage = coerceImageModelConfig(params.cfg);
  if (explicitImage.primary?.trim() || (explicitImage.fallbacks?.length ?? 0) > 0) {
    return explicitImage;
  }

  // Auto-detect from available providers
  const primary = resolveDefaultModelRef(params.cfg);
  const anthropicOk = hasAuthForProvider({ provider: "anthropic", agentDir: params.agentDir });
  const googleOk = hasAuthForProvider({ provider: "google", agentDir: params.agentDir });
  const openaiOk = hasAuthForProvider({ provider: "openai", agentDir: params.agentDir });

  const fallbacks: string[] = [];
  const addFallback = (ref: string): void => {
    const trimmed = ref.trim();
    if (trimmed && !fallbacks.includes(trimmed)) {
      fallbacks.push(trimmed);
    }
  };

  // Prefer providers with native PDF support
  let preferred: string | null = null;
  const providerOk = hasAuthForProvider({ provider: primary.provider, agentDir: params.agentDir });
  const providerVision = resolveProviderVisionModelFromConfig({
    cfg: params.cfg,
    provider: primary.provider,
  });

  if (primary.provider === "anthropic" && anthropicOk) {
    preferred = ANTHROPIC_PDF_PRIMARY;
  } else if (primary.provider === "google" && googleOk && providerVision) {
    preferred = providerVision;
  } else if (providerOk && providerVision) {
    preferred = providerVision;
  } else if (anthropicOk) {
    preferred = ANTHROPIC_PDF_PRIMARY;
  } else if (googleOk) {
    preferred = "google/gemini-2.5-pro";
  } else if (openaiOk) {
    preferred = "openai/gpt-5-mini";
  }

  if (!preferred?.trim()) {
    return null;
  }

  if (anthropicOk && preferred !== ANTHROPIC_PDF_PRIMARY) {
    addFallback(ANTHROPIC_PDF_PRIMARY);
  }
  if (anthropicOk) {
    addFallback(ANTHROPIC_PDF_FALLBACK);
  }
  if (openaiOk) {
    addFallback("openai/gpt-5-mini");
  }

  // The preferred model must not also be listed as its own fallback.
  const pruned = fallbacks.filter((ref) => ref !== preferred);
  return { primary: preferred, ...(pruned.length > 0 ? { fallbacks: pruned } : {}) };
}

// ---------------------------------------------------------------------------
// Build context for extraction fallback path
// ---------------------------------------------------------------------------

/**
 * Build a single-user-message context from extracted PDF content.
 *
 * For each extraction, non-blank text is added first (labelled `[PDF text]`,
 * or `[PDF n text]` when there are several PDFs), followed by that PDF's
 * images. The user prompt is appended as the final text part.
 */
function buildPdfExtractionContext(prompt: string, extractions: PdfExtractedContent[]): Context {
  const content: Array<
    { type: "text"; text: string } | { type: "image"; data: string; mimeType: string }
  > = [];

  // Add extracted text and images
  extractions.forEach((extraction, index) => {
    if (extraction.text.trim()) {
      const label = extractions.length > 1 ? `[PDF ${index + 1} text]\n` : "[PDF text]\n";
      content.push({ type: "text", text: label + extraction.text });
    }
    for (const img of extraction.images) {
      content.push({ type: "image", data: img.data, mimeType: img.mimeType });
    }
  });

  // Add the user prompt
  content.push({ type: "text", text: prompt });

  return {
    messages: [{ role: "user", content, timestamp: Date.now() }],
  };
}

// ---------------------------------------------------------------------------
// Run PDF prompt with model fallback
// ---------------------------------------------------------------------------

type PdfSandboxConfig = {
  root: string;
  bridge: SandboxFsBridge;
};

/**
 * Run the prompt against the configured models via `runWithImageModelFallback`.
 *
 * Providers reported by `providerSupportsNativePdf` and handled here
 * (anthropic, google) receive the raw base64 PDFs. Every other model receives
 * extracted text/images; extraction is performed at most once and reused
 * across attempts.
 *
 * @param params.getExtractions - Lazy extractor, only invoked when an attempt
 *   needs the extraction path.
 * @returns The answer text, the provider/model that produced it, whether the
 *   native PDF path was used, and the attempts reported by the fallback runner.
 */
async function runPdfPrompt(params: {
  cfg?: OpenClawConfig;
  agentDir: string;
  pdfModelConfig: ImageModelConfig;
  modelOverride?: string;
  prompt: string;
  pdfBuffers: Array<{ base64: string; filename: string }>;
  pageNumbers?: number[];
  getExtractions: () => Promise<PdfExtractedContent[]>;
}): Promise<{
  text: string;
  provider: string;
  model: string;
  native: boolean;
  attempts: Array<{ provider: string; model: string; error: string }>;
}> {
  const effectiveCfg = applyImageModelConfigDefaults(params.cfg, params.pdfModelConfig);

  await ensureOpenClawModelsJson(effectiveCfg, params.agentDir);
  const authStorage = discoverAuthStorage(params.agentDir);
  const modelRegistry = discoverModels(authStorage, params.agentDir);

  // Memoize extraction so several fallback attempts do not re-extract.
  let extractionCache: PdfExtractedContent[] | null = null;
  const getExtractions = async (): Promise<PdfExtractedContent[]> => {
    if (!extractionCache) {
      extractionCache = await params.getExtractions();
    }
    return extractionCache;
  };

  const result = await runWithImageModelFallback({
    cfg: effectiveCfg,
    modelOverride: params.modelOverride,
    run: async (provider, modelId) => {
      const model = resolveModelFromRegistry({ modelRegistry, provider, modelId });
      const apiKey = await resolveModelRuntimeApiKey({
        model,
        cfg: effectiveCfg,
        agentDir: params.agentDir,
        authStorage,
      });

      if (providerSupportsNativePdf(provider)) {
        // Native providers get the whole document, so page filtering cannot be honored.
        if (params.pageNumbers && params.pageNumbers.length > 0) {
          throw new Error(
            `pages is not supported with native PDF providers (${provider}/${modelId}). Remove pages, or use a non-native model for page filtering.`,
          );
        }

        const pdfs = params.pdfBuffers.map((p) => ({
          base64: p.base64,
          filename: p.filename,
        }));

        if (provider === "anthropic") {
          const text = await anthropicAnalyzePdf({
            apiKey,
            modelId,
            prompt: params.prompt,
            pdfs,
            maxTokens: resolvePdfToolMaxTokens(model.maxTokens),
            baseUrl: model.baseUrl,
          });
          return { text, provider, model: modelId, native: true };
        }

        if (provider === "google") {
          const text = await geminiAnalyzePdf({
            apiKey,
            modelId,
            prompt: params.prompt,
            pdfs,
            baseUrl: model.baseUrl,
          });
          return { text, provider, model: modelId, native: true };
        }
        // Any other "native" provider falls through to the extraction path below.
      }

      const extractions = await getExtractions();
      const hasImages = extractions.some((e) => e.images.length > 0);

      let usableExtractions = extractions;
      if (hasImages && !model.input?.includes("image")) {
        // The model cannot take images: send text only, or fail if there is none.
        const hasText = extractions.some((e) => e.text.trim().length > 0);
        if (!hasText) {
          throw new Error(
            `Model ${provider}/${modelId} does not support images and PDF has no extractable text.`,
          );
        }
        usableExtractions = extractions.map((e) => ({
          text: e.text,
          images: [],
        }));
      }

      const context = buildPdfExtractionContext(params.prompt, usableExtractions);
      const message = await complete(model, context, {
        apiKey,
        maxTokens: resolvePdfToolMaxTokens(model.maxTokens),
      });
      const text = coercePdfAssistantText({ message, provider, model: modelId });
      return { text, provider, model: modelId, native: false };
    },
  });

  return {
    text: result.result.text,
    provider: result.result.provider,
    model: result.result.model,
    native: result.result.native,
    attempts: result.attempts.map((a) => ({
      provider: a.provider,
      model: a.model,
      error: a.error,
    })),
  };
}

// ---------------------------------------------------------------------------
// PDF tool factory
// ---------------------------------------------------------------------------

/**
 * Create the `pdf` agent tool.
 *
 * @param options.config - Application config; `agents.defaults.pdfMaxBytesMb`
 *   and `agents.defaults.pdfMaxPages` override the built-in limits when they
 *   are finite numbers.
 * @param options.agentDir - Agent directory used for auth/model discovery.
 * @param options.workspaceDir - Used to resolve the allowed local roots.
 * @param options.sandbox - When set with a non-blank root, files are read
 *   through the sandbox bridge and remote URLs are rejected.
 * @param options.fsPolicy - `workspaceOnly` is forwarded to path resolution.
 * @returns The tool, or `null` when `agentDir` is blank (and no explicit PDF
 *   model is configured) or when no model config can be resolved.
 * @throws Error when an explicit PDF model is configured but `agentDir` is blank.
 */
export function createPdfTool(options?: {
  config?: OpenClawConfig;
  agentDir?: string;
  workspaceDir?: string;
  sandbox?: PdfSandboxConfig;
  fsPolicy?: ToolFsPolicy;
}): AnyAgentTool | null {
  const agentDir = options?.agentDir?.trim();
  if (!agentDir) {
    const explicit = coercePdfModelConfig(options?.config);
    if (explicit.primary?.trim() || (explicit.fallbacks?.length ?? 0) > 0) {
      throw new Error("createPdfTool requires agentDir when enabled");
    }
    return null;
  }

  const pdfModelConfig = resolvePdfModelConfigForTool({ cfg: options?.config, agentDir });
  if (!pdfModelConfig) {
    return null;
  }

  const agentDefaults = options?.config?.agents?.defaults as Record<string, unknown> | undefined;
  const maxBytesMbDefault = agentDefaults?.pdfMaxBytesMb;
  const maxPagesDefault = agentDefaults?.pdfMaxPages;
  const configuredMaxBytesMb =
    typeof maxBytesMbDefault === "number" && Number.isFinite(maxBytesMbDefault)
      ? maxBytesMbDefault
      : DEFAULT_MAX_BYTES_MB;
  const configuredMaxPages =
    typeof maxPagesDefault === "number" && Number.isFinite(maxPagesDefault)
      ? Math.floor(maxPagesDefault)
      : DEFAULT_MAX_PAGES;

  const localRoots = resolveMediaToolLocalRoots(options?.workspaceDir, {
    workspaceOnly: options?.fsPolicy?.workspaceOnly === true,
  });

  const description =
    "Analyze one or more PDF documents with a model. Supports native PDF analysis for Anthropic and Google models, with text/image extraction fallback for other providers. Use pdf for a single path/URL, or pdfs for multiple (up to 10). Provide a prompt describing what to analyze.";

  return {
    label: "PDF",
    name: "pdf",
    description,
    parameters: Type.Object({
      prompt: Type.Optional(Type.String()),
      pdf: Type.Optional(Type.String({ description: "Single PDF path or URL." })),
      pdfs: Type.Optional(
        Type.Array(Type.String(), {
          description: "Multiple PDF paths or URLs (up to 10).",
        }),
      ),
      pages: Type.Optional(
        Type.String({
          description: 'Page range to process, e.g. "1-5", "1,3,5-7". Defaults to all pages.',
        }),
      ),
      model: Type.Optional(Type.String()),
      maxBytesMb: Type.Optional(Type.Number()),
    }),
    execute: async (_toolCallId, args) => {
      const record =
        args && typeof args === "object" ? (args as Record<string, unknown>) : {};

      // MARK: - Normalize pdf + pdfs input
      const pdfCandidates: string[] = [];
      if (typeof record.pdf === "string") {
        pdfCandidates.push(record.pdf);
      }
      if (Array.isArray(record.pdfs)) {
        pdfCandidates.push(...record.pdfs.filter((v): v is string => typeof v === "string"));
      }

      // Trim, drop blanks, and de-duplicate while preserving first-seen order.
      const seenPdfs = new Set<string>();
      const pdfInputs: string[] = [];
      for (const candidate of pdfCandidates) {
        const trimmed = candidate.trim();
        if (!trimmed || seenPdfs.has(trimmed)) {
          continue;
        }
        seenPdfs.add(trimmed);
        pdfInputs.push(trimmed);
      }
      if (pdfInputs.length === 0) {
        throw new Error("pdf required: provide a path or URL to a PDF document");
      }

      // Enforce max PDFs cap
      if (pdfInputs.length > DEFAULT_MAX_PDFS) {
        return {
          content: [
            {
              type: "text",
              text: `Too many PDFs: ${pdfInputs.length} provided, maximum is ${DEFAULT_MAX_PDFS}. Please reduce the number.`,
            },
          ],
          details: { error: "too_many_pdfs", count: pdfInputs.length, max: DEFAULT_MAX_PDFS },
        };
      }

      const { prompt: promptRaw, modelOverride } = resolvePromptAndModelOverride(
        record,
        DEFAULT_PROMPT,
      );

      // A per-call limit only applies when it is a finite positive number.
      const maxBytesMbRaw = typeof record.maxBytesMb === "number" ? record.maxBytesMb : undefined;
      const maxBytesMb =
        typeof maxBytesMbRaw === "number" && Number.isFinite(maxBytesMbRaw) && maxBytesMbRaw > 0
          ? maxBytesMbRaw
          : configuredMaxBytesMb;
      const maxBytes = Math.floor(maxBytesMb * 1024 * 1024);

      // Parse page range
      const pagesRaw =
        typeof record.pages === "string" && record.pages.trim() ? record.pages.trim() : undefined;

      const sandboxConfig: SandboxedBridgeMediaPathConfig | null =
        options?.sandbox && options.sandbox.root.trim()
          ? {
              root: options.sandbox.root.trim(),
              bridge: options.sandbox.bridge,
              workspaceOnly: options.fsPolicy?.workspaceOnly === true,
            }
          : null;

      // MARK: - Load each PDF
      const loadedPdfs: Array<{
        base64: string;
        buffer: Buffer;
        filename: string;
        resolvedPath: string;
        rewrittenFrom?: string;
      }> = [];

      for (const pdfRaw of pdfInputs) {
        const trimmed = pdfRaw.trim();
        const isHttpUrl = /^https?:\/\//i.test(trimmed);
        const isFileUrl = /^file:/i.test(trimmed);
        const isDataUrl = /^data:/i.test(trimmed);
        // "C:\..." matches the generic scheme pattern, so it is excluded explicitly.
        const looksLikeWindowsDrive = /^[a-zA-Z]:[\\/]/.test(trimmed);
        const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(trimmed);

        if (hasScheme && !looksLikeWindowsDrive && !isFileUrl && !isHttpUrl && !isDataUrl) {
          return {
            content: [
              {
                type: "text",
                text: `Unsupported PDF reference: ${pdfRaw}. Use a file path, file:// URL, or http(s) URL.`,
              },
            ],
            details: { error: "unsupported_pdf_reference", pdf: pdfRaw },
          };
        }

        if (sandboxConfig && isHttpUrl) {
          throw new Error("Sandboxed PDF tool does not allow remote URLs.");
        }

        // "~" is only expanded outside the sandbox; sandboxed paths are passed through as-is.
        const resolvedPdf =
          !sandboxConfig && trimmed.startsWith("~") ? resolveUserPath(trimmed) : trimmed;

        const resolvedPathInfo: { resolved: string; rewrittenFrom?: string } = sandboxConfig
          ? await resolveSandboxedBridgeMediaPath({
              sandbox: sandboxConfig,
              mediaPath: resolvedPdf,
              inboundFallbackDir: "media/inbound",
            })
          : {
              resolved: resolvedPdf.startsWith("file://")
                ? resolvedPdf.slice("file://".length)
                : resolvedPdf,
            };

        const media = sandboxConfig
          ? await loadWebMediaRaw(resolvedPathInfo.resolved, {
              maxBytes,
              sandboxValidated: true,
              readFile: createSandboxBridgeReadFile({ sandbox: sandboxConfig }),
            })
          : await loadWebMediaRaw(resolvedPathInfo.resolved, {
              maxBytes,
              localRoots,
            });

        if (media.kind !== "document") {
          // Check MIME type more specifically ("application/pdf" also contains "pdf").
          const ct = (media.contentType ?? "").toLowerCase();
          if (!ct.includes("pdf")) {
            throw new Error(`Expected PDF but got ${media.contentType ?? media.kind}: ${pdfRaw}`);
          }
        }

        const base64 = media.buffer.toString("base64");
        // `pop()` yields "" (not undefined) for URLs ending in "/", so fall back
        // on any falsy segment rather than only on null/undefined.
        const filename =
          media.fileName ??
          ((isHttpUrl ? new URL(trimmed).pathname.split("/").pop() : undefined) || "document.pdf");

        loadedPdfs.push({
          base64,
          buffer: media.buffer,
          filename,
          resolvedPath: resolvedPathInfo.resolved,
          ...(resolvedPathInfo.rewrittenFrom
            ? { rewrittenFrom: resolvedPathInfo.rewrittenFrom }
            : {}),
        });
      }

      const pageNumbers = pagesRaw ? parsePageRange(pagesRaw, configuredMaxPages) : undefined;

      // Extraction is deferred: it only runs if a non-native model is attempted.
      const getExtractions = async (): Promise<PdfExtractedContent[]> => {
        const extractedAll: PdfExtractedContent[] = [];
        for (const pdf of loadedPdfs) {
          const extracted = await extractPdfContent({
            buffer: pdf.buffer,
            maxPages: configuredMaxPages,
            maxPixels: PDF_MAX_PIXELS,
            minTextChars: PDF_MIN_TEXT_CHARS,
            pageNumbers,
          });
          extractedAll.push(extracted);
        }
        return extractedAll;
      };

      const result = await runPdfPrompt({
        cfg: options?.config,
        agentDir,
        pdfModelConfig,
        modelOverride,
        prompt: promptRaw,
        pdfBuffers: loadedPdfs.map((p) => ({ base64: p.base64, filename: p.filename })),
        pageNumbers,
        getExtractions,
      });

      // A single PDF is reported flat; several are reported as a `pdfs` array.
      const pdfDetails =
        loadedPdfs.length === 1
          ? {
              pdf: loadedPdfs[0].resolvedPath,
              ...(loadedPdfs[0].rewrittenFrom
                ? { rewrittenFrom: loadedPdfs[0].rewrittenFrom }
                : {}),
            }
          : {
              pdfs: loadedPdfs.map((p) => ({
                pdf: p.resolvedPath,
                ...(p.rewrittenFrom ? { rewrittenFrom: p.rewrittenFrom } : {}),
              })),
            };

      return buildTextToolResult(result, { native: result.native, ...pdfDetails });
    },
  };
}