Files
openclaw/src/browser/routes/agent.snapshot.ts
2026-03-09 00:25:43 +00:00

343 lines
11 KiB
TypeScript

import path from "node:path";
import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js";
import { captureScreenshot, snapshotAria } from "../cdp.js";
import { withBrowserNavigationPolicy } from "../navigation-guard.js";
import {
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
normalizeBrowserScreenshot,
} from "../screenshot.js";
import type { BrowserRouteContext } from "../server-context.js";
import {
getPwAiModule,
handleRouteError,
readBody,
requirePwAi,
resolveProfileContext,
withPlaywrightRouteContext,
withRouteTabContext,
} from "./agent.shared.js";
import {
resolveSnapshotPlan,
shouldUsePlaywrightForAriaSnapshot,
shouldUsePlaywrightForScreenshot,
} from "./agent.snapshot.plan.js";
import type { BrowserResponse, BrowserRouteRegistrar } from "./types.js";
import { jsonError, toBoolean, toStringOrEmpty } from "./utils.js";
async function saveBrowserMediaResponse(params: {
res: BrowserResponse;
buffer: Buffer;
contentType: string;
maxBytes: number;
targetId: string;
url: string;
}) {
await ensureMediaDir();
const saved = await saveMediaBuffer(
params.buffer,
params.contentType,
"browser",
params.maxBytes,
);
params.res.json({
ok: true,
path: path.resolve(saved.path),
targetId: params.targetId,
url: params.url,
});
}
/** Resolve the correct targetId after a navigation that may trigger a renderer swap. */
export async function resolveTargetIdAfterNavigate(opts: {
oldTargetId: string;
navigatedUrl: string;
listTabs: () => Promise<Array<{ targetId: string; url: string }>>;
}): Promise<string> {
let currentTargetId = opts.oldTargetId;
try {
const pickReplacement = (tabs: Array<{ targetId: string; url: string }>) => {
if (tabs.some((tab) => tab.targetId === opts.oldTargetId)) {
return opts.oldTargetId;
}
const byUrl = tabs.filter((tab) => tab.url === opts.navigatedUrl);
if (byUrl.length === 1) {
return byUrl[0]?.targetId ?? opts.oldTargetId;
}
const uniqueReplacement = byUrl.filter((tab) => tab.targetId !== opts.oldTargetId);
if (uniqueReplacement.length === 1) {
return uniqueReplacement[0]?.targetId ?? opts.oldTargetId;
}
if (tabs.length === 1) {
return tabs[0]?.targetId ?? opts.oldTargetId;
}
return opts.oldTargetId;
};
currentTargetId = pickReplacement(await opts.listTabs());
if (currentTargetId === opts.oldTargetId) {
await new Promise((r) => setTimeout(r, 800));
currentTargetId = pickReplacement(await opts.listTabs());
}
} catch {
// Best-effort: fall back to pre-navigation targetId
}
return currentTargetId;
}
export function registerBrowserAgentSnapshotRoutes(
app: BrowserRouteRegistrar,
ctx: BrowserRouteContext,
) {
app.post("/navigate", async (req, res) => {
const body = readBody(req);
const url = toStringOrEmpty(body.url);
const targetId = toStringOrEmpty(body.targetId) || undefined;
if (!url) {
return jsonError(res, 400, "url is required");
}
await withPlaywrightRouteContext({
req,
res,
ctx,
targetId,
feature: "navigate",
run: async ({ cdpUrl, tab, pw, profileCtx }) => {
const result = await pw.navigateViaPlaywright({
cdpUrl,
targetId: tab.targetId,
url,
...withBrowserNavigationPolicy(ctx.state().resolved.ssrfPolicy),
});
const currentTargetId = await resolveTargetIdAfterNavigate({
oldTargetId: tab.targetId,
navigatedUrl: result.url,
listTabs: () => profileCtx.listTabs(),
});
res.json({ ok: true, targetId: currentTargetId, ...result });
},
});
});
app.post("/pdf", async (req, res) => {
const body = readBody(req);
const targetId = toStringOrEmpty(body.targetId) || undefined;
await withPlaywrightRouteContext({
req,
res,
ctx,
targetId,
feature: "pdf",
run: async ({ cdpUrl, tab, pw }) => {
const pdf = await pw.pdfViaPlaywright({
cdpUrl,
targetId: tab.targetId,
});
await saveBrowserMediaResponse({
res,
buffer: pdf.buffer,
contentType: "application/pdf",
maxBytes: pdf.buffer.byteLength,
targetId: tab.targetId,
url: tab.url,
});
},
});
});
app.post("/screenshot", async (req, res) => {
const body = readBody(req);
const targetId = toStringOrEmpty(body.targetId) || undefined;
const fullPage = toBoolean(body.fullPage) ?? false;
const ref = toStringOrEmpty(body.ref) || undefined;
const element = toStringOrEmpty(body.element) || undefined;
const type = body.type === "jpeg" ? "jpeg" : "png";
if (fullPage && (ref || element)) {
return jsonError(res, 400, "fullPage is not supported for element screenshots");
}
await withRouteTabContext({
req,
res,
ctx,
targetId,
run: async ({ profileCtx, tab, cdpUrl }) => {
let buffer: Buffer;
const shouldUsePlaywright = shouldUsePlaywrightForScreenshot({
profile: profileCtx.profile,
wsUrl: tab.wsUrl,
ref,
element,
});
if (shouldUsePlaywright) {
const pw = await requirePwAi(res, "screenshot");
if (!pw) {
return;
}
const snap = await pw.takeScreenshotViaPlaywright({
cdpUrl,
targetId: tab.targetId,
ref,
element,
fullPage,
type,
});
buffer = snap.buffer;
} else {
buffer = await captureScreenshot({
wsUrl: tab.wsUrl ?? "",
fullPage,
format: type,
quality: type === "jpeg" ? 85 : undefined,
});
}
const normalized = await normalizeBrowserScreenshot(buffer, {
maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
});
await saveBrowserMediaResponse({
res,
buffer: normalized.buffer,
contentType: normalized.contentType ?? `image/${type}`,
maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
targetId: tab.targetId,
url: tab.url,
});
},
});
});
app.get("/snapshot", async (req, res) => {
const profileCtx = resolveProfileContext(req, res, ctx);
if (!profileCtx) {
return;
}
const targetId = typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
const hasPlaywright = Boolean(await getPwAiModule());
const plan = resolveSnapshotPlan({
profile: profileCtx.profile,
query: req.query,
hasPlaywright,
});
try {
const tab = await profileCtx.ensureTabAvailable(targetId || undefined);
if ((plan.labels || plan.mode === "efficient") && plan.format === "aria") {
return jsonError(res, 400, "labels/mode=efficient require format=ai");
}
if (plan.format === "ai") {
const pw = await requirePwAi(res, "ai snapshot");
if (!pw) {
return;
}
const roleSnapshotArgs = {
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
selector: plan.selectorValue,
frameSelector: plan.frameSelectorValue,
refsMode: plan.refsMode,
options: {
interactive: plan.interactive ?? undefined,
compact: plan.compact ?? undefined,
maxDepth: plan.depth ?? undefined,
},
};
const snap = plan.wantsRoleSnapshot
? await pw.snapshotRoleViaPlaywright(roleSnapshotArgs)
: await pw
.snapshotAiViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
...(typeof plan.resolvedMaxChars === "number"
? { maxChars: plan.resolvedMaxChars }
: {}),
})
.catch(async (err) => {
// Public-API fallback when Playwright's private _snapshotForAI is missing.
if (String(err).toLowerCase().includes("_snapshotforai")) {
return await pw.snapshotRoleViaPlaywright(roleSnapshotArgs);
}
throw err;
});
if (plan.labels) {
const labeled = await pw.screenshotWithLabelsViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
refs: "refs" in snap ? snap.refs : {},
type: "png",
});
const normalized = await normalizeBrowserScreenshot(labeled.buffer, {
maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
});
await ensureMediaDir();
const saved = await saveMediaBuffer(
normalized.buffer,
normalized.contentType ?? "image/png",
"browser",
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
);
const imageType = normalized.contentType?.includes("jpeg") ? "jpeg" : "png";
return res.json({
ok: true,
format: plan.format,
targetId: tab.targetId,
url: tab.url,
labels: true,
labelsCount: labeled.labels,
labelsSkipped: labeled.skipped,
imagePath: path.resolve(saved.path),
imageType,
...snap,
});
}
return res.json({
ok: true,
format: plan.format,
targetId: tab.targetId,
url: tab.url,
...snap,
});
}
const snap = shouldUsePlaywrightForAriaSnapshot({
profile: profileCtx.profile,
wsUrl: tab.wsUrl,
})
? (() => {
// Extension relay doesn't expose per-page WS URLs; run AX snapshot via Playwright CDP session.
// Also covers cases where wsUrl is missing/unusable.
return requirePwAi(res, "aria snapshot").then(async (pw) => {
if (!pw) {
return null;
}
return await pw.snapshotAriaViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
limit: plan.limit,
});
});
})()
: snapshotAria({ wsUrl: tab.wsUrl ?? "", limit: plan.limit });
const resolved = await Promise.resolve(snap);
if (!resolved) {
return;
}
return res.json({
ok: true,
format: plan.format,
targetId: tab.targetId,
url: tab.url,
...resolved,
});
} catch (err) {
handleRouteError(ctx, res, err);
}
});
}