diff --git a/CHANGELOG.md b/CHANGELOG.md index 8232c05c4..6b3b9f754 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai - Browser/Extension navigation reattach: preserve debugger re-attachment when relay is temporarily disconnected by deferring relay attach events until reconnect/re-announce, reducing post-navigation tab loss. (#28725) Thanks @stone-jin. - Browser/Profile defaults: prefer `openclaw` profile over `chrome` in headless/no-sandbox environments unless an explicit `defaultProfile` is configured. (#14944) Thanks @BenediktSchackenberg. - Browser/Remote CDP ownership checks: skip local-process ownership errors for non-loopback remote CDP profiles when HTTP is reachable but the websocket handshake fails, and surface the remote websocket attach/retry path instead. (#15582) Landed from contributor (#28780) Thanks @stubbi, @bsormagec, @unblockedgamesstudio and @vincentkoc. +- Browser/Act request compatibility: accept legacy flattened `action="act"` params (`kind/ref/text/...`) in addition to `request={...}` so browser act calls no longer fail with `request required`. (#15120) Thanks @vincentkoc. - Browser/Extension relay stale tabs: evict stale cached targets from `/json/list` when extension targets are destroyed/crashed or commands fail with missing target/session errors. (#6175) Thanks @vincentkoc. - CLI/Browser start timeout: honor `openclaw browser --timeout <ms> start` and stop by removing the fixed 15000ms override so slower Chrome startups can use caller-provided timeouts. (#22412, #23427) Thanks @vincentkoc. - Browser/CDP startup diagnostics: include Chrome stderr output and a Linux no-sandbox hint in startup timeout errors so failed launches are easier to diagnose. (#29312) Thanks @veast. 
diff --git a/src/agents/tools/browser-tool.schema.ts b/src/agents/tools/browser-tool.schema.ts index bebbe5ad2..aef51f635 100644 --- a/src/agents/tools/browser-tool.schema.ts +++ b/src/agents/tools/browser-tool.schema.ts @@ -60,6 +60,7 @@ const BrowserActSchema = Type.Object({ slowly: Type.Optional(Type.Boolean()), // press key: Type.Optional(Type.String()), + delayMs: Type.Optional(Type.Number()), // drag startRef: Type.Optional(Type.String()), endRef: Type.Optional(Type.String()), @@ -72,7 +73,11 @@ const BrowserActSchema = Type.Object({ height: Type.Optional(Type.Number()), // wait timeMs: Type.Optional(Type.Number()), + selector: Type.Optional(Type.String()), + url: Type.Optional(Type.String()), + loadState: Type.Optional(Type.String()), textGone: Type.Optional(Type.String()), + timeoutMs: Type.Optional(Type.Number()), // evaluate fn: Type.Optional(Type.String()), }); @@ -109,5 +114,25 @@ export const BrowserToolSchema = Type.Object({ timeoutMs: Type.Optional(Type.Number()), accept: Type.Optional(Type.Boolean()), promptText: Type.Optional(Type.String()), + // Legacy flattened act params (preferred: request={...}) + kind: Type.Optional(stringEnum(BROWSER_ACT_KINDS)), + doubleClick: Type.Optional(Type.Boolean()), + button: Type.Optional(Type.String()), + modifiers: Type.Optional(Type.Array(Type.String())), + text: Type.Optional(Type.String()), + submit: Type.Optional(Type.Boolean()), + slowly: Type.Optional(Type.Boolean()), + key: Type.Optional(Type.String()), + delayMs: Type.Optional(Type.Number()), + startRef: Type.Optional(Type.String()), + endRef: Type.Optional(Type.String()), + values: Type.Optional(Type.Array(Type.String())), + fields: Type.Optional(Type.Array(Type.Object({}, { additionalProperties: true }))), + width: Type.Optional(Type.Number()), + height: Type.Optional(Type.Number()), + timeMs: Type.Optional(Type.Number()), + textGone: Type.Optional(Type.String()), + loadState: Type.Optional(Type.String()), + fn: Type.Optional(Type.String()), request: 
Type.Optional(BrowserActSchema), }); diff --git a/src/agents/tools/browser-tool.test.ts b/src/agents/tools/browser-tool.test.ts index f299bb552..0f9f3f5a2 100644 --- a/src/agents/tools/browser-tool.test.ts +++ b/src/agents/tools/browser-tool.test.ts @@ -307,6 +307,62 @@ describe("browser tool url alias support", () => { }); }); +describe("browser tool act compatibility", () => { + afterEach(() => { + vi.clearAllMocks(); + configMocks.loadConfig.mockReturnValue({ browser: {} }); + nodesUtilsMocks.listNodes.mockResolvedValue([]); + }); + + it("accepts flattened act params for backward compatibility", async () => { + const tool = createBrowserTool(); + await tool.execute?.("call-1", { + action: "act", + kind: "type", + ref: "f1e3", + text: "Test Title", + targetId: "tab-1", + timeoutMs: 5000, + }); + + expect(browserActionsMocks.browserAct).toHaveBeenCalledWith( + undefined, + expect.objectContaining({ + kind: "type", + ref: "f1e3", + text: "Test Title", + targetId: "tab-1", + timeoutMs: 5000, + }), + expect.objectContaining({ profile: undefined }), + ); + }); + + it("prefers request payload when both request and flattened fields are present", async () => { + const tool = createBrowserTool(); + await tool.execute?.("call-1", { + action: "act", + kind: "click", + ref: "legacy-ref", + request: { + kind: "press", + key: "Enter", + targetId: "tab-2", + }, + }); + + expect(browserActionsMocks.browserAct).toHaveBeenCalledWith( + undefined, + { + kind: "press", + key: "Enter", + targetId: "tab-2", + }, + expect.objectContaining({ profile: undefined }), + ); + }); +}); + describe("browser tool snapshot labels", () => { afterEach(() => { vi.clearAllMocks(); diff --git a/src/agents/tools/browser-tool.ts b/src/agents/tools/browser-tool.ts index 2a8a9e0ce..0e7491f9b 100644 --- a/src/agents/tools/browser-tool.ts +++ b/src/agents/tools/browser-tool.ts @@ -91,6 +91,53 @@ function readTargetUrlParam(params: Record<string, unknown>) { ); } +const LEGACY_BROWSER_ACT_REQUEST_KEYS = [ + "targetId", + 
"ref", + "doubleClick", + "button", + "modifiers", + "text", + "submit", + "slowly", + "key", + "delayMs", + "startRef", + "endRef", + "values", + "fields", + "width", + "height", + "timeMs", + "textGone", + "selector", + "url", + "loadState", + "fn", + "timeoutMs", +] as const; + +function readActRequestParam(params: Record<string, unknown>) { + const requestParam = params.request; + if (requestParam && typeof requestParam === "object") { + return requestParam as Parameters<typeof browserAct>[1]; + } + + const kind = readStringParam(params, "kind"); + if (!kind) { + return undefined; + } + + const request: Record<string, unknown> = { kind }; + for (const key of LEGACY_BROWSER_ACT_REQUEST_KEYS) { + if (!Object.hasOwn(params, key)) { + continue; + } + request[key] = params[key]; + } + return request as Parameters<typeof browserAct>[1]; +} + type BrowserProxyFile = { path: string; base64: string; @@ -796,8 +843,8 @@ export function createBrowserTool(opts?: { ); } case "act": { - const request = params.request as Record<string, unknown> | undefined; - if (!request || typeof request !== "object") { + const request = readActRequestParam(params); + if (!request) { throw new Error("request required"); } try { @@ -808,7 +855,7 @@ export function createBrowserTool(opts?: { profile, body: request, }) - : await browserAct(baseUrl, request as Parameters<typeof browserAct>[1], { + : await browserAct(baseUrl, request, { profile, }); return jsonResult(result);