fix(nodes): raise transport timeout for exec.approval.request (#12098) (#12188)

`openclaw nodes run` always timed out after 35s with "gateway timeout
after 35000ms" even though `openclaw nodes invoke system.run` worked
instantly on the same node.

Root cause: the CLI's default --timeout of 35s was used as the WebSocket
transport timeout for exec.approval.request, but the gateway-side
handler waits up to 120s for user approval — so the transport was always
killed 85s too early.

Fix: override opts.timeout for the approval call to
Math.max(parseTimeoutMs(opts.timeout) ?? 0, approvalTimeoutMs + 10_000)
(130s by default), ensuring the transport outlasts the approval wait
while still honoring any larger user-supplied --timeout.
This commit is contained in:
Marcus Castro
2026-02-14 21:00:01 -03:00
committed by GitHub
parent c8c8fc4530
commit 82c1d9d3ef
2 changed files with 118 additions and 2 deletions

View File

@@ -0,0 +1,104 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { parseTimeoutMs } from "../nodes-run.js";
/**
* Regression test for #12098:
* `openclaw nodes run` times out after 35s because the CLI transport timeout
* (35s default) is shorter than the exec approval timeout (120s). The
* exec.approval.request call must use a transport timeout at least as long
* as the approval timeout so the gateway has enough time to collect the
* user's decision.
*
* The root cause: callGatewayCli reads opts.timeout for the transport timeout.
* Before the fix, nodes run called callGatewayCli("exec.approval.request", opts, ...)
* without overriding opts.timeout, so the 35s CLI default raced against the
* 120s approval wait on the gateway side. The CLI always lost.
*
* The fix: override opts.timeout for the exec.approval.request call to be at
* least approvalTimeoutMs + 10_000.
*/
const callGatewaySpy = vi.fn(async () => ({ decision: "allow-once" }));
vi.mock("../../gateway/call.js", () => ({
callGateway: (...args: unknown[]) => callGatewaySpy(...args),
randomIdempotencyKey: () => "mock-key",
}));
vi.mock("../progress.js", () => ({
withProgress: (_opts: unknown, fn: () => unknown) => fn(),
}));
describe("nodes run: approval transport timeout (#12098)", () => {
beforeEach(() => {
callGatewaySpy.mockReset();
callGatewaySpy.mockResolvedValue({ decision: "allow-once" });
});
it("callGatewayCli forwards opts.timeout as the transport timeoutMs", async () => {
const { callGatewayCli } = await import("./rpc.js");
await callGatewayCli("exec.approval.request", { timeout: "35000" } as never, {
timeoutMs: 120_000,
});
expect(callGatewaySpy).toHaveBeenCalledTimes(1);
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
expect(callOpts.method).toBe("exec.approval.request");
expect(callOpts.timeoutMs).toBe(35_000);
});
it("fix: overriding opts.timeout gives the approval enough transport time", async () => {
const { callGatewayCli } = await import("./rpc.js");
const approvalTimeoutMs = 120_000;
// Mirror the production code: parseTimeoutMs(opts.timeout) ?? 0
const fixedTimeout = String(Math.max(parseTimeoutMs("35000") ?? 0, approvalTimeoutMs + 10_000));
expect(Number(fixedTimeout)).toBe(130_000);
await callGatewayCli("exec.approval.request", { timeout: fixedTimeout } as never, {
timeoutMs: approvalTimeoutMs,
});
expect(callGatewaySpy).toHaveBeenCalledTimes(1);
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
expect(callOpts.timeoutMs).toBeGreaterThanOrEqual(approvalTimeoutMs);
expect(callOpts.timeoutMs).toBe(130_000);
});
it("fix: user-specified timeout larger than approval is preserved", async () => {
const { callGatewayCli } = await import("./rpc.js");
const approvalTimeoutMs = 120_000;
const userTimeout = 200_000;
// Mirror the production code: parseTimeoutMs preserves valid large values
const fixedTimeout = String(
Math.max(parseTimeoutMs(String(userTimeout)) ?? 0, approvalTimeoutMs + 10_000),
);
expect(Number(fixedTimeout)).toBe(200_000);
await callGatewayCli("exec.approval.request", { timeout: fixedTimeout } as never, {
timeoutMs: approvalTimeoutMs,
});
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
expect(callOpts.timeoutMs).toBe(200_000);
});
it("fix: non-numeric timeout falls back to approval floor", async () => {
const { callGatewayCli } = await import("./rpc.js");
const approvalTimeoutMs = 120_000;
// parseTimeoutMs returns undefined for garbage input, ?? 0 ensures
// Math.max picks the approval floor instead of producing NaN
const fixedTimeout = String(Math.max(parseTimeoutMs("foo") ?? 0, approvalTimeoutMs + 10_000));
expect(Number(fixedTimeout)).toBe(130_000);
await callGatewayCli("exec.approval.request", { timeout: fixedTimeout } as never, {
timeoutMs: approvalTimeoutMs,
});
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
expect(callOpts.timeoutMs).toBe(130_000);
});
});

View File

@@ -272,7 +272,19 @@ export function registerNodesInvokeCommands(nodes: Command) {
let approvalId: string | null = null;
if (requiresAsk) {
approvalId = crypto.randomUUID();
const decisionResult = (await callGatewayCli("exec.approval.request", opts, {
const approvalTimeoutMs = 120_000;
// The CLI transport timeout (opts.timeout) must be longer than the
// gateway-side approval wait so the connection stays alive while the
// user decides. Without this override the default 35 s transport
// timeout races — and always loses — against the 120 s approval
// timeout, causing "gateway timeout after 35000ms" (#12098).
const approvalOpts = {
...opts,
timeout: String(
Math.max(parseTimeoutMs(opts.timeout) ?? 0, approvalTimeoutMs + 10_000),
),
};
const decisionResult = (await callGatewayCli("exec.approval.request", approvalOpts, {
id: approvalId,
command: rawCommand ?? argv.join(" "),
cwd: opts.cwd,
@@ -282,7 +294,7 @@ export function registerNodesInvokeCommands(nodes: Command) {
agentId,
resolvedPath: undefined,
sessionKey: undefined,
timeoutMs: 120_000,
timeoutMs: approvalTimeoutMs,
})) as { decision?: string } | null;
const decision =
decisionResult && typeof decisionResult === "object"