`openclaw nodes run` always timed out after 35s with "gateway timeout after 35000ms" even though `openclaw nodes invoke system.run` worked instantly on the same node. Root cause: the CLI's default --timeout of 35s was used as the WebSocket transport timeout for exec.approval.request, but the gateway-side handler waits up to 120s for user approval — so the transport was always killed 85s too early. Fix: override opts.timeout for the approval call to Math.max(parseTimeoutMs(opts.timeout) ?? 0, approvalTimeoutMs + 10_000) (130s by default), ensuring the transport outlasts the approval wait while still honoring any larger user-supplied --timeout.
This commit is contained in:
@@ -0,0 +1,104 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { parseTimeoutMs } from "../nodes-run.js";
|
||||
|
||||
/**
|
||||
* Regression test for #12098:
|
||||
* `openclaw nodes run` times out after 35s because the CLI transport timeout
|
||||
* (35s default) is shorter than the exec approval timeout (120s). The
|
||||
* exec.approval.request call must use a transport timeout at least as long
|
||||
* as the approval timeout so the gateway has enough time to collect the
|
||||
* user's decision.
|
||||
*
|
||||
* The root cause: callGatewayCli reads opts.timeout for the transport timeout.
|
||||
* Before the fix, nodes run called callGatewayCli("exec.approval.request", opts, ...)
|
||||
* without overriding opts.timeout, so the 35s CLI default raced against the
|
||||
* 120s approval wait on the gateway side. The CLI always lost.
|
||||
*
|
||||
* The fix: override opts.timeout for the exec.approval.request call to be at
|
||||
* least approvalTimeoutMs + 10_000.
|
||||
*/
|
||||
|
||||
const callGatewaySpy = vi.fn(async () => ({ decision: "allow-once" }));
|
||||
|
||||
vi.mock("../../gateway/call.js", () => ({
|
||||
callGateway: (...args: unknown[]) => callGatewaySpy(...args),
|
||||
randomIdempotencyKey: () => "mock-key",
|
||||
}));
|
||||
|
||||
vi.mock("../progress.js", () => ({
|
||||
withProgress: (_opts: unknown, fn: () => unknown) => fn(),
|
||||
}));
|
||||
|
||||
describe("nodes run: approval transport timeout (#12098)", () => {
|
||||
beforeEach(() => {
|
||||
callGatewaySpy.mockReset();
|
||||
callGatewaySpy.mockResolvedValue({ decision: "allow-once" });
|
||||
});
|
||||
|
||||
it("callGatewayCli forwards opts.timeout as the transport timeoutMs", async () => {
|
||||
const { callGatewayCli } = await import("./rpc.js");
|
||||
|
||||
await callGatewayCli("exec.approval.request", { timeout: "35000" } as never, {
|
||||
timeoutMs: 120_000,
|
||||
});
|
||||
|
||||
expect(callGatewaySpy).toHaveBeenCalledTimes(1);
|
||||
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
|
||||
expect(callOpts.method).toBe("exec.approval.request");
|
||||
expect(callOpts.timeoutMs).toBe(35_000);
|
||||
});
|
||||
|
||||
it("fix: overriding opts.timeout gives the approval enough transport time", async () => {
|
||||
const { callGatewayCli } = await import("./rpc.js");
|
||||
|
||||
const approvalTimeoutMs = 120_000;
|
||||
// Mirror the production code: parseTimeoutMs(opts.timeout) ?? 0
|
||||
const fixedTimeout = String(Math.max(parseTimeoutMs("35000") ?? 0, approvalTimeoutMs + 10_000));
|
||||
expect(Number(fixedTimeout)).toBe(130_000);
|
||||
|
||||
await callGatewayCli("exec.approval.request", { timeout: fixedTimeout } as never, {
|
||||
timeoutMs: approvalTimeoutMs,
|
||||
});
|
||||
|
||||
expect(callGatewaySpy).toHaveBeenCalledTimes(1);
|
||||
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
|
||||
expect(callOpts.timeoutMs).toBeGreaterThanOrEqual(approvalTimeoutMs);
|
||||
expect(callOpts.timeoutMs).toBe(130_000);
|
||||
});
|
||||
|
||||
it("fix: user-specified timeout larger than approval is preserved", async () => {
|
||||
const { callGatewayCli } = await import("./rpc.js");
|
||||
|
||||
const approvalTimeoutMs = 120_000;
|
||||
const userTimeout = 200_000;
|
||||
// Mirror the production code: parseTimeoutMs preserves valid large values
|
||||
const fixedTimeout = String(
|
||||
Math.max(parseTimeoutMs(String(userTimeout)) ?? 0, approvalTimeoutMs + 10_000),
|
||||
);
|
||||
expect(Number(fixedTimeout)).toBe(200_000);
|
||||
|
||||
await callGatewayCli("exec.approval.request", { timeout: fixedTimeout } as never, {
|
||||
timeoutMs: approvalTimeoutMs,
|
||||
});
|
||||
|
||||
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
|
||||
expect(callOpts.timeoutMs).toBe(200_000);
|
||||
});
|
||||
|
||||
it("fix: non-numeric timeout falls back to approval floor", async () => {
|
||||
const { callGatewayCli } = await import("./rpc.js");
|
||||
|
||||
const approvalTimeoutMs = 120_000;
|
||||
// parseTimeoutMs returns undefined for garbage input, ?? 0 ensures
|
||||
// Math.max picks the approval floor instead of producing NaN
|
||||
const fixedTimeout = String(Math.max(parseTimeoutMs("foo") ?? 0, approvalTimeoutMs + 10_000));
|
||||
expect(Number(fixedTimeout)).toBe(130_000);
|
||||
|
||||
await callGatewayCli("exec.approval.request", { timeout: fixedTimeout } as never, {
|
||||
timeoutMs: approvalTimeoutMs,
|
||||
});
|
||||
|
||||
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
|
||||
expect(callOpts.timeoutMs).toBe(130_000);
|
||||
});
|
||||
});
|
||||
@@ -272,7 +272,19 @@ export function registerNodesInvokeCommands(nodes: Command) {
|
||||
let approvalId: string | null = null;
|
||||
if (requiresAsk) {
|
||||
approvalId = crypto.randomUUID();
|
||||
const decisionResult = (await callGatewayCli("exec.approval.request", opts, {
|
||||
const approvalTimeoutMs = 120_000;
|
||||
// The CLI transport timeout (opts.timeout) must be longer than the
|
||||
// gateway-side approval wait so the connection stays alive while the
|
||||
// user decides. Without this override the default 35 s transport
|
||||
// timeout races — and always loses — against the 120 s approval
|
||||
// timeout, causing "gateway timeout after 35000ms" (#12098).
|
||||
const approvalOpts = {
|
||||
...opts,
|
||||
timeout: String(
|
||||
Math.max(parseTimeoutMs(opts.timeout) ?? 0, approvalTimeoutMs + 10_000),
|
||||
),
|
||||
};
|
||||
const decisionResult = (await callGatewayCli("exec.approval.request", approvalOpts, {
|
||||
id: approvalId,
|
||||
command: rawCommand ?? argv.join(" "),
|
||||
cwd: opts.cwd,
|
||||
@@ -282,7 +294,7 @@ export function registerNodesInvokeCommands(nodes: Command) {
|
||||
agentId,
|
||||
resolvedPath: undefined,
|
||||
sessionKey: undefined,
|
||||
timeoutMs: 120_000,
|
||||
timeoutMs: approvalTimeoutMs,
|
||||
})) as { decision?: string } | null;
|
||||
const decision =
|
||||
decisionResult && typeof decisionResult === "object"
|
||||
|
||||
Reference in New Issue
Block a user