From 0b8b95f2c97f6ececcd28a7a8b2a7557931ef62f Mon Sep 17 00:00:00 2001
From: Rami Abdelrazzaq
Date: Mon, 16 Feb 2026 14:46:51 +0000
Subject: [PATCH] fix(update): prevent gateway crash loop after failed self-update
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gateway unconditionally scheduled a SIGUSR1 restart after every
update.run call, even when the update itself failed (broken deps, build
errors, etc.). This left the process restarting into a broken state —
corrupted node_modules, partial builds — causing a crash loop that
required manual intervention.

Three fixes:

1. Only restart on success: scheduleGatewaySigusr1Restart is now gated on
   result.status === "ok". Failed or skipped updates still write the
   restart sentinel (so the status can be reported back to the user) but
   the running gateway stays alive.

2. Early bail on step failure: deps install, build, and ui:build now check
   exit codes immediately (matching the preflight section) so a failed
   deps install no longer cascades into a broken build and ui:build.

3. Auto-repair config during update: the doctor step now runs with --fix
   alongside --non-interactive, so unknown config keys left over from
   schema changes between versions are stripped automatically instead of
   causing a startup validation crash.
--- src/gateway/server-methods/update.ts | 16 +++++-- src/infra/update-runner.test.ts | 72 ++++++++++++++++++++++++++-- src/infra/update-runner.ts | 37 +++++++++++++- 3 files changed, 115 insertions(+), 10 deletions(-) diff --git a/src/gateway/server-methods/update.ts b/src/gateway/server-methods/update.ts index a4eb5a93c..a72e2d0ab 100644 --- a/src/gateway/server-methods/update.ts +++ b/src/gateway/server-methods/update.ts @@ -90,15 +90,21 @@ export const updateHandlers: GatewayRequestHandlers = { sentinelPath = null; } - const restart = scheduleGatewaySigusr1Restart({ - delayMs: restartDelayMs, - reason: "update.run", - }); + // Only restart the gateway when the update actually succeeded. + // Restarting after a failed update leaves the process in a broken state + // (corrupted node_modules, partial builds) and causes a crash loop. + const restart = + result.status === "ok" + ? scheduleGatewaySigusr1Restart({ + delayMs: restartDelayMs, + reason: "update.run", + }) + : null; respond( true, { - ok: true, + ok: result.status !== "error", result, restart, sentinel: { diff --git a/src/infra/update-runner.test.ts b/src/infra/update-runner.test.ts index 912a67a14..31766593b 100644 --- a/src/infra/update-runner.test.ts +++ b/src/infra/update-runner.test.ts @@ -55,7 +55,7 @@ describe("runGatewayUpdate", () => { }) { const calls: string[] = []; let uiBuildCount = 0; - const doctorKey = `${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive`; + const doctorKey = `${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive --fix`; const runCommand = async (argv: string[]) => { const key = argv.join(" "); @@ -173,6 +173,69 @@ describe("runGatewayUpdate", () => { expect(calls.some((call) => call.includes("rebase --abort"))).toBe(true); }); + it("returns error and stops early when deps install fails", async () => { + await fs.mkdir(path.join(tempDir, ".git")); + await fs.writeFile( + path.join(tempDir, "package.json"), + 
JSON.stringify({ name: "openclaw", version: "1.0.0", packageManager: "pnpm@8.0.0" }), + "utf-8", + ); + const stableTag = "v1.0.1-1"; + const { runner, calls } = createRunner({ + [`git -C ${tempDir} rev-parse --show-toplevel`]: { stdout: tempDir }, + [`git -C ${tempDir} rev-parse HEAD`]: { stdout: "abc123" }, + [`git -C ${tempDir} status --porcelain -- :!dist/control-ui/`]: { stdout: "" }, + [`git -C ${tempDir} fetch --all --prune --tags`]: { stdout: "" }, + [`git -C ${tempDir} tag --list v* --sort=-v:refname`]: { stdout: `${stableTag}\n` }, + [`git -C ${tempDir} checkout --detach ${stableTag}`]: { stdout: "" }, + "pnpm install": { code: 1, stderr: "ERR_PNPM_NETWORK" }, + }); + + const result = await runGatewayUpdate({ + cwd: tempDir, + runCommand: async (argv, _options) => runner(argv), + timeoutMs: 5000, + channel: "stable", + }); + + expect(result.status).toBe("error"); + expect(result.reason).toBe("deps-install-failed"); + expect(calls.some((call) => call === "pnpm build")).toBe(false); + expect(calls.some((call) => call === "pnpm ui:build")).toBe(false); + }); + + it("returns error and stops early when build fails", async () => { + await fs.mkdir(path.join(tempDir, ".git")); + await fs.writeFile( + path.join(tempDir, "package.json"), + JSON.stringify({ name: "openclaw", version: "1.0.0", packageManager: "pnpm@8.0.0" }), + "utf-8", + ); + const stableTag = "v1.0.1-1"; + const { runner, calls } = createRunner({ + [`git -C ${tempDir} rev-parse --show-toplevel`]: { stdout: tempDir }, + [`git -C ${tempDir} rev-parse HEAD`]: { stdout: "abc123" }, + [`git -C ${tempDir} status --porcelain -- :!dist/control-ui/`]: { stdout: "" }, + [`git -C ${tempDir} fetch --all --prune --tags`]: { stdout: "" }, + [`git -C ${tempDir} tag --list v* --sort=-v:refname`]: { stdout: `${stableTag}\n` }, + [`git -C ${tempDir} checkout --detach ${stableTag}`]: { stdout: "" }, + "pnpm install": { stdout: "" }, + "pnpm build": { code: 1, stderr: "tsc: error TS2345" }, + }); + + const result = 
await runGatewayUpdate({ + cwd: tempDir, + runCommand: async (argv, _options) => runner(argv), + timeoutMs: 5000, + channel: "stable", + }); + + expect(result.status).toBe("error"); + expect(result.reason).toBe("build-failed"); + expect(calls.some((call) => call === "pnpm install")).toBe(true); + expect(calls.some((call) => call === "pnpm ui:build")).toBe(false); + }); + it("uses stable tag when beta tag is older than release", async () => { await setupGitCheckout({ packageManager: "pnpm@8.0.0" }); await setupUiIndex(); @@ -190,9 +253,10 @@ describe("runGatewayUpdate", () => { "pnpm install": { stdout: "" }, "pnpm build": { stdout: "" }, "pnpm ui:build": { stdout: "" }, - [`${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive`]: { - stdout: "", - }, + [`${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive --fix`]: + { + stdout: "", + }, }); const result = await runGatewayUpdate({ diff --git a/src/infra/update-runner.ts b/src/infra/update-runner.ts index 4e84cea40..6631b6dd3 100644 --- a/src/infra/update-runner.ts +++ b/src/infra/update-runner.ts @@ -705,14 +705,47 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise< const depsStep = await runStep(step("deps install", managerInstallArgs(manager), gitRoot)); steps.push(depsStep); + if (depsStep.exitCode !== 0) { + return { + status: "error", + mode: "git", + root: gitRoot, + reason: "deps-install-failed", + before: { sha: beforeSha, version: beforeVersion }, + steps, + durationMs: Date.now() - startedAt, + }; + } const buildStep = await runStep(step("build", managerScriptArgs(manager, "build"), gitRoot)); steps.push(buildStep); + if (buildStep.exitCode !== 0) { + return { + status: "error", + mode: "git", + root: gitRoot, + reason: "build-failed", + before: { sha: beforeSha, version: beforeVersion }, + steps, + durationMs: Date.now() - startedAt, + }; + } const uiBuildStep = await runStep( step("ui:build", 
managerScriptArgs(manager, "ui:build"), gitRoot), ); steps.push(uiBuildStep); + if (uiBuildStep.exitCode !== 0) { + return { + status: "error", + mode: "git", + root: gitRoot, + reason: "ui-build-failed", + before: { sha: beforeSha, version: beforeVersion }, + steps, + durationMs: Date.now() - startedAt, + }; + } const doctorEntry = path.join(gitRoot, "openclaw.mjs"); const doctorEntryExists = await fs @@ -739,7 +772,9 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise< }; } - const doctorArgv = [process.execPath, doctorEntry, "doctor", "--non-interactive"]; + // Use --fix so that doctor auto-strips unknown config keys introduced by + // schema changes between versions, preventing a startup validation crash. + const doctorArgv = [process.execPath, doctorEntry, "doctor", "--non-interactive", "--fix"]; const doctorStep = await runStep( step("openclaw doctor", doctorArgv, gitRoot, { OPENCLAW_UPDATE_IN_PROGRESS: "1" }), );