fix(update): prevent gateway crash loop after failed self-update

The gateway unconditionally scheduled a SIGUSR1 restart after every
update.run call, even when the update itself failed (broken deps,
build errors, etc.). This left the process restarting into a broken
state — corrupted node_modules, partial builds — causing a crash loop
that required manual intervention.

Three fixes:

1. Only restart on success: scheduleGatewaySigusr1Restart is now
   gated on result.status === "ok". Failed or skipped updates still
   write the restart sentinel (so the status can be reported back to
   the user) but the running gateway stays alive, and the response
   payload's ok field is now false when result.status is "error".

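2. Early bail on step failure: deps install, build, and ui:build now
   check exit codes immediately (matching the preflight section) so a
   failed deps install no longer cascades into a broken build and
   ui:build. Each failure returns status "error" with its own reason:
   deps-install-failed, build-failed, or ui-build-failed.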

3. Auto-repair config during update: the doctor step now runs with
   --fix alongside --non-interactive, so unknown config keys left over
   from schema changes between versions are stripped automatically
   instead of causing a startup validation crash.
This commit is contained in:
Rami Abdelrazzaq
2026-02-16 14:46:51 +00:00
committed by Peter Steinberger
parent 671f913123
commit 0b8b95f2c9
3 changed files with 115 additions and 10 deletions

View File

@@ -90,15 +90,21 @@ export const updateHandlers: GatewayRequestHandlers = {
sentinelPath = null;
}
const restart = scheduleGatewaySigusr1Restart({
delayMs: restartDelayMs,
reason: "update.run",
});
// Only restart the gateway when the update actually succeeded.
// Restarting after a failed update leaves the process in a broken state
// (corrupted node_modules, partial builds) and causes a crash loop.
const restart =
result.status === "ok"
? scheduleGatewaySigusr1Restart({
delayMs: restartDelayMs,
reason: "update.run",
})
: null;
respond(
true,
{
ok: true,
ok: result.status !== "error",
result,
restart,
sentinel: {

View File

@@ -55,7 +55,7 @@ describe("runGatewayUpdate", () => {
}) {
const calls: string[] = [];
let uiBuildCount = 0;
const doctorKey = `${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive`;
const doctorKey = `${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive --fix`;
const runCommand = async (argv: string[]) => {
const key = argv.join(" ");
@@ -173,6 +173,69 @@ describe("runGatewayUpdate", () => {
expect(calls.some((call) => call.includes("rebase --abort"))).toBe(true);
});
// Verifies that a non-zero exit from the dependency install step makes the
// update return an error result without running either build script.
it("returns error and stops early when deps install fails", async () => {
  const stableTag = "v1.0.1-1";
  const manifest = { name: "openclaw", version: "1.0.0", packageManager: "pnpm@8.0.0" };

  // Lay out a bare checkout: a .git directory plus a package.json.
  await fs.mkdir(path.join(tempDir, ".git"));
  await fs.writeFile(path.join(tempDir, "package.json"), JSON.stringify(manifest), "utf-8");

  // Canned command results; only the install step is configured to fail.
  const responses = {
    [`git -C ${tempDir} rev-parse --show-toplevel`]: { stdout: tempDir },
    [`git -C ${tempDir} rev-parse HEAD`]: { stdout: "abc123" },
    [`git -C ${tempDir} status --porcelain -- :!dist/control-ui/`]: { stdout: "" },
    [`git -C ${tempDir} fetch --all --prune --tags`]: { stdout: "" },
    [`git -C ${tempDir} tag --list v* --sort=-v:refname`]: { stdout: `${stableTag}\n` },
    [`git -C ${tempDir} checkout --detach ${stableTag}`]: { stdout: "" },
    "pnpm install": { code: 1, stderr: "ERR_PNPM_NETWORK" },
  };
  const { runner, calls } = createRunner(responses);

  const outcome = await runGatewayUpdate({
    cwd: tempDir,
    runCommand: async (argv, _options) => runner(argv),
    timeoutMs: 5000,
    channel: "stable",
  });

  expect(outcome.status).toBe("error");
  expect(outcome.reason).toBe("deps-install-failed");
  // Neither build script may have been invoked after the failed install.
  expect(calls.includes("pnpm build")).toBe(false);
  expect(calls.includes("pnpm ui:build")).toBe(false);
});
// Verifies that a non-zero exit from the build step makes the update return
// an error result after installing deps but before running ui:build.
it("returns error and stops early when build fails", async () => {
  const stableTag = "v1.0.1-1";
  const manifest = { name: "openclaw", version: "1.0.0", packageManager: "pnpm@8.0.0" };

  // Lay out a bare checkout: a .git directory plus a package.json.
  await fs.mkdir(path.join(tempDir, ".git"));
  await fs.writeFile(path.join(tempDir, "package.json"), JSON.stringify(manifest), "utf-8");

  // Canned command results; install succeeds and the build step fails.
  const responses = {
    [`git -C ${tempDir} rev-parse --show-toplevel`]: { stdout: tempDir },
    [`git -C ${tempDir} rev-parse HEAD`]: { stdout: "abc123" },
    [`git -C ${tempDir} status --porcelain -- :!dist/control-ui/`]: { stdout: "" },
    [`git -C ${tempDir} fetch --all --prune --tags`]: { stdout: "" },
    [`git -C ${tempDir} tag --list v* --sort=-v:refname`]: { stdout: `${stableTag}\n` },
    [`git -C ${tempDir} checkout --detach ${stableTag}`]: { stdout: "" },
    "pnpm install": { stdout: "" },
    "pnpm build": { code: 1, stderr: "tsc: error TS2345" },
  };
  const { runner, calls } = createRunner(responses);

  const outcome = await runGatewayUpdate({
    cwd: tempDir,
    runCommand: async (argv, _options) => runner(argv),
    timeoutMs: 5000,
    channel: "stable",
  });

  expect(outcome.status).toBe("error");
  expect(outcome.reason).toBe("build-failed");
  // Install must have run; the UI build must not have been reached.
  expect(calls.includes("pnpm install")).toBe(true);
  expect(calls.includes("pnpm ui:build")).toBe(false);
});
it("uses stable tag when beta tag is older than release", async () => {
await setupGitCheckout({ packageManager: "pnpm@8.0.0" });
await setupUiIndex();
@@ -190,9 +253,10 @@ describe("runGatewayUpdate", () => {
"pnpm install": { stdout: "" },
"pnpm build": { stdout: "" },
"pnpm ui:build": { stdout: "" },
[`${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive`]: {
stdout: "",
},
[`${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive --fix`]:
{
stdout: "",
},
});
const result = await runGatewayUpdate({

View File

@@ -705,14 +705,47 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<
const depsStep = await runStep(step("deps install", managerInstallArgs(manager), gitRoot));
steps.push(depsStep);
if (depsStep.exitCode !== 0) {
return {
status: "error",
mode: "git",
root: gitRoot,
reason: "deps-install-failed",
before: { sha: beforeSha, version: beforeVersion },
steps,
durationMs: Date.now() - startedAt,
};
}
const buildStep = await runStep(step("build", managerScriptArgs(manager, "build"), gitRoot));
steps.push(buildStep);
if (buildStep.exitCode !== 0) {
return {
status: "error",
mode: "git",
root: gitRoot,
reason: "build-failed",
before: { sha: beforeSha, version: beforeVersion },
steps,
durationMs: Date.now() - startedAt,
};
}
const uiBuildStep = await runStep(
step("ui:build", managerScriptArgs(manager, "ui:build"), gitRoot),
);
steps.push(uiBuildStep);
if (uiBuildStep.exitCode !== 0) {
return {
status: "error",
mode: "git",
root: gitRoot,
reason: "ui-build-failed",
before: { sha: beforeSha, version: beforeVersion },
steps,
durationMs: Date.now() - startedAt,
};
}
const doctorEntry = path.join(gitRoot, "openclaw.mjs");
const doctorEntryExists = await fs
@@ -739,7 +772,9 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<
};
}
const doctorArgv = [process.execPath, doctorEntry, "doctor", "--non-interactive"];
// Use --fix so that doctor auto-strips unknown config keys introduced by
// schema changes between versions, preventing a startup validation crash.
const doctorArgv = [process.execPath, doctorEntry, "doctor", "--non-interactive", "--fix"];
const doctorStep = await runStep(
step("openclaw doctor", doctorArgv, gitRoot, { OPENCLAW_UPDATE_IN_PROGRESS: "1" }),
);