fix(update): prevent gateway crash loop after failed self-update
The gateway unconditionally scheduled a SIGUSR1 restart after every update.run call, even when the update itself failed (broken deps, build errors, etc.). This left the process restarting into a broken state — corrupted node_modules, partial builds — causing a crash loop that required manual intervention.

Three fixes:

1. Only restart on success: scheduleGatewaySigusr1Restart is now gated on result.status === "ok". Failed or skipped updates still write the restart sentinel (so the status can be reported back to the user), but the running gateway stays alive.
2. Early bail on step failure: the deps install, build, and ui:build steps now check their exit codes immediately (matching the preflight section) and return an error result, so a failed deps install no longer cascades into a broken build and ui:build.
3. Auto-repair config during update: the doctor step now runs with --fix alongside --non-interactive, so unknown config keys left over from schema changes between versions are stripped automatically instead of causing a startup validation crash.
This commit is contained in:
committed by
Peter Steinberger
parent
671f913123
commit
0b8b95f2c9
@@ -90,15 +90,21 @@ export const updateHandlers: GatewayRequestHandlers = {
|
||||
sentinelPath = null;
|
||||
}
|
||||
|
||||
const restart = scheduleGatewaySigusr1Restart({
|
||||
delayMs: restartDelayMs,
|
||||
reason: "update.run",
|
||||
});
|
||||
// Only restart the gateway when the update actually succeeded.
|
||||
// Restarting after a failed update leaves the process in a broken state
|
||||
// (corrupted node_modules, partial builds) and causes a crash loop.
|
||||
const restart =
|
||||
result.status === "ok"
|
||||
? scheduleGatewaySigusr1Restart({
|
||||
delayMs: restartDelayMs,
|
||||
reason: "update.run",
|
||||
})
|
||||
: null;
|
||||
|
||||
respond(
|
||||
true,
|
||||
{
|
||||
ok: true,
|
||||
ok: result.status !== "error",
|
||||
result,
|
||||
restart,
|
||||
sentinel: {
|
||||
|
||||
@@ -55,7 +55,7 @@ describe("runGatewayUpdate", () => {
|
||||
}) {
|
||||
const calls: string[] = [];
|
||||
let uiBuildCount = 0;
|
||||
const doctorKey = `${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive`;
|
||||
const doctorKey = `${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive --fix`;
|
||||
|
||||
const runCommand = async (argv: string[]) => {
|
||||
const key = argv.join(" ");
|
||||
@@ -173,6 +173,69 @@ describe("runGatewayUpdate", () => {
|
||||
expect(calls.some((call) => call.includes("rebase --abort"))).toBe(true);
|
||||
});
|
||||
|
||||
it("returns error and stops early when deps install fails", async () => {
|
||||
await fs.mkdir(path.join(tempDir, ".git"));
|
||||
await fs.writeFile(
|
||||
path.join(tempDir, "package.json"),
|
||||
JSON.stringify({ name: "openclaw", version: "1.0.0", packageManager: "pnpm@8.0.0" }),
|
||||
"utf-8",
|
||||
);
|
||||
const stableTag = "v1.0.1-1";
|
||||
const { runner, calls } = createRunner({
|
||||
[`git -C ${tempDir} rev-parse --show-toplevel`]: { stdout: tempDir },
|
||||
[`git -C ${tempDir} rev-parse HEAD`]: { stdout: "abc123" },
|
||||
[`git -C ${tempDir} status --porcelain -- :!dist/control-ui/`]: { stdout: "" },
|
||||
[`git -C ${tempDir} fetch --all --prune --tags`]: { stdout: "" },
|
||||
[`git -C ${tempDir} tag --list v* --sort=-v:refname`]: { stdout: `${stableTag}\n` },
|
||||
[`git -C ${tempDir} checkout --detach ${stableTag}`]: { stdout: "" },
|
||||
"pnpm install": { code: 1, stderr: "ERR_PNPM_NETWORK" },
|
||||
});
|
||||
|
||||
const result = await runGatewayUpdate({
|
||||
cwd: tempDir,
|
||||
runCommand: async (argv, _options) => runner(argv),
|
||||
timeoutMs: 5000,
|
||||
channel: "stable",
|
||||
});
|
||||
|
||||
expect(result.status).toBe("error");
|
||||
expect(result.reason).toBe("deps-install-failed");
|
||||
expect(calls.some((call) => call === "pnpm build")).toBe(false);
|
||||
expect(calls.some((call) => call === "pnpm ui:build")).toBe(false);
|
||||
});
|
||||
|
||||
it("returns error and stops early when build fails", async () => {
|
||||
await fs.mkdir(path.join(tempDir, ".git"));
|
||||
await fs.writeFile(
|
||||
path.join(tempDir, "package.json"),
|
||||
JSON.stringify({ name: "openclaw", version: "1.0.0", packageManager: "pnpm@8.0.0" }),
|
||||
"utf-8",
|
||||
);
|
||||
const stableTag = "v1.0.1-1";
|
||||
const { runner, calls } = createRunner({
|
||||
[`git -C ${tempDir} rev-parse --show-toplevel`]: { stdout: tempDir },
|
||||
[`git -C ${tempDir} rev-parse HEAD`]: { stdout: "abc123" },
|
||||
[`git -C ${tempDir} status --porcelain -- :!dist/control-ui/`]: { stdout: "" },
|
||||
[`git -C ${tempDir} fetch --all --prune --tags`]: { stdout: "" },
|
||||
[`git -C ${tempDir} tag --list v* --sort=-v:refname`]: { stdout: `${stableTag}\n` },
|
||||
[`git -C ${tempDir} checkout --detach ${stableTag}`]: { stdout: "" },
|
||||
"pnpm install": { stdout: "" },
|
||||
"pnpm build": { code: 1, stderr: "tsc: error TS2345" },
|
||||
});
|
||||
|
||||
const result = await runGatewayUpdate({
|
||||
cwd: tempDir,
|
||||
runCommand: async (argv, _options) => runner(argv),
|
||||
timeoutMs: 5000,
|
||||
channel: "stable",
|
||||
});
|
||||
|
||||
expect(result.status).toBe("error");
|
||||
expect(result.reason).toBe("build-failed");
|
||||
expect(calls.some((call) => call === "pnpm install")).toBe(true);
|
||||
expect(calls.some((call) => call === "pnpm ui:build")).toBe(false);
|
||||
});
|
||||
|
||||
it("uses stable tag when beta tag is older than release", async () => {
|
||||
await setupGitCheckout({ packageManager: "pnpm@8.0.0" });
|
||||
await setupUiIndex();
|
||||
@@ -190,9 +253,10 @@ describe("runGatewayUpdate", () => {
|
||||
"pnpm install": { stdout: "" },
|
||||
"pnpm build": { stdout: "" },
|
||||
"pnpm ui:build": { stdout: "" },
|
||||
[`${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive`]: {
|
||||
stdout: "",
|
||||
},
|
||||
[`${process.execPath} ${path.join(tempDir, "openclaw.mjs")} doctor --non-interactive --fix`]:
|
||||
{
|
||||
stdout: "",
|
||||
},
|
||||
});
|
||||
|
||||
const result = await runGatewayUpdate({
|
||||
|
||||
@@ -705,14 +705,47 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<
|
||||
|
||||
const depsStep = await runStep(step("deps install", managerInstallArgs(manager), gitRoot));
|
||||
steps.push(depsStep);
|
||||
if (depsStep.exitCode !== 0) {
|
||||
return {
|
||||
status: "error",
|
||||
mode: "git",
|
||||
root: gitRoot,
|
||||
reason: "deps-install-failed",
|
||||
before: { sha: beforeSha, version: beforeVersion },
|
||||
steps,
|
||||
durationMs: Date.now() - startedAt,
|
||||
};
|
||||
}
|
||||
|
||||
const buildStep = await runStep(step("build", managerScriptArgs(manager, "build"), gitRoot));
|
||||
steps.push(buildStep);
|
||||
if (buildStep.exitCode !== 0) {
|
||||
return {
|
||||
status: "error",
|
||||
mode: "git",
|
||||
root: gitRoot,
|
||||
reason: "build-failed",
|
||||
before: { sha: beforeSha, version: beforeVersion },
|
||||
steps,
|
||||
durationMs: Date.now() - startedAt,
|
||||
};
|
||||
}
|
||||
|
||||
const uiBuildStep = await runStep(
|
||||
step("ui:build", managerScriptArgs(manager, "ui:build"), gitRoot),
|
||||
);
|
||||
steps.push(uiBuildStep);
|
||||
if (uiBuildStep.exitCode !== 0) {
|
||||
return {
|
||||
status: "error",
|
||||
mode: "git",
|
||||
root: gitRoot,
|
||||
reason: "ui-build-failed",
|
||||
before: { sha: beforeSha, version: beforeVersion },
|
||||
steps,
|
||||
durationMs: Date.now() - startedAt,
|
||||
};
|
||||
}
|
||||
|
||||
const doctorEntry = path.join(gitRoot, "openclaw.mjs");
|
||||
const doctorEntryExists = await fs
|
||||
@@ -739,7 +772,9 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise<
|
||||
};
|
||||
}
|
||||
|
||||
const doctorArgv = [process.execPath, doctorEntry, "doctor", "--non-interactive"];
|
||||
// Use --fix so that doctor auto-strips unknown config keys introduced by
|
||||
// schema changes between versions, preventing a startup validation crash.
|
||||
const doctorArgv = [process.execPath, doctorEntry, "doctor", "--non-interactive", "--fix"];
|
||||
const doctorStep = await runStep(
|
||||
step("openclaw doctor", doctorArgv, gitRoot, { OPENCLAW_UPDATE_IN_PROGRESS: "1" }),
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user