The gateway unconditionally scheduled a SIGUSR1 restart after every update.run call, even when the update itself failed (broken deps, build errors, etc.). This left the process restarting into a broken state — corrupted node_modules, partial builds — causing a crash loop that required manual intervention. Three fixes: 1. Only restart on success: scheduleGatewaySigusr1Restart is now gated on result.status === "ok". Failed or skipped updates still write the restart sentinel (so the status can be reported back to the user) but the running gateway stays alive. 2. Early bail on step failure: deps install, build, and ui:build now check exit codes immediately (matching the preflight section) so a failed deps install no longer cascades into a broken build and ui:build. 3. Auto-repair config during update: the doctor step now runs with --fix alongside --non-interactive, so unknown config keys left over from schema changes between versions are stripped automatically instead of causing a startup validation crash.
119 lines
3.7 KiB
TypeScript
119 lines
3.7 KiB
TypeScript
import type { GatewayRequestHandlers } from "./types.js";
|
|
import { loadConfig } from "../../config/config.js";
|
|
import { extractDeliveryInfo } from "../../config/sessions.js";
|
|
import { resolveOpenClawPackageRoot } from "../../infra/openclaw-root.js";
|
|
import {
|
|
formatDoctorNonInteractiveHint,
|
|
type RestartSentinelPayload,
|
|
writeRestartSentinel,
|
|
} from "../../infra/restart-sentinel.js";
|
|
import { scheduleGatewaySigusr1Restart } from "../../infra/restart.js";
|
|
import { normalizeUpdateChannel } from "../../infra/update-channels.js";
|
|
import { runGatewayUpdate } from "../../infra/update-runner.js";
|
|
import { validateUpdateRunParams } from "../protocol/index.js";
|
|
import { parseRestartRequestParams } from "./restart-request.js";
|
|
import { assertValidParams } from "./validation.js";
|
|
|
|
/**
 * Gateway request handlers for update operations.
 *
 * `update.run` flow, in order:
 *   1. validate the request params;
 *   2. run the gateway update (any thrown error is folded into an
 *      `"error"` result instead of propagating);
 *   3. write a restart sentinel describing the outcome, whatever it was;
 *   4. schedule a SIGUSR1 restart only if the update status is `"ok"`;
 *   5. respond with the result, the restart info (or `null`), and the sentinel.
 */
export const updateHandlers: GatewayRequestHandlers = {
  "update.run": async ({ params, respond }) => {
    type UpdateOutcome = Awaited<ReturnType<typeof runGatewayUpdate>>;

    // Stop here on invalid params. `respond` is handed to the validator,
    // presumably so it can report the failure itself — confirm in validation.ts.
    if (!assertValidParams(params, validateUpdateRunParams, "update.run", respond)) {
      return;
    }

    const { sessionKey, note, restartDelayMs } = parseRestartRequestParams(params);
    const { deliveryContext, threadId } = extractDeliveryInfo(sessionKey);

    // Optional caller-supplied timeout: only finite numbers are accepted, and
    // they are floored to an integer and clamped to a minimum of 1000.
    const requestedTimeout = (params as { timeoutMs?: unknown }).timeoutMs;
    let timeoutMs: number | undefined = undefined;
    if (typeof requestedTimeout === "number" && Number.isFinite(requestedTimeout)) {
      timeoutMs = Math.max(1000, Math.floor(requestedTimeout));
    }

    // Run the update. Anything thrown while loading config, resolving the
    // package root, or running the update becomes a synthetic error result.
    const performUpdate = async (): Promise<UpdateOutcome> => {
      try {
        const config = loadConfig();
        const configChannel = normalizeUpdateChannel(config.update?.channel);
        const resolvedRoot = await resolveOpenClawPackageRoot({
          moduleUrl: import.meta.url,
          argv1: process.argv[1],
          cwd: process.cwd(),
        });
        // Fall back to the current working directory when no root is resolved.
        const root = resolvedRoot ?? process.cwd();
        return await runGatewayUpdate({
          timeoutMs,
          cwd: root,
          argv1: process.argv[1],
          channel: configChannel ?? undefined,
        });
      } catch (err) {
        return {
          status: "error",
          mode: "unknown",
          reason: String(err),
          steps: [],
          durationMs: 0,
        };
      }
    };
    const outcome = await performUpdate();

    // Sentinel payload: a snapshot of the outcome plus the delivery details
    // taken from the request, with absent optional values normalised to null.
    const payload: RestartSentinelPayload = {
      kind: "update",
      status: outcome.status,
      ts: Date.now(),
      sessionKey,
      deliveryContext,
      threadId,
      message: note ?? null,
      doctorHint: formatDoctorNonInteractiveHint(),
      stats: {
        mode: outcome.mode,
        root: outcome.root ?? undefined,
        before: outcome.before ?? null,
        after: outcome.after ?? null,
        steps: outcome.steps.map((entry) => {
          const log = {
            stdoutTail: entry.stdoutTail ?? null,
            stderrTail: entry.stderrTail ?? null,
            exitCode: entry.exitCode ?? null,
          };
          return {
            name: entry.name,
            command: entry.command,
            cwd: entry.cwd,
            durationMs: entry.durationMs,
            log,
          };
        }),
        reason: outcome.reason ?? null,
        durationMs: outcome.durationMs,
      },
    };

    // Best-effort write: a failure is reported as a null path in the response
    // rather than failing the request.
    let sentinelPath: string | null;
    try {
      sentinelPath = await writeRestartSentinel(payload);
    } catch {
      sentinelPath = null;
    }

    // A restart is scheduled solely for successful updates. After a failed
    // update the install may be broken (corrupted node_modules, partial
    // builds), and restarting into it would cause a crash loop, so the
    // currently running gateway is left alone.
    let restart: ReturnType<typeof scheduleGatewaySigusr1Restart> | null = null;
    if (outcome.status === "ok") {
      restart = scheduleGatewaySigusr1Restart({
        delayMs: restartDelayMs,
        reason: "update.run",
      });
    }

    // The request is always answered as handled; the inner `ok` flag is false
    // only for status "error".
    const responseBody = {
      ok: outcome.status !== "error",
      result: outcome,
      restart,
      sentinel: {
        path: sentinelPath,
        payload,
      },
    };
    respond(true, responseBody, undefined);
  },
};
|