diff --git a/CHANGELOG.md b/CHANGELOG.md
index 550083595..9276788eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,6 +39,7 @@ Docs: https://docs.openclaw.ai
 - Providers/OpenRouter: remove conflicting top-level `reasoning_effort` when injecting nested `reasoning.effort`, preventing OpenRouter 400 payload-validation failures for reasoning models. (#24120) thanks @tenequm.
 - Providers/Groq: avoid classifying Groq TPM limit errors as context overflow so throttling paths no longer trigger overflow recovery logic. (#16176) Thanks @dddabtc.
 - Gateway/WS: close repeated post-handshake `unauthorized role:*` request floods per connection and sample duplicate rejection logs, preventing a single misbehaving client from degrading gateway responsiveness. (#20168) Thanks @acy103, @vibecodooor, and @vincentkoc.
+- Gateway/Restart: treat child listener PIDs as owned by the service runtime PID during restart health checks to avoid false stale-process kills and restart timeouts on launchd/systemd. (#24696) Thanks @gumadeiras.
 - Config/Write: apply `unsetPaths` with immutable path-copy updates so config writes never mutate caller-provided objects, and harden `openclaw config get/set/unset` path traversal by rejecting prototype-key segments and inherited-property traversal. (#24134) thanks @frankekn.
 - Security/Exec: detect obfuscated commands before exec allowlist decisions and require explicit approval for obfuscation patterns. (#8592) Thanks @CornBrother0x and @vincentkoc.
 - Security/Skills: escape user-controlled prompt, filename, and output-path values in `openai-image-gen` HTML gallery generation to prevent stored XSS in generated `index.html` output. (#12538) Thanks @CornBrother0x.
diff --git a/src/cli/daemon-cli/restart-health.test.ts b/src/cli/daemon-cli/restart-health.test.ts
new file mode 100644
index 000000000..2dfb5cf59
--- /dev/null
+++ b/src/cli/daemon-cli/restart-health.test.ts
@@ -0,0 +1,66 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import type { GatewayService } from "../../daemon/service.js";
+import type { PortListenerKind, PortUsage } from "../../infra/ports.js";
+
+const inspectPortUsage = vi.hoisted(() => vi.fn<(port: number) => Promise<PortUsage>>());
+const classifyPortListener = vi.hoisted(() =>
+  vi.fn<(_listener: unknown, _port: number) => PortListenerKind>(() => "gateway"),
+);
+
+vi.mock("../../infra/ports.js", () => ({
+  classifyPortListener: (listener: unknown, port: number) => classifyPortListener(listener, port),
+  formatPortDiagnostics: vi.fn(() => []),
+  inspectPortUsage: (port: number) => inspectPortUsage(port),
+}));
+
+describe("inspectGatewayRestart", () => {
+  beforeEach(() => {
+    inspectPortUsage.mockReset();
+    inspectPortUsage.mockResolvedValue({
+      port: 0,
+      status: "free",
+      listeners: [],
+      hints: [],
+    });
+    classifyPortListener.mockReset();
+    classifyPortListener.mockReturnValue("gateway");
+  });
+
+  it("treats a gateway listener child pid as healthy ownership", async () => {
+    const service = {
+      readRuntime: vi.fn(async () => ({ status: "running", pid: 7000 })),
+    } as unknown as GatewayService;
+
+    inspectPortUsage.mockResolvedValue({
+      port: 18789,
+      status: "busy",
+      listeners: [{ pid: 7001, ppid: 7000, commandLine: "openclaw-gateway" }],
+      hints: [],
+    });
+
+    const { inspectGatewayRestart } = await import("./restart-health.js");
+    const snapshot = await inspectGatewayRestart({ service, port: 18789 });
+
+    expect(snapshot.healthy).toBe(true);
+    expect(snapshot.staleGatewayPids).toEqual([]);
+  });
+
+  it("marks non-owned gateway listener pids as stale while runtime is running", async () => {
+    const service = {
+      readRuntime: vi.fn(async () => ({ status: "running", pid: 8000 })),
+    } as unknown as GatewayService;
+
+    inspectPortUsage.mockResolvedValue({
+      port: 18789,
+      status: "busy",
+      listeners: [{ pid: 9000, ppid: 8999, commandLine: "openclaw-gateway" }],
+      hints: [],
+    });
+
+    const { inspectGatewayRestart } = await import("./restart-health.js");
+    const snapshot = await inspectGatewayRestart({ service, port: 18789 });
+
+    expect(snapshot.healthy).toBe(false);
+    expect(snapshot.staleGatewayPids).toEqual([9000]);
+  });
+});
diff --git a/src/cli/daemon-cli/restart-health.ts b/src/cli/daemon-cli/restart-health.ts
index 4a0d5bcf4..3eb46c542 100644
--- a/src/cli/daemon-cli/restart-health.ts
+++ b/src/cli/daemon-cli/restart-health.ts
@@ -21,6 +21,17 @@ export type GatewayRestartSnapshot = {
   staleGatewayPids: number[];
 };
 
+/**
+ * Reports whether a port listener belongs to the service runtime process: either the listener
+ * is the runtime PID itself, or its parent PID (`ppid`) is the runtime PID.
+ */
+function listenerOwnedByRuntimePid(params: {
+  listener: PortUsage["listeners"][number];
+  runtimePid: number;
+}): boolean {
+  return params.listener.pid === params.runtimePid || params.listener.ppid === params.runtimePid;
+}
+
 export async function inspectGatewayRestart(params: {
   service: GatewayService;
   port: number;
@@ -54,18 +65,27 @@ export async function inspectGatewayRestart(params: {
         )
       : [];
   const running = runtime.status === "running";
+  const runtimePid = runtime.pid;
   const ownsPort =
-    runtime.pid != null
-      ? portUsage.listeners.some((listener) => listener.pid === runtime.pid)
+    runtimePid != null
+      ? portUsage.listeners.some((listener) => listenerOwnedByRuntimePid({ listener, runtimePid }))
       : gatewayListeners.length > 0 ||
         (portUsage.status === "busy" && portUsage.listeners.length === 0);
   const healthy = running && ownsPort;
   const staleGatewayPids = Array.from(
     new Set(
       gatewayListeners
-        .map((listener) => listener.pid)
-        .filter((pid): pid is number => Number.isFinite(pid))
-        .filter((pid) => runtime.pid == null || pid !== runtime.pid || !running),
+        .filter((listener) => Number.isFinite(listener.pid))
+        .filter((listener) => {
+          if (!running) {
+            return true;
+          }
+          if (runtimePid == null) {
+            return true;
+          }
+          return !listenerOwnedByRuntimePid({ listener, runtimePid });
+        })
+        .map((listener) => listener.pid as number),
     ),
   );
 
diff --git a/src/infra/ports-inspect.ts b/src/infra/ports-inspect.ts
index d6c172a7b..344086ae1 100644
--- a/src/infra/ports-inspect.ts
+++ b/src/infra/ports-inspect.ts
@@ -75,6 +75,20 @@ async function resolveUnixUser(pid: number): Promise<string | undefined> {
   return line || undefined;
 }
 
+/**
+ * Looks up the parent PID of `pid` by running `ps -p <pid> -o ppid=`.
+ * Returns `undefined` when `ps` exits non-zero or its output does not parse to a positive integer.
+ */
+async function resolveUnixParentPid(pid: number): Promise<number | undefined> {
+  const res = await runCommandSafe(["ps", "-p", String(pid), "-o", "ppid="]);
+  if (res.code !== 0) {
+    return undefined;
+  }
+  const line = res.stdout.trim();
+  const parentPid = Number.parseInt(line, 10);
+  return Number.isFinite(parentPid) && parentPid > 0 ? parentPid : undefined;
+}
+
 async function readUnixListeners(
   port: number,
 ): Promise<{ listeners: PortListener[]; detail?: string; errors: string[] }> {
@@ -88,9 +102,10 @@ async function readUnixListeners(
       if (!listener.pid) {
         return;
       }
-      const [commandLine, user] = await Promise.all([
+      const [commandLine, user, parentPid] = await Promise.all([
         resolveUnixCommandLine(listener.pid),
         resolveUnixUser(listener.pid),
+        resolveUnixParentPid(listener.pid),
       ]);
       if (commandLine) {
         listener.commandLine = commandLine;
@@ -98,6 +113,9 @@ async function readUnixListeners(
       if (user) {
         listener.user = user;
       }
+      if (parentPid !== undefined) {
+        listener.ppid = parentPid;
+      }
     }),
   );
   return { listeners, detail: res.stdout.trim() || undefined, errors };
diff --git a/src/infra/ports-types.ts b/src/infra/ports-types.ts
index 56accc93a..827a5b3ad 100644
--- a/src/infra/ports-types.ts
+++ b/src/infra/ports-types.ts
@@ -1,5 +1,6 @@
 export type PortListener = {
   pid?: number;
+  ppid?: number;
   command?: string;
   commandLine?: string;
   user?: string;