diff --git a/src/cli/daemon-cli/lifecycle.ts b/src/cli/daemon-cli/lifecycle.ts index f6d230f0b..9c23011d2 100644 --- a/src/cli/daemon-cli/lifecycle.ts +++ b/src/cli/daemon-cli/lifecycle.ts @@ -88,6 +88,7 @@ export async function runDaemonRestart(opts: DaemonLifecycleOptions = {}): Promi port: restartPort, attempts: POST_RESTART_HEALTH_ATTEMPTS, delayMs: POST_RESTART_HEALTH_DELAY_MS, + includeUnknownListenersAsStale: process.platform === "win32", }); if (!health.healthy && health.staleGatewayPids.length > 0) { @@ -105,6 +106,7 @@ export async function runDaemonRestart(opts: DaemonLifecycleOptions = {}): Promi port: restartPort, attempts: POST_RESTART_HEALTH_ATTEMPTS, delayMs: POST_RESTART_HEALTH_DELAY_MS, + includeUnknownListenersAsStale: process.platform === "win32", }); } diff --git a/src/cli/daemon-cli/restart-health.test.ts b/src/cli/daemon-cli/restart-health.test.ts index 2dfb5cf59..647ca00fd 100644 --- a/src/cli/daemon-cli/restart-health.test.ts +++ b/src/cli/daemon-cli/restart-health.test.ts @@ -1,4 +1,4 @@ -import { beforeEach, describe, expect, it, vi } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { GatewayService } from "../../daemon/service.js"; import type { PortListenerKind, PortUsage } from "../../infra/ports.js"; @@ -13,6 +13,8 @@ vi.mock("../../infra/ports.js", () => ({ inspectPortUsage: (port: number) => inspectPortUsage(port), })); +const originalPlatform = process.platform; + describe("inspectGatewayRestart", () => { beforeEach(() => { inspectPortUsage.mockReset(); @@ -26,6 +28,10 @@ describe("inspectGatewayRestart", () => { classifyPortListener.mockReturnValue("gateway"); }); + afterEach(() => { + Object.defineProperty(process, "platform", { value: originalPlatform, configurable: true }); + }); + it("treats a gateway listener child pid as healthy ownership", async () => { const service = { readRuntime: vi.fn(async () => ({ status: "running", pid: 7000 })), @@ -63,4 +69,104 @@ describe("inspectGatewayRestart", () => { expect(snapshot.healthy).toBe(false); expect(snapshot.staleGatewayPids).toEqual([9000]); }); + + it("treats unknown listeners as stale on Windows when enabled", async () => { + Object.defineProperty(process, "platform", { value: "win32", configurable: true }); + classifyPortListener.mockReturnValue("unknown"); + + const service = { + readRuntime: vi.fn(async () => ({ status: "stopped" })), + } as unknown as GatewayService; + + inspectPortUsage.mockResolvedValue({ + port: 18789, + status: "busy", + listeners: [{ pid: 10920, command: "unknown" }], + hints: [], + }); + + const { inspectGatewayRestart } = await import("./restart-health.js"); + const snapshot = await inspectGatewayRestart({ + service, + port: 18789, + includeUnknownListenersAsStale: true, + }); + + expect(snapshot.staleGatewayPids).toEqual([10920]); + }); + + it("does not treat unknown listeners as stale when fallback is disabled", async () => { + Object.defineProperty(process, "platform", { value: "win32", configurable: true }); + classifyPortListener.mockReturnValue("unknown"); + + const service = { + readRuntime: vi.fn(async () => ({ status: "stopped" })), + } as unknown as GatewayService; + + inspectPortUsage.mockResolvedValue({ + port: 18789, + status: "busy", + listeners: [{ pid: 10920, command: "unknown" }], + hints: [], + }); + + const { inspectGatewayRestart } = await import("./restart-health.js"); + const snapshot = await inspectGatewayRestart({ + service, + port: 18789, + includeUnknownListenersAsStale: false, + }); + + expect(snapshot.staleGatewayPids).toEqual([]); + }); + + it("does not apply unknown-listener fallback while runtime is running", async () => { + Object.defineProperty(process, "platform", { value: "win32", configurable: true }); + classifyPortListener.mockReturnValue("unknown"); + + const service = { + readRuntime: vi.fn(async () => ({ status: "running", pid: 10920 })), + } as unknown as GatewayService; + + inspectPortUsage.mockResolvedValue({ + port: 18789, + status: "busy", + listeners: [{ pid: 10920, command: "unknown" }], + hints: [], + }); + + const { inspectGatewayRestart } = await import("./restart-health.js"); + const snapshot = await inspectGatewayRestart({ + service, + port: 18789, + includeUnknownListenersAsStale: true, + }); + + expect(snapshot.staleGatewayPids).toEqual([]); + }); + + it("does not treat known non-gateway listeners as stale in fallback mode", async () => { + Object.defineProperty(process, "platform", { value: "win32", configurable: true }); + classifyPortListener.mockReturnValue("ssh"); + + const service = { + readRuntime: vi.fn(async () => ({ status: "stopped" })), + } as unknown as GatewayService; + + inspectPortUsage.mockResolvedValue({ + port: 18789, + status: "busy", + listeners: [{ pid: 22001, command: "nginx.exe" }], + hints: [], + }); + + const { inspectGatewayRestart } = await import("./restart-health.js"); + const snapshot = await inspectGatewayRestart({ + service, + port: 18789, + includeUnknownListenersAsStale: true, + }); + + expect(snapshot.staleGatewayPids).toEqual([]); + }); }); diff --git a/src/cli/daemon-cli/restart-health.ts b/src/cli/daemon-cli/restart-health.ts index 3eb46c542..b6d463a95 100644 --- a/src/cli/daemon-cli/restart-health.ts +++ b/src/cli/daemon-cli/restart-health.ts @@ -6,6 +6,7 @@ import { inspectPortUsage, type PortUsage, } from "../../infra/ports.js"; +import { killProcessTree } from "../../process/kill-tree.js"; import { sleep } from "../../utils.js"; export const DEFAULT_RESTART_HEALTH_TIMEOUT_MS = 60_000; @@ -32,6 +33,7 @@ export async function inspectGatewayRestart(params: { service: GatewayService; port: number; env?: NodeJS.ProcessEnv; + includeUnknownListenersAsStale?: boolean; }): Promise { const env = params.env ?? process.env; let runtime: GatewayServiceRuntime = { status: "unknown" }; @@ -60,6 +62,16 @@ export async function inspectGatewayRestart(params: { (listener) => classifyPortListener(listener, params.port) === "gateway", ) : []; + const fallbackListenerPids = + params.includeUnknownListenersAsStale && + process.platform === "win32" && + runtime.status !== "running" && + portUsage.status === "busy" + ? portUsage.listeners + .filter((listener) => classifyPortListener(listener, params.port) === "unknown") + .map((listener) => listener.pid) + .filter((pid): pid is number => Number.isFinite(pid)) + : []; const running = runtime.status === "running"; const runtimePid = runtime.pid; const ownsPort = @@ -69,8 +81,8 @@ export async function inspectGatewayRestart(params: { (portUsage.status === "busy" && portUsage.listeners.length === 0); const healthy = running && ownsPort; const staleGatewayPids = Array.from( - new Set( - gatewayListeners + new Set([ + ...gatewayListeners .filter((listener) => Number.isFinite(listener.pid)) .filter((listener) => { if (!running) { @@ -82,7 +94,10 @@ export async function inspectGatewayRestart(params: { return !listenerOwnedByRuntimePid({ listener, runtimePid }); }) .map((listener) => listener.pid as number), - ), + ...fallbackListenerPids.filter( + (pid) => runtime.pid == null || pid !== runtime.pid || !running, + ), + ]), ); return { @@ -99,6 +114,7 @@ export async function waitForGatewayHealthyRestart(params: { attempts?: number; delayMs?: number; env?: NodeJS.ProcessEnv; + includeUnknownListenersAsStale?: boolean; }): Promise { const attempts = params.attempts ?? DEFAULT_RESTART_HEALTH_ATTEMPTS; const delayMs = params.delayMs ?? DEFAULT_RESTART_HEALTH_DELAY_MS; @@ -107,6 +123,7 @@ export async function waitForGatewayHealthyRestart(params: { service: params.service, port: params.port, env: params.env, + includeUnknownListenersAsStale: params.includeUnknownListenersAsStale, }); for (let attempt = 0; attempt < attempts; attempt += 1) { @@ -121,6 +138,7 @@ export async function waitForGatewayHealthyRestart(params: { service: params.service, port: params.port, env: params.env, + includeUnknownListenersAsStale: params.includeUnknownListenersAsStale, }); } @@ -156,36 +174,14 @@ export function renderRestartDiagnostics(snapshot: GatewayRestartSnapshot): stri } export async function terminateStaleGatewayPids(pids: number[]): Promise { - const killed: number[] = []; - for (const pid of pids) { - try { - process.kill(pid, "SIGTERM"); - killed.push(pid); - } catch (err) { - const code = (err as NodeJS.ErrnoException)?.code; - if (code !== "ESRCH") { - throw err; - } - } + const targets = Array.from( + new Set(pids.filter((pid): pid is number => Number.isFinite(pid) && pid > 0)), + ); + for (const pid of targets) { + killProcessTree(pid, { graceMs: 300 }); } - - if (killed.length === 0) { - return killed; + if (targets.length > 0) { + await sleep(500); } - - await sleep(400); - - for (const pid of killed) { - try { - process.kill(pid, 0); - process.kill(pid, "SIGKILL"); - } catch (err) { - const code = (err as NodeJS.ErrnoException)?.code; - if (code !== "ESRCH") { - throw err; - } - } - } - - return killed; + return targets; }