Files
openclaw/src/cli/gateway-cli/run.ts

477 lines
15 KiB
TypeScript
Raw Normal View History

import fs from "node:fs";
import path from "node:path";
import type { Command } from "commander";
import type { GatewayAuthMode, GatewayTailscaleMode } from "../../config/config.js";
2026-01-14 01:08:15 +00:00
import {
CONFIG_PATH,
2026-01-14 01:08:15 +00:00
loadConfig,
readConfigFileSnapshot,
resolveStateDir,
2026-01-14 01:08:15 +00:00
resolveGatewayPort,
} from "../../config/config.js";
import { hasConfiguredSecretInput } from "../../config/types.secrets.js";
2026-01-14 01:08:15 +00:00
import { resolveGatewayAuth } from "../../gateway/auth.js";
import { startGatewayServer } from "../../gateway/server.js";
import type { GatewayWsLogStyle } from "../../gateway/ws-logging.js";
2026-01-14 01:08:15 +00:00
import { setGatewayWsLogStyle } from "../../gateway/ws-logging.js";
import { setVerbose } from "../../globals.js";
import { GatewayLockError } from "../../infra/gateway-lock.js";
import { formatPortDiagnostics, inspectPortUsage } from "../../infra/ports.js";
import { cleanStaleGatewayProcessesSync } from "../../infra/restart-stale-pids.js";
import { setConsoleSubsystemFilter, setConsoleTimestampPrefix } from "../../logging/console.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
2026-01-14 01:08:15 +00:00
import { defaultRuntime } from "../../runtime.js";
2026-01-20 07:42:21 +00:00
import { formatCliCommand } from "../command-format.js";
import { inheritOptionFromParent } from "../command-options.js";
fix(gateway): synthesize lifecycle robustness for restart and startup probes (#33831) * fix(gateway): correct launchctl command sequence for gateway restart (closes #20030) * fix(restart): expand HOME and escape label in launchctl plist path * fix(restart): poll port free after SIGKILL to prevent EADDRINUSE restart loop When cleanStaleGatewayProcessesSync() kills a stale gateway process, the kernel may not immediately release the TCP port. Previously the function returned after a fixed 500ms sleep (300ms SIGTERM + 200ms SIGKILL), allowing triggerOpenClawRestart() to hand off to systemd before the port was actually free. The new systemd process then raced the dying socket for port 18789, hit EADDRINUSE, and exited with status 1, causing systemd to retry indefinitely — the zombie restart loop reported in #33103. Fix: add waitForPortFreeSync() that polls lsof at 50ms intervals for up to 2 seconds after SIGKILL. cleanStaleGatewayProcessesSync() now blocks until the port is confirmed free (or the budget expires with a warning) before returning. The increased SIGTERM/SIGKILL wait budgets (600ms / 400ms) also give slow processes more time to exit cleanly. Fixes #33103 Related: #28134 * fix: add EADDRINUSE retry and TIME_WAIT port-bind checks for gateway startup * fix(ports): treat EADDRNOTAVAIL as non-retryable and fix flaky test * fix(gateway): hot-reload agents.defaults.models allowlist changes The reload plan had a rule for `agents.defaults.model` (singular) but not `agents.defaults.models` (plural — the allowlist array). Because `agents.defaults.models` does not prefix-match `agents.defaults.model.`, it fell through to the catch-all `agents` tail rule (kind=none), so allowlist edits in openclaw.json were silently ignored at runtime. Add a dedicated reload rule so changes to the models allowlist trigger a heartbeat restart, which re-reads the config and serves the updated list to clients. 
Fixes #33600 Co-authored-by: HCL <chenglunhu@gmail.com> Signed-off-by: HCL <chenglunhu@gmail.com> * test(restart): 100% branch coverage — audit round 2 Audit findings fixed: - remove dead guard: terminateStaleProcessesSync pids.length===0 check was unreachable (only caller cleanStaleGatewayProcessesSync already guards) - expose __testing.callSleepSyncRaw so sleepSync's real Atomics.wait path can be unit-tested directly without going through the override - fix broken sleepSync Atomics.wait test: previous test set override=null but cleanStaleGatewayProcessesSync returned before calling sleepSync — replaced with direct callSleepSyncRaw calls that actually exercise L36/L42-47 - fix pid collision: two tests used process.pid+304 (EPERM + dead-at-SIGTERM); EPERM test changed to process.pid+305 - fix misindented tests: 'deduplicates pids' and 'lsof status 1 container edge case' were outside their intended describe blocks; moved to correct scopes (findGatewayPidsOnPortSync and pollPortOnce respectively) - add missing branch tests: - status 1 + non-empty stdout with zero openclaw pids → free:true (L145) - mid-loop non-openclaw cmd in &&-chain (L67) - consecutive p-lines without c-line between them (L67) - invalid PID in p-line (p0 / pNaN) — ternary false branch (L67) - unknown lsof output line (else-if false branch L69) Coverage: 100% stmts / 100% branch / 100% funcs / 100% lines (36 tests) * test(restart): fix stale-pid test typing for tsgo * fix(gateway): address lifecycle review findings * test(update): make restart-helper path assertions windows-safe --------- Signed-off-by: HCL <chenglunhu@gmail.com> Co-authored-by: Glucksberg <markuscontasul@gmail.com> Co-authored-by: Efe Büken <efe@arven.digital> Co-authored-by: Riccardo Marino <rmarino@apple.com> Co-authored-by: HCL <chenglunhu@gmail.com>
2026-03-03 21:31:12 -06:00
import { forceFreePortAndWait, waitForPortBindable } from "../ports.js";
2026-01-14 01:08:15 +00:00
import { ensureDevGatewayConfig } from "./dev.js";
import { runGatewayLoop } from "./run-loop.js";
import {
describeUnknownError,
extractGatewayMiskeys,
maybeExplainGatewayServiceStop,
parsePort,
toOptionString,
} from "./shared.js";
/**
 * Raw options for the gateway run command as delivered by the CLI parser.
 *
 * Value options are typed `unknown` because they are validated/normalized inside
 * runGatewayCommand (via parsePort, toOptionString, and parseEnumOption) before use.
 */
type GatewayRunOpts = {
// --port: port for the gateway WebSocket; parsed with parsePort.
port?: unknown;
// --bind: bind mode; must be "loopback", "lan", "tailnet", "auto", or "custom".
bind?: unknown;
// --token: shared token; also exported as OPENCLAW_GATEWAY_TOKEN when non-empty.
token?: unknown;
// --auth: gateway auth mode; validated against GATEWAY_AUTH_MODES.
auth?: unknown;
// --password: password used when the auth mode is "password".
password?: unknown;
// --tailscale: Tailscale exposure mode; validated against GATEWAY_TAILSCALE_MODES.
tailscale?: unknown;
// --tailscale-reset-on-exit: reset Tailscale serve/funnel configuration on shutdown.
tailscaleResetOnExit?: boolean;
// --allow-unconfigured: allow start without gateway.mode=local in config.
allowUnconfigured?: boolean;
// --force: kill any existing listener on the target port before starting.
force?: boolean;
// --verbose: passed to setVerbose.
verbose?: boolean;
// --claude-cli-logs: restrict console output to the "agent/claude-cli" subsystem.
claudeCliLogs?: boolean;
// --ws-log: WebSocket log style ("auto" | "full" | "compact").
wsLog?: unknown;
// --compact: alias for "--ws-log compact".
compact?: boolean;
// --raw-stream: sets OPENCLAW_RAW_STREAM=1.
rawStream?: boolean;
// --raw-stream-path: exported as OPENCLAW_RAW_STREAM_PATH when non-empty.
rawStreamPath?: unknown;
// --dev: ensure a dev gateway config exists before loading config.
dev?: boolean;
// --reset: reset dev state; only accepted together with dev mode.
reset?: boolean;
};
// Subsystem logger tagged "gateway"; used in this file for service-mode cleanup,
// --force progress messages, and the auth mode=none warning.
const gatewayLog = createSubsystemLogger("gateway");
// Value-carrying option keys that resolveGatewayRunOptions may fill in from the parent
// command. For every key except "wsLog" the child's own value wins when it is set;
// for "wsLog" the inherited parent value wins when present.
const GATEWAY_RUN_VALUE_KEYS = [
"port",
"bind",
"token",
"auth",
"password",
"tailscale",
"wsLog",
"rawStreamPath",
] as const;
// Boolean flag keys that resolveGatewayRunOptions merges with the parent command:
// a flag ends up true when either the child value or the inherited value is truthy.
const GATEWAY_RUN_BOOLEAN_KEYS = [
"tailscaleResetOnExit",
"allowUnconfigured",
"dev",
"reset",
"force",
"verbose",
"claudeCliLogs",
"compact",
"rawStream",
] as const;
// Accepted values for --auth. Used to validate the flag and to render the
// choices shown in the option help text and in the "Invalid --auth" error.
const GATEWAY_AUTH_MODES: readonly GatewayAuthMode[] = [
"none",
"token",
"password",
"trusted-proxy",
];
// Accepted values for --tailscale. Used the same way as GATEWAY_AUTH_MODES:
// flag validation plus help/error text.
const GATEWAY_TAILSCALE_MODES: readonly GatewayTailscaleMode[] = ["off", "serve", "funnel"];
/**
 * Narrows a raw option string to one of the allowed literal values.
 *
 * @param raw - Option text as supplied by the user; may be undefined or empty.
 * @param allowed - The complete list of accepted values.
 * @returns The matching allowed value, or `null` when `raw` is missing, empty,
 *   or not an exact (case-sensitive) member of `allowed`.
 */
function parseEnumOption<T extends string>(
  raw: string | undefined,
  allowed: readonly T[],
): T | null {
  if (raw) {
    // Exact string comparison; the matched element already carries type T.
    const match = allowed.find((candidate) => candidate === raw);
    return match ?? null;
  }
  return null;
}
/**
 * Renders the allowed values as a pipe-separated list of double-quoted names,
 * e.g. `"off"|"serve"|"funnel"`, for use in option help text.
 *
 * @param modes - Values to list, in display order.
 * @returns The joined list; an empty string when `modes` is empty.
 */
function formatModeChoices<T extends string>(modes: readonly T[]): string {
  const quoted: string[] = [];
  for (const mode of modes) {
    quoted.push(`"${mode}"`);
  }
  return quoted.join("|");
}
/**
 * Renders the allowed values as an English "or" list of double-quoted names for
 * error messages: `"a"`, `"a" or "b"`, `"a", "b", or "c"`.
 *
 * @param modes - Values to list, in display order.
 * @returns The formatted list; an empty string when `modes` is empty.
 */
function formatModeErrorList<T extends string>(modes: readonly T[]): string {
  // map() yields a fresh array, so popping from it never touches the caller's list.
  const leading = modes.map((mode) => `"${mode}"`);
  const last = leading.pop();
  if (last === undefined) {
    return "";
  }
  switch (leading.length) {
    case 0:
      return last;
    case 1:
      return `${leading[0]} or ${last}`;
    default:
      return `${leading.join(", ")}, or ${last}`;
  }
}
/**
 * Combines the options parsed for this command with options inherited from the
 * parent command (looked up through inheritOptionFromParent).
 *
 * - Value options: the command's own value wins, falling back to the inherited one.
 *   The exception is `wsLog`, where the inherited value wins when present.
 * - Boolean flags: true when either the own or the inherited value is truthy.
 *
 * @param opts - Options as parsed for this command; not mutated.
 * @param command - The command instance used to look up parent options.
 * @returns A new options object with inherited values merged in.
 */
function resolveGatewayRunOptions(opts: GatewayRunOpts, command?: Command): GatewayRunOpts {
  const merged: GatewayRunOpts = { ...opts };

  for (const key of GATEWAY_RUN_VALUE_KEYS) {
    const fromParent = inheritOptionFromParent(command, key);
    const own = merged[key];
    // wsLog has a child default ("auto"), so prefer inherited parent CLI value when present.
    merged[key] = key === "wsLog" ? (fromParent ?? own) : (own ?? fromParent);
  }

  for (const key of GATEWAY_RUN_BOOLEAN_KEYS) {
    const fromParent = inheritOptionFromParent<boolean>(command, key);
    merged[key] = Boolean(merged[key]) || Boolean(fromParent);
  }

  return merged;
}
2026-01-15 06:18:34 +00:00
/**
 * Validates the resolved CLI options, applies process-level settings, and starts the
 * gateway server inside the restart loop.
 *
 * Steps, in order:
 * 1. Reject `--reset` outside dev mode (dev mode = `--dev` or OPENCLAW_PROFILE=dev).
 * 2. Configure console/logging and export env flags (claude-cli logs, raw stream).
 * 3. In dev mode, ensure the dev gateway config exists; then load config.
 * 4. Resolve and validate port and bind mode.
 * 5. When OPENCLAW_SERVICE_MARKER is set, clear stale gateway processes on the port.
 * 6. With `--force`, free the port and wait until it is bindable.
 * 7. Validate `--auth` / `--tailscale`, require gateway.mode=local unless
 *    `--allow-unconfigured`, and refuse non-loopback binds without usable auth.
 * 8. Run the gateway loop; on failure print diagnostics and exit with code 1.
 *
 * Every validation failure reports through `defaultRuntime.error`, calls
 * `defaultRuntime.exit(1)`, and returns, so nothing further runs even when the
 * runtime's `exit` does not terminate the process.
 *
 * @param opts - Options already merged with parent-command options.
 */
async function runGatewayCommand(opts: GatewayRunOpts) {
  const isDevProfile = process.env.OPENCLAW_PROFILE?.trim().toLowerCase() === "dev";
  const devMode = Boolean(opts.dev) || isDevProfile;
  if (opts.reset && !devMode) {
    defaultRuntime.error("Use --reset with --dev.");
    defaultRuntime.exit(1);
    return;
  }

  setConsoleTimestampPrefix(true);
  setVerbose(Boolean(opts.verbose));
  if (opts.claudeCliLogs) {
    setConsoleSubsystemFilter(["agent/claude-cli"]);
    process.env.OPENCLAW_CLAUDE_CLI_LOG_OUTPUT = "1";
  }

  // --compact is an alias that overrides whatever --ws-log carries.
  const wsLogRaw = (opts.compact ? "compact" : opts.wsLog) as string | undefined;
  const wsLogStyle: GatewayWsLogStyle =
    wsLogRaw === "compact" ? "compact" : wsLogRaw === "full" ? "full" : "auto";
  if (
    wsLogRaw !== undefined &&
    wsLogRaw !== "auto" &&
    wsLogRaw !== "compact" &&
    wsLogRaw !== "full"
  ) {
    defaultRuntime.error('Invalid --ws-log (use "auto", "full", "compact")');
    defaultRuntime.exit(1);
    // Stop here so an unrecognized style is never silently treated as "auto".
    return;
  }
  setGatewayWsLogStyle(wsLogStyle);

  if (opts.rawStream) {
    process.env.OPENCLAW_RAW_STREAM = "1";
  }
  const rawStreamPath = toOptionString(opts.rawStreamPath);
  if (rawStreamPath) {
    process.env.OPENCLAW_RAW_STREAM_PATH = rawStreamPath;
  }

  if (devMode) {
    await ensureDevGatewayConfig({ reset: Boolean(opts.reset) });
  }

  const cfg = loadConfig();
  const portOverride = parsePort(opts.port);
  if (opts.port !== undefined && portOverride === null) {
    defaultRuntime.error("Invalid port");
    defaultRuntime.exit(1);
    // Do not fall back to the configured port when an explicit --port was rejected.
    return;
  }
  const port = portOverride ?? resolveGatewayPort(cfg);
  if (!Number.isFinite(port) || port <= 0) {
    defaultRuntime.error("Invalid port");
    defaultRuntime.exit(1);
    return;
  }

  const bindRaw = toOptionString(opts.bind) ?? cfg.gateway?.bind ?? "loopback";
  const bind =
    bindRaw === "loopback" ||
    bindRaw === "lan" ||
    bindRaw === "auto" ||
    bindRaw === "custom" ||
    bindRaw === "tailnet"
      ? bindRaw
      : null;
  if (!bind) {
    defaultRuntime.error('Invalid --bind (use "loopback", "lan", "tailnet", "auto", or "custom")');
    defaultRuntime.exit(1);
    return;
  }

  // Service mode only: clear stale gateway processes on the port before binding.
  if (process.env.OPENCLAW_SERVICE_MARKER?.trim()) {
    const stale = cleanStaleGatewayProcessesSync(port);
    if (stale.length > 0) {
      gatewayLog.info(
        `service-mode: cleared ${stale.length} stale gateway pid(s) before bind on port ${port}`,
      );
    }
  }

  if (opts.force) {
    try {
      const { killed, waitedMs, escalatedToSigkill } = await forceFreePortAndWait(port, {
        timeoutMs: 2000,
        intervalMs: 100,
        sigtermTimeoutMs: 700,
      });
      if (killed.length === 0) {
        gatewayLog.info(`force: no listeners on port ${port}`);
      } else {
        for (const proc of killed) {
          gatewayLog.info(
            `force: killed pid ${proc.pid}${proc.command ? ` (${proc.command})` : ""} on port ${port}`,
          );
        }
        if (escalatedToSigkill) {
          gatewayLog.info(`force: escalated to SIGKILL while freeing port ${port}`);
        }
        if (waitedMs > 0) {
          gatewayLog.info(`force: waited ${waitedMs}ms for port ${port} to free`);
        }
      }
      // After killing, verify the port is actually bindable (handles TIME_WAIT).
      const bindProbeHost =
        bind === "loopback"
          ? "127.0.0.1"
          : bind === "lan"
            ? "0.0.0.0"
            : bind === "custom"
              ? toOptionString(cfg.gateway?.customBindHost)
              : undefined;
      const bindWaitMs = await waitForPortBindable(port, {
        timeoutMs: 3000,
        intervalMs: 150,
        host: bindProbeHost,
      });
      if (bindWaitMs > 0) {
        gatewayLog.info(`force: waited ${bindWaitMs}ms for port ${port} to become bindable`);
      }
    } catch (err) {
      defaultRuntime.error(`Force: ${String(err)}`);
      defaultRuntime.exit(1);
      return;
    }
  }

  // Export an explicit --token to the environment as OPENCLAW_GATEWAY_TOKEN.
  if (opts.token) {
    const token = toOptionString(opts.token);
    if (token) {
      process.env.OPENCLAW_GATEWAY_TOKEN = token;
    }
  }

  const authModeRaw = toOptionString(opts.auth);
  const authMode = parseEnumOption(authModeRaw, GATEWAY_AUTH_MODES);
  if (authModeRaw && !authMode) {
    defaultRuntime.error(`Invalid --auth (use ${formatModeErrorList(GATEWAY_AUTH_MODES)})`);
    defaultRuntime.exit(1);
    return;
  }

  const tailscaleRaw = toOptionString(opts.tailscale);
  const tailscaleMode = parseEnumOption(tailscaleRaw, GATEWAY_TAILSCALE_MODES);
  if (tailscaleRaw && !tailscaleMode) {
    defaultRuntime.error(
      `Invalid --tailscale (use ${formatModeErrorList(GATEWAY_TAILSCALE_MODES)})`,
    );
    defaultRuntime.exit(1);
    return;
  }

  const passwordRaw = toOptionString(opts.password);
  const tokenRaw = toOptionString(opts.token);

  // A failed snapshot read is tolerated; existence then falls back to a plain fs check.
  const snapshot = await readConfigFileSnapshot().catch(() => null);
  const configExists = snapshot?.exists ?? fs.existsSync(CONFIG_PATH);
  const configAuditPath = path.join(resolveStateDir(process.env), "logs", "config-audit.jsonl");
  const mode = cfg.gateway?.mode;
  if (!opts.allowUnconfigured && mode !== "local") {
    if (!configExists) {
      defaultRuntime.error(
        `Missing config. Run \`${formatCliCommand("openclaw setup")}\` or set gateway.mode=local (or pass --allow-unconfigured).`,
      );
    } else {
      defaultRuntime.error(
        `Gateway start blocked: set gateway.mode=local (current: ${mode ?? "unset"}) or pass --allow-unconfigured.`,
      );
      defaultRuntime.error(`Config write audit: ${configAuditPath}`);
    }
    defaultRuntime.exit(1);
    return;
  }

  const miskeys = extractGatewayMiskeys(snapshot?.parsed);
  // Only build an override object when at least one auth-related flag was passed.
  const authOverride =
    authMode || passwordRaw || tokenRaw || authModeRaw
      ? {
          ...(authMode ? { mode: authMode } : {}),
          ...(tokenRaw ? { token: tokenRaw } : {}),
          ...(passwordRaw ? { password: passwordRaw } : {}),
        }
      : undefined;
  const resolvedAuth = resolveGatewayAuth({
    authConfig: cfg.gateway?.auth,
    authOverride,
    env: process.env,
    tailscaleMode: tailscaleMode ?? cfg.gateway?.tailscale?.mode ?? "off",
  });
  const resolvedAuthMode = resolvedAuth.mode;
  const tokenValue = resolvedAuth.token;
  const passwordValue = resolvedAuth.password;
  const hasToken = typeof tokenValue === "string" && tokenValue.trim().length > 0;
  const hasPassword = typeof passwordValue === "string" && passwordValue.trim().length > 0;
  // "Configured" = a resolved non-blank value, or a secret input recognized by
  // hasConfiguredSecretInput for the override/config value.
  const tokenConfigured =
    hasToken ||
    hasConfiguredSecretInput(
      authOverride?.token ?? cfg.gateway?.auth?.token,
      cfg.secrets?.defaults,
    );
  const passwordConfigured =
    hasPassword ||
    hasConfiguredSecretInput(
      authOverride?.password ?? cfg.gateway?.auth?.password,
      cfg.secrets?.defaults,
    );
  const hasSharedSecret =
    (resolvedAuthMode === "token" && tokenConfigured) ||
    (resolvedAuthMode === "password" && passwordConfigured);
  // Token mode without a configured token is exempt from the non-loopback auth refusal below.
  const canBootstrapToken = resolvedAuthMode === "token" && !tokenConfigured;

  const authHints: string[] = [];
  if (miskeys.hasGatewayToken) {
    authHints.push('Found "gateway.token" in config. Use "gateway.auth.token" instead.');
  }
  if (miskeys.hasRemoteToken) {
    authHints.push(
      '"gateway.remote.token" is for remote CLI calls; it does not enable local gateway auth.',
    );
  }

  if (resolvedAuthMode === "password" && !passwordConfigured) {
    defaultRuntime.error(
      [
        "Gateway auth is set to password, but no password is configured.",
        "Set gateway.auth.password (or OPENCLAW_GATEWAY_PASSWORD), or pass --password.",
        ...authHints,
      ]
        .filter(Boolean)
        .join("\n"),
    );
    defaultRuntime.exit(1);
    return;
  }
  if (resolvedAuthMode === "none") {
    gatewayLog.warn(
      "Gateway auth mode=none explicitly configured; all gateway connections are unauthenticated.",
    );
  }
  if (
    bind !== "loopback" &&
    !hasSharedSecret &&
    !canBootstrapToken &&
    resolvedAuthMode !== "trusted-proxy"
  ) {
    defaultRuntime.error(
      [
        `Refusing to bind gateway to ${bind} without auth.`,
        "Set gateway.auth.token/password (or OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD) or pass --token/--password.",
        ...authHints,
      ]
        .filter(Boolean)
        .join("\n"),
    );
    defaultRuntime.exit(1);
    return;
  }

  const tailscaleOverride =
    tailscaleMode || opts.tailscaleResetOnExit
      ? {
          ...(tailscaleMode ? { mode: tailscaleMode } : {}),
          ...(opts.tailscaleResetOnExit ? { resetOnExit: true } : {}),
        }
      : undefined;

  try {
    await runGatewayLoop({
      runtime: defaultRuntime,
      lockPort: port,
      start: async () =>
        await startGatewayServer(port, {
          bind,
          auth: authOverride,
          tailscale: tailscaleOverride,
        }),
    });
  } catch (err) {
    // Match by name as well as by instanceof when detecting a GatewayLockError.
    if (
      err instanceof GatewayLockError ||
      (err && typeof err === "object" && (err as { name?: string }).name === "GatewayLockError")
    ) {
      const errMessage = describeUnknownError(err);
      defaultRuntime.error(
        `Gateway failed to start: ${errMessage}\nIf the gateway is supervised, stop it with: ${formatCliCommand("openclaw gateway stop")}`,
      );
      try {
        const diagnostics = await inspectPortUsage(port);
        if (diagnostics.status === "busy") {
          for (const line of formatPortDiagnostics(diagnostics)) {
            defaultRuntime.error(line);
          }
        }
      } catch {
        // ignore diagnostics failures
      }
      await maybeExplainGatewayServiceStop();
      defaultRuntime.exit(1);
      return;
    }
    defaultRuntime.error(`Gateway failed to start: ${String(err)}`);
    defaultRuntime.exit(1);
  }
}
2026-01-15 06:18:34 +00:00
/**
 * Registers the gateway run options and action handler on the given command.
 *
 * The action merges the parsed options with any options set on the parent command
 * (resolveGatewayRunOptions) and then runs the gateway (runGatewayCommand).
 *
 * @param cmd - The commander command to extend.
 * @returns The same command, to allow chaining.
 */
export function addGatewayRunCommand(cmd: Command): Command {
  return cmd
    .option("--port <port>", "Port for the gateway WebSocket")
    .option(
      "--bind <mode>",
      'Bind mode ("loopback"|"lan"|"tailnet"|"auto"|"custom"). Defaults to config gateway.bind (or loopback).',
    )
    .option(
      "--token <token>",
      "Shared token required in connect.params.auth.token (default: OPENCLAW_GATEWAY_TOKEN env if set)",
    )
    .option("--auth <mode>", `Gateway auth mode (${formatModeChoices(GATEWAY_AUTH_MODES)})`)
    .option("--password <password>", "Password for auth mode=password")
    .option(
      "--tailscale <mode>",
      `Tailscale exposure mode (${formatModeChoices(GATEWAY_TAILSCALE_MODES)})`,
    )
    .option(
      "--tailscale-reset-on-exit",
      "Reset Tailscale serve/funnel configuration on shutdown",
      false,
    )
    .option(
      "--allow-unconfigured",
      "Allow gateway start without gateway.mode=local in config",
      false,
    )
    .option("--dev", "Create a dev config + workspace if missing (no BOOTSTRAP.md)", false)
    .option(
      "--reset",
      "Reset dev config + credentials + sessions + workspace (requires --dev)",
      false,
    )
    .option("--force", "Kill any existing listener on the target port before starting", false)
    .option("--verbose", "Verbose logging to stdout/stderr", false)
    .option(
      "--claude-cli-logs",
      "Only show claude-cli logs in the console (includes stdout/stderr)",
      false,
    )
    // The "auto" default here is why resolveGatewayRunOptions lets a parent-level
    // --ws-log value take precedence over the child's value.
    .option("--ws-log <style>", 'WebSocket log style ("auto"|"full"|"compact")', "auto")
    .option("--compact", 'Alias for "--ws-log compact"', false)
    .option("--raw-stream", "Log raw model stream events to jsonl", false)
    .option("--raw-stream-path <path>", "Raw stream jsonl path")
    .action(async (opts: GatewayRunOpts, command: Command) => {
      await runGatewayCommand(resolveGatewayRunOptions(opts, command));
    });
}