diff --git a/src/config/types.gateway.ts b/src/config/types.gateway.ts index 1f2c30cba..141f5a0b3 100644 --- a/src/config/types.gateway.ts +++ b/src/config/types.gateway.ts @@ -312,4 +312,10 @@ export type GatewayConfig = { trustedProxies?: string[]; /** Tool access restrictions for HTTP /tools/invoke endpoint. */ tools?: GatewayToolsConfig; + /** + * Channel health monitor interval in minutes. + * Periodically checks channel health and restarts unhealthy channels. + * Set to 0 to disable. Default: 5. + */ + channelHealthCheckMinutes?: number; }; diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index 0e7054646..dea9551c3 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -48,6 +48,7 @@ import { createEmptyPluginRegistry } from "../plugins/registry.js"; import { getTotalQueueSize } from "../process/command-queue.js"; import { runOnboardingWizard } from "../wizard/onboarding.js"; import { createAuthRateLimiter, type AuthRateLimiter } from "./auth-rate-limit.js"; +import { startChannelHealthMonitor } from "./channel-health-monitor.js"; import { startGatewayConfigReloader } from "./config-reload.js"; import { ExecApprovalManager } from "./exec-approval-manager.js"; import { NodeRegistry } from "./node-registry.js"; @@ -502,6 +503,15 @@ export async function startGatewayServer( } : startHeartbeatRunner({ cfg: cfgAtStart }); + const healthCheckMinutes = cfgAtStart.gateway?.channelHealthCheckMinutes; + const healthCheckDisabled = healthCheckMinutes === 0; + const channelHealthMonitor = healthCheckDisabled + ? null + : startChannelHealthMonitor({ + channelManager, + checkIntervalMs: (healthCheckMinutes ?? 5) * 60_000, + }); + if (!minimalTestGateway) { void cron.start().catch((err) => logCron.error(`failed to start: ${String(err)}`)); } @@ -720,6 +730,7 @@ export async function startGatewayServer( } skillsChangeUnsub(); authRateLimiter?.dispose(); + channelHealthMonitor?.stop(); await close(opts); }, };