Files
openclaw/src/auto-reply/reply/agent-runner-execution.ts

587 lines
23 KiB
TypeScript
Raw Normal View History

import crypto from "node:crypto";
import fs from "node:fs";
import { runCliAgent } from "../../agents/cli-runner.js";
import { getCliSessionId } from "../../agents/cli-session.js";
import { runWithModelFallback } from "../../agents/model-fallback.js";
import { isCliProvider } from "../../agents/model-selection.js";
import {
isCompactionFailureError,
isContextOverflowError,
isLikelyContextOverflowError,
isTransientHttpError,
sanitizeUserFacingText,
} from "../../agents/pi-embedded-helpers.js";
import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
import {
2026-01-24 15:35:05 +13:00
resolveGroupSessionKey,
resolveSessionTranscriptPath,
type SessionEntry,
updateSessionStore,
} from "../../config/sessions.js";
import { logVerbose } from "../../globals.js";
2026-01-19 00:34:16 +00:00
import { emitAgentEvent, registerAgentRunContext } from "../../infra/agent-events.js";
import { defaultRuntime } from "../../runtime.js";
import {
isMarkdownCapableMessageChannel,
resolveMessageChannel,
} from "../../utils/message-channel.js";
import { stripHeartbeatToken } from "../heartbeat.js";
import type { TemplateContext } from "../templating.js";
import type { VerboseLevel } from "../thinking.js";
import { isSilentReplyPrefixText, isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
import {
buildEmbeddedRunBaseParams,
buildEmbeddedRunContexts,
resolveModelFallbackOptions,
} from "./agent-runner-utils.js";
import { type BlockReplyPipeline } from "./block-reply-pipeline.js";
import type { FollowupRun } from "./queue.js";
import { createBlockReplyDeliveryHandler } from "./reply-delivery.js";
import type { TypingSignaler } from "./typing-mode.js";
export type RuntimeFallbackAttempt = {
provider: string;
model: string;
error: string;
reason?: string;
status?: number;
code?: string;
};
export type AgentRunLoopResult =
| {
kind: "success";
runId: string;
runResult: Awaited<ReturnType<typeof runEmbeddedPiAgent>>;
fallbackProvider?: string;
fallbackModel?: string;
fallbackAttempts: RuntimeFallbackAttempt[];
didLogHeartbeatStrip: boolean;
autoCompactionCompleted: boolean;
/** Payload keys sent directly (not via pipeline) during tool flush. */
directlySentBlockKeys?: Set<string>;
}
| { kind: "final"; payload: ReplyPayload };
export async function runAgentTurnWithFallback(params: {
commandBody: string;
followupRun: FollowupRun;
sessionCtx: TemplateContext;
opts?: GetReplyOptions;
typingSignals: TypingSignaler;
blockReplyPipeline: BlockReplyPipeline | null;
blockStreamingEnabled: boolean;
blockReplyChunking?: {
minChars: number;
maxChars: number;
breakPreference: "paragraph" | "newline" | "sentence";
fix: flush block streaming on paragraph boundaries for chunkMode=newline (#7014) * feat: Implement paragraph boundary flushing in block streaming - Added `flushOnParagraph` option to `BlockReplyChunking` for immediate flushing on paragraph breaks. - Updated `EmbeddedBlockChunker` to handle paragraph boundaries during chunking. - Enhanced `createBlockReplyCoalescer` to support flushing on enqueue. - Added tests to verify behavior of flushing with and without `flushOnEnqueue` set. - Updated relevant types and interfaces to include `flushOnParagraph` and `flushOnEnqueue` options. * fix: Improve streaming behavior and enhance block chunking logic - Resolved issue with stuck typing indicator after streamed BlueBubbles replies. - Refactored `EmbeddedBlockChunker` to streamline fence-split handling and ensure maxChars fallback for newline chunking. - Added tests to validate new chunking behavior, including handling of paragraph breaks and fence scenarios. - Updated changelog to reflect these changes. * test: Add test for clamping long paragraphs in EmbeddedBlockChunker - Introduced a new test case to verify that long paragraphs are correctly clamped to maxChars when flushOnParagraph is enabled. - Updated logic in EmbeddedBlockChunker to handle cases where the next paragraph break exceeds maxChars, ensuring proper chunking behavior. * refactor: streamline logging and improve error handling in message processing - Removed verbose logging statements from the `processMessage` function to reduce clutter. - Enhanced error handling by using `runtime.error` for typing restart failures. - Updated the `applySystemPromptOverrideToSession` function to accept a string directly instead of a function, simplifying the prompt application process. - Adjusted the `runEmbeddedAttempt` function to directly use the system prompt override without invoking it as a function.
2026-02-02 01:22:41 -08:00
flushOnParagraph?: boolean;
};
resolvedBlockStreamingBreak: "text_end" | "message_end";
applyReplyToMode: (payload: ReplyPayload) => ReplyPayload;
shouldEmitToolResult: () => boolean;
2026-01-17 05:33:27 +00:00
shouldEmitToolOutput: () => boolean;
pendingToolTasks: Set<Promise<void>>;
resetSessionAfterCompactionFailure: (reason: string) => Promise<boolean>;
resetSessionAfterRoleOrderingConflict: (reason: string) => Promise<boolean>;
isHeartbeat: boolean;
sessionKey?: string;
getActiveSessionEntry: () => SessionEntry | undefined;
activeSessionStore?: Record<string, SessionEntry>;
storePath?: string;
resolvedVerboseLevel: VerboseLevel;
}): Promise<AgentRunLoopResult> {
const TRANSIENT_HTTP_RETRY_DELAY_MS = 2_500;
let didLogHeartbeatStrip = false;
let autoCompactionCompleted = false;
// Track payloads sent directly (not via pipeline) during tool flush to avoid duplicates.
const directlySentBlockKeys = new Set<string>();
2026-01-23 22:51:37 +00:00
const runId = params.opts?.runId ?? crypto.randomUUID();
let didNotifyAgentRunStart = false;
const notifyAgentRunStart = () => {
if (didNotifyAgentRunStart) {
return;
}
didNotifyAgentRunStart = true;
params.opts?.onAgentRunStart?.(runId);
};
if (params.sessionKey) {
registerAgentRunContext(runId, {
sessionKey: params.sessionKey,
verboseLevel: params.resolvedVerboseLevel,
isHeartbeat: params.isHeartbeat,
});
}
let runResult: Awaited<ReturnType<typeof runEmbeddedPiAgent>>;
let fallbackProvider = params.followupRun.run.provider;
let fallbackModel = params.followupRun.run.model;
let fallbackAttempts: RuntimeFallbackAttempt[] = [];
let didResetAfterCompactionFailure = false;
let didRetryTransientHttpError = false;
while (true) {
try {
2026-01-19 00:34:16 +00:00
const normalizeStreamingText = (payload: ReplyPayload): { text?: string; skip: boolean } => {
let text = payload.text;
if (!params.isHeartbeat && text?.includes("HEARTBEAT_OK")) {
const stripped = stripHeartbeatToken(text, {
mode: "message",
});
if (stripped.didStrip && !didLogHeartbeatStrip) {
didLogHeartbeatStrip = true;
logVerbose("Stripped stray HEARTBEAT_OK token from reply");
}
if (stripped.shouldSkip && (payload.mediaUrls?.length ?? 0) === 0) {
return { skip: true };
}
text = stripped.text;
}
if (isSilentReplyText(text, SILENT_REPLY_TOKEN)) {
return { skip: true };
}
if (!text) {
// Allow media-only payloads (e.g. tool result screenshots) through.
if ((payload.mediaUrls?.length ?? 0) > 0) {
return { text: undefined, skip: false };
}
return { skip: true };
}
const sanitized = sanitizeUserFacingText(text, {
errorContext: Boolean(payload.isError),
});
if (!sanitized.trim()) {
return { skip: true };
}
return { text: sanitized, skip: false };
};
2026-01-19 00:34:16 +00:00
const handlePartialForTyping = async (payload: ReplyPayload): Promise<string | undefined> => {
if (isSilentReplyPrefixText(payload.text, SILENT_REPLY_TOKEN)) {
return undefined;
}
const { text, skip } = normalizeStreamingText(payload);
if (skip || !text) {
return undefined;
}
await params.typingSignals.signalTextDelta(text);
return text;
};
const blockReplyPipeline = params.blockReplyPipeline;
const onToolResult = params.opts?.onToolResult;
const fallbackResult = await runWithModelFallback({
...resolveModelFallbackOptions(params.followupRun.run),
run: (provider, model) => {
// Notify that model selection is complete (including after fallback).
// This allows responsePrefix template interpolation with the actual model.
params.opts?.onModelSelected?.({
provider,
model,
thinkLevel: params.followupRun.run.thinkLevel,
});
if (isCliProvider(provider, params.followupRun.run.config)) {
const startedAt = Date.now();
notifyAgentRunStart();
emitAgentEvent({
runId,
stream: "lifecycle",
data: {
phase: "start",
startedAt,
},
});
2026-01-19 00:34:16 +00:00
const cliSessionId = getCliSessionId(params.getActiveSessionEntry(), provider);
2026-02-02 02:06:14 -08:00
return (async () => {
let lifecycleTerminalEmitted = false;
try {
const result = await runCliAgent({
sessionId: params.followupRun.run.sessionId,
sessionKey: params.sessionKey,
agentId: params.followupRun.run.agentId,
2026-02-02 02:06:14 -08:00
sessionFile: params.followupRun.run.sessionFile,
workspaceDir: params.followupRun.run.workspaceDir,
config: params.followupRun.run.config,
prompt: params.commandBody,
provider,
model,
thinkLevel: params.followupRun.run.thinkLevel,
timeoutMs: params.followupRun.run.timeoutMs,
runId,
extraSystemPrompt: params.followupRun.run.extraSystemPrompt,
ownerNumbers: params.followupRun.run.ownerNumbers,
cliSessionId,
images: params.opts?.images,
});
// CLI backends don't emit streaming assistant events, so we need to
// emit one with the final text so server-chat can populate its buffer
// and send the response to TUI/WebSocket clients.
const cliText = result.payloads?.[0]?.text?.trim();
if (cliText) {
emitAgentEvent({
runId,
stream: "assistant",
data: { text: cliText },
});
}
2026-02-02 02:06:14 -08:00
emitAgentEvent({
runId,
stream: "lifecycle",
data: {
phase: "end",
startedAt,
endedAt: Date.now(),
},
});
2026-02-02 02:06:14 -08:00
lifecycleTerminalEmitted = true;
return result;
2026-02-02 02:06:14 -08:00
} catch (err) {
emitAgentEvent({
runId,
stream: "lifecycle",
data: {
phase: "error",
startedAt,
endedAt: Date.now(),
2026-02-02 02:06:14 -08:00
error: String(err),
},
});
2026-02-02 02:06:14 -08:00
lifecycleTerminalEmitted = true;
throw err;
2026-02-02 02:06:14 -08:00
} finally {
// Defensive backstop: never let a CLI run complete without a terminal
// lifecycle event, otherwise downstream consumers can hang.
if (!lifecycleTerminalEmitted) {
emitAgentEvent({
runId,
stream: "lifecycle",
data: {
phase: "error",
startedAt,
endedAt: Date.now(),
error: "CLI run completed without lifecycle terminal event",
},
});
}
}
})();
}
const { authProfile, embeddedContext, senderContext } = buildEmbeddedRunContexts({
run: params.followupRun.run,
sessionCtx: params.sessionCtx,
hasRepliedRef: params.opts?.hasRepliedRef,
provider,
});
const runBaseParams = buildEmbeddedRunBaseParams({
run: params.followupRun.run,
provider,
model,
runId,
authProfile,
});
return runEmbeddedPiAgent({
...embeddedContext,
2026-01-24 15:35:05 +13:00
groupId: resolveGroupSessionKey(params.sessionCtx)?.id,
groupChannel:
params.sessionCtx.GroupChannel?.trim() ?? params.sessionCtx.GroupSubject?.trim(),
groupSpace: params.sessionCtx.GroupSpace?.trim() ?? undefined,
...senderContext,
...runBaseParams,
prompt: params.commandBody,
extraSystemPrompt: params.followupRun.run.extraSystemPrompt,
toolResultFormat: (() => {
const channel = resolveMessageChannel(
params.sessionCtx.Surface,
params.sessionCtx.Provider,
);
if (!channel) {
return "markdown";
}
2026-01-19 00:34:16 +00:00
return isMarkdownCapableMessageChannel(channel) ? "markdown" : "plain";
})(),
suppressToolErrorWarnings: params.opts?.suppressToolErrorWarnings,
2026-01-23 22:51:37 +00:00
images: params.opts?.images,
abortSignal: params.opts?.abortSignal,
blockReplyBreak: params.resolvedBlockStreamingBreak,
blockReplyChunking: params.blockReplyChunking,
onPartialReply: async (payload) => {
const textForTyping = await handlePartialForTyping(payload);
if (!params.opts?.onPartialReply || textForTyping === undefined) {
return;
}
await params.opts.onPartialReply({
text: textForTyping,
mediaUrls: payload.mediaUrls,
});
},
onAssistantMessageStart: async () => {
await params.typingSignals.signalMessageStart();
await params.opts?.onAssistantMessageStart?.();
},
onReasoningStream:
2026-01-19 00:34:16 +00:00
params.typingSignals.shouldStartOnReasoning || params.opts?.onReasoningStream
? async (payload) => {
await params.typingSignals.signalReasoningDelta();
await params.opts?.onReasoningStream?.({
text: payload.text,
mediaUrls: payload.mediaUrls,
});
}
: undefined,
onReasoningEnd: params.opts?.onReasoningEnd,
onAgentEvent: async (evt) => {
// Signal run start only after the embedded agent emits real activity.
const hasLifecyclePhase =
evt.stream === "lifecycle" && typeof evt.data.phase === "string";
if (evt.stream !== "lifecycle" || hasLifecyclePhase) {
notifyAgentRunStart();
}
// Trigger typing when tools start executing.
// Must await to ensure typing indicator starts before tool summaries are emitted.
if (evt.stream === "tool") {
2026-01-19 00:34:16 +00:00
const phase = typeof evt.data.phase === "string" ? evt.data.phase : "";
const name = typeof evt.data.name === "string" ? evt.data.name : undefined;
if (phase === "start" || phase === "update") {
await params.typingSignals.signalToolStart();
await params.opts?.onToolStart?.({ name, phase });
}
}
// Track auto-compaction completion
if (evt.stream === "compaction") {
2026-01-19 00:34:16 +00:00
const phase = typeof evt.data.phase === "string" ? evt.data.phase : "";
Agents: add nested subagent orchestration controls and reduce subagent token waste (#14447) * Agents: add subagent orchestration controls * Agents: add subagent orchestration controls (WIP uncommitted changes) * feat(subagents): add depth-based spawn gating for sub-sub-agents * feat(subagents): tool policy, registry, and announce chain for nested agents * feat(subagents): system prompt, docs, changelog for nested sub-agents * fix(subagents): prevent model fallback override, show model during active runs, and block context overflow fallback Bug 1: When a session has an explicit model override (e.g., gpt/openai-codex), the fallback candidate logic in resolveFallbackCandidates silently appended the global primary model (opus) as a backstop. On reinjection/steer with a transient error, the session could fall back to opus which has a smaller context window and crash. Fix: when storedModelOverride is set, pass fallbacksOverride ?? [] instead of undefined, preventing the implicit primary backstop. Bug 2: Active subagents showed 'model n/a' in /subagents list because resolveModelDisplay only read entry.model/modelProvider (populated after run completes). Fix: fall back to modelOverride/providerOverride fields which are populated at spawn time via sessions.patch. Bug 3: Context overflow errors (prompt too long, context_length_exceeded) could theoretically escape runEmbeddedPiAgent and be treated as failover candidates in runWithModelFallback, causing a switch to a model with a smaller context window. Fix: in runWithModelFallback, detect context overflow errors via isLikelyContextOverflowError and rethrow them immediately instead of trying the next model candidate. * fix(subagents): track spawn depth in session store and fix announce routing for nested agents * Fix compaction status tracking and dedupe overflow compaction triggers * fix(subagents): enforce depth block via session store and implement cascade kill * fix: inject group chat context into system prompt * fix(subagents): always write model to session store at spawn time * Preserve spawnDepth when agent handler rewrites session entry * fix(subagents): suppress announce on steer-restart * fix(subagents): fallback spawned session model to runtime default * fix(subagents): enforce spawn depth when caller key resolves by sessionId * feat(subagents): implement active-first ordering for numeric targets and enhance task display - Added a test to verify that subagents with numeric targets follow an active-first list ordering. - Updated `resolveSubagentTarget` to sort subagent runs based on active status and recent activity. - Enhanced task display in command responses to prevent truncation of long task descriptions. - Introduced new utility functions for compacting task text and managing subagent run states. * fix(subagents): show model for active runs via run record fallback When the spawned model matches the agent's default model, the session store's override fields are intentionally cleared (isDefault: true). The model/modelProvider fields are only populated after the run completes. This left active subagents showing 'model n/a'. Fix: store the resolved model on SubagentRunRecord at registration time, and use it as a fallback in both display paths (subagents tool and /subagents command) when the session store entry has no model info. Changes: - SubagentRunRecord: add optional model field - registerSubagentRun: accept and persist model param - sessions-spawn-tool: pass resolvedModel to registerSubagentRun - subagents-tool: pass run record model as fallback to resolveModelDisplay - commands-subagents: pass run record model as fallback to resolveModelDisplay * feat(chat): implement session key resolution and reset on sidebar navigation - Added functions to resolve the main session key and reset chat state when switching sessions from the sidebar. - Updated the `renderTab` function to handle session key changes when navigating to the chat tab. - Introduced a test to verify that the session resets to "main" when opening chat from the sidebar navigation. * fix: subagent timeout=0 passthrough and fallback prompt duplication Bug 1: runTimeoutSeconds=0 now means 'no timeout' instead of applying 600s default - sessions-spawn-tool: default to undefined (not 0) when neither timeout param is provided; use != null check so explicit 0 passes through to gateway - agent.ts: accept 0 as valid timeout (resolveAgentTimeoutMs already handles 0 → MAX_SAFE_TIMEOUT_MS) Bug 2: model fallback no longer re-injects the original prompt as a duplicate - agent.ts: track fallback attempt index; on retries use a short continuation message instead of the full original prompt since the session file already contains it from the first attempt - Also skip re-sending images on fallback retries (already in session) * feat(subagents): truncate long task descriptions in subagents command output - Introduced a new utility function to format task previews, limiting their length to improve readability. - Updated the command handler to use the new formatting function, ensuring task descriptions are truncated appropriately. - Adjusted related tests to verify that long task descriptions are now truncated in the output. * refactor(subagents): update subagent registry path resolution and improve command output formatting - Replaced direct import of STATE_DIR with a utility function to resolve the state directory dynamically. - Enhanced the formatting of command output for active and recent subagents, adding separators for better readability. - Updated related tests to reflect changes in command output structure. * fix(subagent): default sessions_spawn to no timeout when runTimeoutSeconds omitted The previous fix (75a791106) correctly handled the case where runTimeoutSeconds was explicitly set to 0 ("no timeout"). However, when models omit the parameter entirely (which is common since the schema marks it as optional), runTimeoutSeconds resolved to undefined. undefined flowed through the chain as: sessions_spawn → timeout: undefined (since undefined != null is false) → gateway agent handler → agentCommand opts.timeout: undefined → resolveAgentTimeoutMs({ overrideSeconds: undefined }) → DEFAULT_AGENT_TIMEOUT_SECONDS (600s = 10 minutes) This caused subagents to be killed at exactly 10 minutes even though the user's intent (via TOOLS.md) was for subagents to run without a timeout. Fix: default runTimeoutSeconds to 0 (no timeout) when neither runTimeoutSeconds nor timeoutSeconds is provided by the caller. Subagent spawns are long-running by design and should not inherit the 600s agent-command default timeout. * fix(subagent): accept timeout=0 in agent-via-gateway path (second 600s default) * fix: thread timeout override through getReplyFromConfig dispatch path getReplyFromConfig called resolveAgentTimeoutMs({ cfg }) with no override, always falling back to the config default (600s). Add timeoutOverrideSeconds to GetReplyOptions and pass it through as overrideSeconds so callers of the dispatch chain can specify a custom timeout (0 = no timeout). This complements the existing timeout threading in agentCommand and the cron isolated-agent runner, which already pass overrideSeconds correctly. * feat(model-fallback): normalize OpenAI Codex model references and enhance fallback handling - Added normalization for OpenAI Codex model references, specifically converting "gpt-5.3-codex" to "openai-codex" before execution. - Updated the `resolveFallbackCandidates` function to utilize the new normalization logic. - Enhanced tests to verify the correct behavior of model normalization and fallback mechanisms. - Introduced a new test case to ensure that the normalization process works as expected for various input formats. * feat(tests): add unit tests for steer failure behavior in openclaw-tools - Introduced a new test file to validate the behavior of subagents when steer replacement dispatch fails. - Implemented tests to ensure that the announce behavior is restored correctly and that the suppression reason is cleared as expected. - Enhanced the subagent registry with a new function to clear steer restart suppression. - Updated related components to support the new test scenarios. * fix(subagents): replace stop command with kill in slash commands and documentation - Updated the `/subagents` command to replace `stop` with `kill` for consistency in controlling sub-agent runs. - Modified related documentation to reflect the change in command usage. - Removed legacy timeoutSeconds references from the sessions-spawn-tool schema and tests to streamline timeout handling. - Enhanced tests to ensure correct behavior of the updated commands and their interactions. * feat(tests): add unit tests for readLatestAssistantReply function - Introduced a new test file for the `readLatestAssistantReply` function to validate its behavior with various message scenarios. - Implemented tests to ensure the function correctly retrieves the latest assistant message and handles cases where the latest message has no text. - Mocked the gateway call to simulate different message histories for comprehensive testing. * feat(tests): enhance subagent kill-all cascade tests and announce formatting - Added a new test to verify that the `kill-all` command cascades through ended parents to active descendants in subagents. - Updated the subagent announce formatting tests to reflect changes in message structure, including the replacement of "Findings:" with "Result:" and the addition of new expectations for message content. - Improved the handling of long findings and stats in the announce formatting logic to ensure concise output. - Refactored related functions to enhance clarity and maintainability in the subagent registry and tools. * refactor(subagent): update announce formatting and remove unused constants - Modified the subagent announce formatting to replace "Findings:" with "Result:" and adjusted related expectations in tests. - Removed constants for maximum announce findings characters and summary words, simplifying the announcement logic. - Updated the handling of findings to retain full content instead of truncating, ensuring more informative outputs. - Cleaned up unused imports in the commands-subagents file to enhance code clarity. * feat(tests): enhance billing error handling in user-facing text - Added tests to ensure that normal text mentioning billing plans is not rewritten, preserving user context. - Updated the `isBillingErrorMessage` and `sanitizeUserFacingText` functions to improve handling of billing-related messages. - Introduced new test cases for various scenarios involving billing messages to ensure accurate processing and output. - Enhanced the subagent announce flow to correctly manage active descendant runs, preventing premature announcements. * feat(subagent): enhance workflow guidance and auto-announcement clarity - Added a new guideline in the subagent system prompt to emphasize trust in push-based completion, discouraging busy polling for status updates. - Updated documentation to clarify that sub-agents will automatically announce their results, improving user understanding of the workflow. - Enhanced tests to verify the new guidance on avoiding polling loops and to ensure the accuracy of the updated prompts. * fix(cron): avoid announcing interim subagent spawn acks * chore: clean post-rebase imports * fix(cron): fall back to child replies when parent stays interim * fix(subagents): make active-run guidance advisory * fix(subagents): update announce flow to handle active descendants and enhance test coverage - Modified the announce flow to defer announcements when active descendant runs are present, ensuring accurate status reporting. - Updated tests to verify the new behavior, including scenarios where no fallback requester is available and ensuring proper handling of finished subagents. - Enhanced the announce formatting to include an `expectFinal` flag for better clarity in the announcement process. * fix(subagents): enhance announce flow and formatting for user updates - Updated the announce flow to provide clearer instructions for user updates based on active subagent runs and requester context. - Refactored the announcement logic to improve clarity and ensure internal context remains private. - Enhanced tests to verify the new message expectations and formatting, including updated prompts for user-facing updates. - Introduced a new function to build reply instructions based on session context, improving the overall announcement process. * fix: resolve prep blockers and changelog placement (#14447) (thanks @tyler6204) * fix: restore cron delivery-plan import after rebase (#14447) (thanks @tyler6204) * fix: resolve test failures from rebase conflicts (#14447) (thanks @tyler6204) * fix: apply formatting after rebase (#14447) (thanks @tyler6204)
2026-02-14 22:03:45 -08:00
if (phase === "end") {
autoCompactionCompleted = true;
}
}
},
// Always pass onBlockReply so flushBlockReplyBuffer works before tool execution,
// even when regular block streaming is disabled. The handler sends directly
// via opts.onBlockReply when the pipeline isn't available.
onBlockReply: params.opts?.onBlockReply
? createBlockReplyDeliveryHandler({
onBlockReply: params.opts.onBlockReply,
currentMessageId:
params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid,
normalizeStreamingText,
applyReplyToMode: params.applyReplyToMode,
typingSignals: params.typingSignals,
blockStreamingEnabled: params.blockStreamingEnabled,
blockReplyPipeline,
directlySentBlockKeys,
})
: undefined,
onBlockReplyFlush:
params.blockStreamingEnabled && blockReplyPipeline
? async () => {
await blockReplyPipeline.flush({ force: true });
}
: undefined,
shouldEmitToolResult: params.shouldEmitToolResult,
2026-01-17 05:33:27 +00:00
shouldEmitToolOutput: params.shouldEmitToolOutput,
onToolResult: onToolResult
? (() => {
// Serialize tool result delivery to preserve message ordering.
// Without this, concurrent tool callbacks race through typing signals
// and message sends, causing out-of-order delivery to the user.
// See: https://github.com/openclaw/openclaw/issues/11044
let toolResultChain: Promise<void> = Promise.resolve();
return (payload: ReplyPayload) => {
toolResultChain = toolResultChain
.then(async () => {
const { text, skip } = normalizeStreamingText(payload);
if (skip) {
return;
}
await params.typingSignals.signalTextDelta(text);
await onToolResult({
text,
mediaUrls: payload.mediaUrls,
});
})
.catch((err) => {
// Keep chain healthy after an error so later tool results still deliver.
logVerbose(`tool result delivery failed: ${String(err)}`);
});
const task = toolResultChain.finally(() => {
params.pendingToolTasks.delete(task);
});
params.pendingToolTasks.add(task);
};
})()
: undefined,
});
},
});
runResult = fallbackResult.result;
fallbackProvider = fallbackResult.provider;
fallbackModel = fallbackResult.model;
fallbackAttempts = Array.isArray(fallbackResult.attempts)
? fallbackResult.attempts.map((attempt) => ({
provider: String(attempt.provider ?? ""),
model: String(attempt.model ?? ""),
error: String(attempt.error ?? ""),
reason: attempt.reason ? String(attempt.reason) : undefined,
status: typeof attempt.status === "number" ? attempt.status : undefined,
code: attempt.code ? String(attempt.code) : undefined,
}))
: [];
// Some embedded runs surface context overflow as an error payload instead of throwing.
// Treat those as a session-level failure and auto-recover by starting a fresh session.
const embeddedError = runResult.meta?.error;
if (
embeddedError &&
isContextOverflowError(embeddedError.message) &&
!didResetAfterCompactionFailure &&
(await params.resetSessionAfterCompactionFailure(embeddedError.message))
) {
didResetAfterCompactionFailure = true;
return {
kind: "final",
payload: {
text: "⚠️ Context limit exceeded. I've reset our conversation to start fresh - please try again.\n\nTo prevent this, increase your compaction buffer by setting `agents.defaults.compaction.reserveTokensFloor` to 20000 or higher in your config.",
},
};
}
if (embeddedError?.kind === "role_ordering") {
2026-01-19 00:34:16 +00:00
const didReset = await params.resetSessionAfterRoleOrderingConflict(embeddedError.message);
if (didReset) {
return {
kind: "final",
payload: {
text: "⚠️ Message ordering conflict. I've reset the conversation - please try again.",
},
};
}
}
break;
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
const isContextOverflow = isLikelyContextOverflowError(message);
const isCompactionFailure = isCompactionFailureError(message);
2026-01-19 00:34:16 +00:00
const isSessionCorruption = /function call turn comes immediately after/i.test(message);
const isRoleOrderingError = /incorrect role information|roles must alternate/i.test(message);
const isTransientHttp = isTransientHttpError(message);
if (
isCompactionFailure &&
!didResetAfterCompactionFailure &&
(await params.resetSessionAfterCompactionFailure(message))
) {
didResetAfterCompactionFailure = true;
return {
kind: "final",
payload: {
text: "⚠️ Context limit exceeded during compaction. I've reset our conversation to start fresh - please try again.\n\nTo prevent this, increase your compaction buffer by setting `agents.defaults.compaction.reserveTokensFloor` to 20000 or higher in your config.",
},
};
}
if (isRoleOrderingError) {
2026-01-19 00:34:16 +00:00
const didReset = await params.resetSessionAfterRoleOrderingConflict(message);
if (didReset) {
return {
kind: "final",
payload: {
text: "⚠️ Message ordering conflict. I've reset the conversation - please try again.",
},
};
}
}
// Auto-recover from Gemini session corruption by resetting the session
if (
isSessionCorruption &&
params.sessionKey &&
params.activeSessionStore &&
params.storePath
) {
const sessionKey = params.sessionKey;
const corruptedSessionId = params.getActiveSessionEntry()?.sessionId;
defaultRuntime.error(
`Session history corrupted (Gemini function call ordering). Resetting session: ${params.sessionKey}`,
);
try {
// Delete transcript file if it exists
if (corruptedSessionId) {
2026-01-19 00:34:16 +00:00
const transcriptPath = resolveSessionTranscriptPath(corruptedSessionId);
try {
fs.unlinkSync(transcriptPath);
} catch {
// Ignore if file doesn't exist
}
}
// Keep the in-memory snapshot consistent with the on-disk store reset.
delete params.activeSessionStore[sessionKey];
// Remove session entry from store using a fresh, locked snapshot.
await updateSessionStore(params.storePath, (store) => {
delete store[sessionKey];
});
} catch (cleanupErr) {
defaultRuntime.error(
`Failed to reset corrupted session ${params.sessionKey}: ${String(cleanupErr)}`,
);
}
return {
kind: "final",
payload: {
text: "⚠️ Session history was corrupted. I've reset the conversation - please try again!",
},
};
}
if (isTransientHttp && !didRetryTransientHttpError) {
didRetryTransientHttpError = true;
// Retry the full runWithModelFallback() cycle — transient errors
// (502/521/etc.) typically affect the whole provider, so falling
// back to an alternate model first would not help. Instead we wait
// and retry the complete primary→fallback chain.
defaultRuntime.error(
`Transient HTTP provider error before reply (${message}). Retrying once in ${TRANSIENT_HTTP_RETRY_DELAY_MS}ms.`,
);
await new Promise<void>((resolve) => {
setTimeout(resolve, TRANSIENT_HTTP_RETRY_DELAY_MS);
});
continue;
}
defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
const safeMessage = isTransientHttp
? sanitizeUserFacingText(message, { errorContext: true })
: message;
const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
const fallbackText = isContextOverflow
? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
: isRoleOrderingError
? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
2026-01-30 03:15:10 +01:00
: `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
return {
kind: "final",
payload: {
text: fallbackText,
},
};
}
}
return {
kind: "success",
runId,
runResult,
fallbackProvider,
fallbackModel,
fallbackAttempts,
didLogHeartbeatStrip,
autoCompactionCompleted,
2026-01-19 00:34:16 +00:00
directlySentBlockKeys: directlySentBlockKeys.size > 0 ? directlySentBlockKeys : undefined,
};
}