From ee03ade0d63fff3098d27b603ce20a5f01036e64 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 1 Mar 2026 23:51:43 +0000 Subject: [PATCH] fix(agents): harden tool-name normalization and transcript repair Landed from contributor PRs #30620 and #30735 by @Sid-Qin, plus #30881 by @liuxiaopai-ai. Co-authored-by: SidQin-cyber Co-authored-by: liuxiaopai-ai <73659136+liuxiaopai-ai@users.noreply.github.com> --- CHANGELOG.md | 1 + .../pi-embedded-runner/run/attempt.test.ts | 44 +++++++++++++ src/agents/pi-embedded-runner/run/attempt.ts | 66 +++++++++++++++---- src/agents/session-tool-result-guard.test.ts | 22 +++++++ src/agents/session-tool-result-guard.ts | 39 ++++++++++- src/agents/session-transcript-repair.test.ts | 23 +++++++ src/agents/session-transcript-repair.ts | 42 +++++++++++- 7 files changed, 224 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 696a1b60e..e8a6fee69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -139,6 +139,7 @@ Docs: https://docs.openclaw.ai - Dashboard/Sessions: allow authenticated Control UI clients to delete and patch sessions while still blocking regular webchat clients from session mutation RPCs, fixing Dashboard session delete failures. (#21264) Thanks @jskoiz. - TUI/Session model status: clear stale runtime model identity when model overrides change so `/model` updates are reflected immediately in `sessions.patch` responses and `sessions.list` status surfaces. (#28619) Thanks @lejean2000. - Agents/Session status: read thinking/verbose/reasoning levels from persisted session state in `session_status` output when resolved levels are not provided, so status reflects runtime toggles correctly. (#30129) Thanks @YuzuruS. +- Agents/Tool-name recovery chain: normalize streamed alias/case tool names against the allowed set, preserve whitespace-only streamed placeholders to avoid collapsing to empty names, and repair/guard persisted blank `toolResult.toolName` values from matching tool calls to reduce repeated `Tool not found` loops in long sessions. Landed from contributor PRs #30620 and #30735 by @Sid-Qin, plus #30881 by @liuxiaopai-ai. Thanks @Sid-Qin and @liuxiaopai-ai. - TUI/SIGTERM shutdown: ignore `setRawMode EBADF` teardown errors during `SIGTERM` exit so long-running TUI sessions do not crash on terminal shutdown races, while still rethrowing unrelated stop errors. (#29430) Thanks @Cormazabal. - Memory/Hybrid recall: when strict hybrid scoring yields no hits, preserve keyword-backed matches using a text-weight floor so freshly indexed lexical canaries no longer disappear behind `minScore` filtering. (#29112) Thanks @ceo-nada. - Android/Notifications auth race: return `NOT_AUTHORIZED` when `POST_NOTIFICATIONS` is revoked between authorization precheck and delivery, instead of returning success while dropping the notification. (#30726) Thanks @obviyus. diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index cb83508ab..c7ef97c41 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -177,6 +177,50 @@ describe("wrapStreamFnTrimToolCallNames", () => { expect(result).toBe(finalMessage); expect(baseFn).toHaveBeenCalledTimes(1); }); + it("normalizes common tool aliases when the canonical name is allowed", async () => { + const finalToolCall = { type: "toolCall", name: " BASH " }; + const finalMessage = { role: "assistant", content: [finalToolCall] }; + const baseFn = vi.fn(() => + createFakeStream({ + events: [], + resultMessage: finalMessage, + }), + ); + + const wrappedFn = wrapStreamFnTrimToolCallNames(baseFn as never, new Set(["exec"])); + const stream = wrappedFn({} as never, {} as never, {} as never) as Awaited< + ReturnType + >; + const result = await stream.result(); + + expect(finalToolCall.name).toBe("exec"); + expect(result).toBe(finalMessage); + }); + + it("does not collapse whitespace-only tool names to empty strings", async () => { + const partialToolCall = { type: "toolCall", name: " " }; + const finalToolCall = { type: "toolCall", name: "\t " }; + const event = { + type: "toolcall_delta", + partial: { role: "assistant", content: [partialToolCall] }, + }; + const finalMessage = { role: "assistant", content: [finalToolCall] }; + const baseFn = vi.fn(() => createFakeStream({ events: [event], resultMessage: finalMessage })); + + const wrappedFn = wrapStreamFnTrimToolCallNames(baseFn as never); + const stream = wrappedFn({} as never, {} as never, {} as never) as Awaited< + ReturnType + >; + + for await (const _item of stream) { + // drain + } + await stream.result(); + + expect(partialToolCall.name).toBe(" "); + expect(finalToolCall.name).toBe("\t "); + expect(baseFn).toHaveBeenCalledTimes(1); + }); }); describe("isOllamaCompatProvider", () => { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 7d7f47382..29cb6122d 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -75,6 +75,7 @@ import { buildSystemPromptParams } from "../../system-prompt-params.js"; import { buildSystemPromptReport } from "../../system-prompt-report.js"; import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js"; import { resolveEffectiveToolFsWorkspaceOnly } from "../../tool-fs-policy.js"; +import { normalizeToolName } from "../../tool-policy.js"; import { resolveTranscriptPolicy } from "../../transcript-policy.js"; import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js"; import { isRunnerAbortError } from "../abort.js"; @@ -226,7 +227,41 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num }); } -function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void { +function normalizeToolCallNameForDispatch(rawName: string, allowedToolNames?: Set): string { + const trimmed = rawName.trim(); + if (!trimmed) { + // Keep whitespace-only placeholders unchanged so they do not collapse to + // empty names (which can later surface as toolName="" loops). + return rawName; + } + if (!allowedToolNames || allowedToolNames.size === 0) { + return trimmed; + } + if (allowedToolNames.has(trimmed)) { + return trimmed; + } + const normalized = normalizeToolName(trimmed); + if (allowedToolNames.has(normalized)) { + return normalized; + } + const folded = trimmed.toLowerCase(); + let caseInsensitiveMatch: string | null = null; + for (const name of allowedToolNames) { + if (name.toLowerCase() !== folded) { + continue; + } + if (caseInsensitiveMatch && caseInsensitiveMatch !== name) { + return trimmed; + } + caseInsensitiveMatch = name; + } + return caseInsensitiveMatch ?? trimmed; +} + +function trimWhitespaceFromToolCallNamesInMessage( + message: unknown, + allowedToolNames?: Set, +): void { if (!message || typeof message !== "object") { return; } @@ -242,20 +277,21 @@ function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void { if (typedBlock.type !== "toolCall" || typeof typedBlock.name !== "string") { continue; } - const trimmed = typedBlock.name.trim(); - if (trimmed !== typedBlock.name) { - typedBlock.name = trimmed; + const normalized = normalizeToolCallNameForDispatch(typedBlock.name, allowedToolNames); + if (normalized !== typedBlock.name) { + typedBlock.name = normalized; } } } function wrapStreamTrimToolCallNames( stream: ReturnType, + allowedToolNames?: Set, ): ReturnType { const originalResult = stream.result.bind(stream); stream.result = async () => { const message = await originalResult(); - trimWhitespaceFromToolCallNamesInMessage(message); + trimWhitespaceFromToolCallNamesInMessage(message, allowedToolNames); return message; }; @@ -271,8 +307,8 @@ function wrapStreamTrimToolCallNames( partial?: unknown; message?: unknown; }; - trimWhitespaceFromToolCallNamesInMessage(event.partial); - trimWhitespaceFromToolCallNamesInMessage(event.message); + trimWhitespaceFromToolCallNamesInMessage(event.partial, allowedToolNames); + trimWhitespaceFromToolCallNamesInMessage(event.message, allowedToolNames); } return result; }, @@ -288,13 +324,18 @@ function wrapStreamTrimToolCallNames( return stream; } -export function wrapStreamFnTrimToolCallNames(baseFn: StreamFn): StreamFn { +export function wrapStreamFnTrimToolCallNames( + baseFn: StreamFn, + allowedToolNames?: Set, +): StreamFn { return (model, context, options) => { const maybeStream = baseFn(model, context, options); if (maybeStream && typeof maybeStream === "object" && "then" in maybeStream) { - return Promise.resolve(maybeStream).then((stream) => wrapStreamTrimToolCallNames(stream)); + return Promise.resolve(maybeStream).then((stream) => + wrapStreamTrimToolCallNames(stream, allowedToolNames), + ); } - return wrapStreamTrimToolCallNames(maybeStream); + return wrapStreamTrimToolCallNames(maybeStream, allowedToolNames); }; } @@ -974,7 +1015,10 @@ export async function runEmbeddedAttempt( // Some models emit tool names with surrounding whitespace (e.g. " read "). // pi-agent-core dispatches tool calls with exact string matching, so normalize // names on the live response stream before tool execution. - activeSession.agent.streamFn = wrapStreamFnTrimToolCallNames(activeSession.agent.streamFn); + activeSession.agent.streamFn = wrapStreamFnTrimToolCallNames( + activeSession.agent.streamFn, + allowedToolNames, + ); if (anthropicPayloadLogger) { activeSession.agent.streamFn = anthropicPayloadLogger.wrapStreamFn( diff --git a/src/agents/session-tool-result-guard.test.ts b/src/agents/session-tool-result-guard.test.ts index 7df8b8d48..1e5b772c7 100644 --- a/src/agents/session-tool-result-guard.test.ts +++ b/src/agents/session-tool-result-guard.test.ts @@ -102,6 +102,28 @@ describe("installSessionToolResultGuard", () => { expectPersistedRoles(sm, ["assistant", "toolResult"]); }); + it("backfills blank toolResult names from pending tool calls", () => { + const sm = SessionManager.inMemory(); + installSessionToolResultGuard(sm); + + sm.appendMessage(toolCallMessage); + sm.appendMessage( + asAppendMessage({ + role: "toolResult", + toolCallId: "call_1", + toolName: " ", + content: [{ type: "text", text: "ok" }], + isError: false, + }), + ); + + const messages = expectPersistedRoles(sm, ["assistant", "toolResult"]) as Array<{ + role: string; + toolName?: string; + }>; + expect(messages[1]?.toolName).toBe("read"); + }); + it("preserves ordering with multiple tool calls and partial results", () => { const sm = SessionManager.inMemory(); const guard = installSessionToolResultGuard(sm); diff --git a/src/agents/session-tool-result-guard.ts b/src/agents/session-tool-result-guard.ts index dba618a31..5e27a30bd 100644 --- a/src/agents/session-tool-result-guard.ts +++ b/src/agents/session-tool-result-guard.ts @@ -31,6 +31,42 @@ function capToolResultSize(msg: AgentMessage): AgentMessage { }); } +function trimNonEmptyString(value: unknown): string | undefined { + if (typeof value !== "string") { + return undefined; + } + const trimmed = value.trim(); + return trimmed || undefined; +} + +function normalizePersistedToolResultName( + message: AgentMessage, + fallbackName?: string, +): AgentMessage { + if ((message as { role?: unknown }).role !== "toolResult") { + return message; + } + const toolResult = message as Extract; + const rawToolName = (toolResult as { toolName?: unknown }).toolName; + const normalizedToolName = trimNonEmptyString(rawToolName); + if (normalizedToolName) { + if (rawToolName === normalizedToolName) { + return toolResult; + } + return { ...toolResult, toolName: normalizedToolName }; + } + + const normalizedFallback = trimNonEmptyString(fallbackName); + if (normalizedFallback) { + return { ...toolResult, toolName: normalizedFallback }; + } + + if (typeof rawToolName === "string") { + return { ...toolResult, toolName: "unknown" }; + } + return toolResult; +} + export function installSessionToolResultGuard( sessionManager: SessionManager, opts?: { @@ -150,9 +186,10 @@ export function installSessionToolResultGuard( if (id) { pending.delete(id); } + const normalizedToolResult = normalizePersistedToolResultName(nextMessage, toolName); // Apply hard size cap before persistence to prevent oversized tool results // from consuming the entire context window on subsequent LLM calls. - const capped = capToolResultSize(persistMessage(nextMessage)); + const capped = capToolResultSize(persistMessage(normalizedToolResult)); const persisted = applyBeforeWriteHook( persistToolResult(capped, { toolCallId: id ?? undefined, diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index 1ff6b50ff..e9c60d730 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -74,6 +74,29 @@ describe("sanitizeToolUseResultPairing", () => { expect(out[3]?.role).toBe("user"); }); + it("repairs blank tool result names from matching tool calls", () => { + const input = [ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: " ", + content: [{ type: "text", text: "ok" }], + isError: false, + }, + ] as unknown as AgentMessage[]; + + const out = sanitizeToolUseResultPairing(input); + const toolResult = out.find((message) => message.role === "toolResult") as { + toolName?: string; + }; + + expect(toolResult?.toolName).toBe("read"); + }); + it("drops duplicate tool results for the same id within a span", () => { const input = [ ...buildDuplicateToolResultInput(), diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index 33d7fcc55..b860b2a08 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -91,6 +91,38 @@ function makeMissingToolResult(params: { } as Extract; } +function trimNonEmptyString(value: unknown): string | undefined { + if (typeof value !== "string") { + return undefined; + } + const trimmed = value.trim(); + return trimmed || undefined; +} + +function normalizeToolResultName( + message: Extract, + fallbackName?: string, +): Extract { + const rawToolName = (message as { toolName?: unknown }).toolName; + const normalizedToolName = trimNonEmptyString(rawToolName); + if (normalizedToolName) { + if (rawToolName === normalizedToolName) { + return message; + } + return { ...message, toolName: normalizedToolName }; + } + + const normalizedFallback = trimNonEmptyString(fallbackName); + if (normalizedFallback) { + return { ...message, toolName: normalizedFallback }; + } + + if (typeof rawToolName === "string") { + return { ...message, toolName: "unknown" }; + } + return message; +} + export { makeMissingToolResult }; export type ToolCallInputRepairReport = { @@ -291,6 +323,7 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep } const toolCallIds = new Set(toolCalls.map((t) => t.id)); + const toolCallNamesById = new Map(toolCalls.map((t) => [t.id, t.name] as const)); const spanResultsById = new Map>(); const remainder: AgentMessage[] = []; @@ -317,8 +350,15 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep changed = true; continue; } + const normalizedToolResult = normalizeToolResultName( + toolResult, + toolCallNamesById.get(id), + ); + if (normalizedToolResult !== toolResult) { + changed = true; + } if (!spanResultsById.has(id)) { - spanResultsById.set(id, toolResult); + spanResultsById.set(id, normalizedToolResult); } continue; }