fix(agents): harden tool-name normalization and transcript repair

Landed from contributor PRs #30620 and #30735 by @Sid-Qin, plus #30881 by @liuxiaopai-ai.

Co-authored-by: SidQin-cyber <sidqin0410@gmail.com>
Co-authored-by: liuxiaopai-ai <73659136+liuxiaopai-ai@users.noreply.github.com>
This commit is contained in:
Peter Steinberger
2026-03-01 23:51:43 +00:00
parent 50e2674dfc
commit ee03ade0d6
7 changed files with 224 additions and 13 deletions

View File

@@ -139,6 +139,7 @@ Docs: https://docs.openclaw.ai
- Dashboard/Sessions: allow authenticated Control UI clients to delete and patch sessions while still blocking regular webchat clients from session mutation RPCs, fixing Dashboard session delete failures. (#21264) Thanks @jskoiz.
- TUI/Session model status: clear stale runtime model identity when model overrides change so `/model` updates are reflected immediately in `sessions.patch` responses and `sessions.list` status surfaces. (#28619) Thanks @lejean2000.
- Agents/Session status: read thinking/verbose/reasoning levels from persisted session state in `session_status` output when resolved levels are not provided, so status reflects runtime toggles correctly. (#30129) Thanks @YuzuruS.
- Agents/Tool-name recovery chain: normalize streamed alias/case tool names against the allowed set, preserve whitespace-only streamed placeholders to avoid collapsing to empty names, and repair/guard persisted blank `toolResult.toolName` values from matching tool calls to reduce repeated `Tool not found` loops in long sessions. Landed from contributor PRs #30620 and #30735 by @Sid-Qin, plus #30881 by @liuxiaopai-ai. Thanks @Sid-Qin and @liuxiaopai-ai.
- TUI/SIGTERM shutdown: ignore `setRawMode EBADF` teardown errors during `SIGTERM` exit so long-running TUI sessions do not crash on terminal shutdown races, while still rethrowing unrelated stop errors. (#29430) Thanks @Cormazabal.
- Memory/Hybrid recall: when strict hybrid scoring yields no hits, preserve keyword-backed matches using a text-weight floor so freshly indexed lexical canaries no longer disappear behind `minScore` filtering. (#29112) Thanks @ceo-nada.
- Android/Notifications auth race: return `NOT_AUTHORIZED` when `POST_NOTIFICATIONS` is revoked between authorization precheck and delivery, instead of returning success while dropping the notification. (#30726) Thanks @obviyus.

View File

@@ -177,6 +177,50 @@ describe("wrapStreamFnTrimToolCallNames", () => {
expect(result).toBe(finalMessage);
expect(baseFn).toHaveBeenCalledTimes(1);
});
it("normalizes common tool aliases when the canonical name is allowed", async () => {
const finalToolCall = { type: "toolCall", name: " BASH " };
const finalMessage = { role: "assistant", content: [finalToolCall] };
const baseFn = vi.fn(() =>
createFakeStream({
events: [],
resultMessage: finalMessage,
}),
);
const wrappedFn = wrapStreamFnTrimToolCallNames(baseFn as never, new Set(["exec"]));
const stream = wrappedFn({} as never, {} as never, {} as never) as Awaited<
ReturnType<typeof wrappedFn>
>;
const result = await stream.result();
expect(finalToolCall.name).toBe("exec");
expect(result).toBe(finalMessage);
});
it("does not collapse whitespace-only tool names to empty strings", async () => {
const partialToolCall = { type: "toolCall", name: " " };
const finalToolCall = { type: "toolCall", name: "\t " };
const event = {
type: "toolcall_delta",
partial: { role: "assistant", content: [partialToolCall] },
};
const finalMessage = { role: "assistant", content: [finalToolCall] };
const baseFn = vi.fn(() => createFakeStream({ events: [event], resultMessage: finalMessage }));
const wrappedFn = wrapStreamFnTrimToolCallNames(baseFn as never);
const stream = wrappedFn({} as never, {} as never, {} as never) as Awaited<
ReturnType<typeof wrappedFn>
>;
for await (const _item of stream) {
// drain
}
await stream.result();
expect(partialToolCall.name).toBe(" ");
expect(finalToolCall.name).toBe("\t ");
expect(baseFn).toHaveBeenCalledTimes(1);
});
});
describe("isOllamaCompatProvider", () => {

View File

@@ -75,6 +75,7 @@ import { buildSystemPromptParams } from "../../system-prompt-params.js";
import { buildSystemPromptReport } from "../../system-prompt-report.js";
import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js";
import { resolveEffectiveToolFsWorkspaceOnly } from "../../tool-fs-policy.js";
import { normalizeToolName } from "../../tool-policy.js";
import { resolveTranscriptPolicy } from "../../transcript-policy.js";
import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
import { isRunnerAbortError } from "../abort.js";
@@ -226,7 +227,41 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
});
}
function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void {
function normalizeToolCallNameForDispatch(rawName: string, allowedToolNames?: Set<string>): string {
const trimmed = rawName.trim();
if (!trimmed) {
// Keep whitespace-only placeholders unchanged so they do not collapse to
// empty names (which can later surface as toolName="" loops).
return rawName;
}
if (!allowedToolNames || allowedToolNames.size === 0) {
return trimmed;
}
if (allowedToolNames.has(trimmed)) {
return trimmed;
}
const normalized = normalizeToolName(trimmed);
if (allowedToolNames.has(normalized)) {
return normalized;
}
const folded = trimmed.toLowerCase();
let caseInsensitiveMatch: string | null = null;
for (const name of allowedToolNames) {
if (name.toLowerCase() !== folded) {
continue;
}
if (caseInsensitiveMatch && caseInsensitiveMatch !== name) {
return trimmed;
}
caseInsensitiveMatch = name;
}
return caseInsensitiveMatch ?? trimmed;
}
function trimWhitespaceFromToolCallNamesInMessage(
message: unknown,
allowedToolNames?: Set<string>,
): void {
if (!message || typeof message !== "object") {
return;
}
@@ -242,20 +277,21 @@ function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void {
if (typedBlock.type !== "toolCall" || typeof typedBlock.name !== "string") {
continue;
}
const trimmed = typedBlock.name.trim();
if (trimmed !== typedBlock.name) {
typedBlock.name = trimmed;
const normalized = normalizeToolCallNameForDispatch(typedBlock.name, allowedToolNames);
if (normalized !== typedBlock.name) {
typedBlock.name = normalized;
}
}
}
function wrapStreamTrimToolCallNames(
stream: ReturnType<typeof streamSimple>,
allowedToolNames?: Set<string>,
): ReturnType<typeof streamSimple> {
const originalResult = stream.result.bind(stream);
stream.result = async () => {
const message = await originalResult();
trimWhitespaceFromToolCallNamesInMessage(message);
trimWhitespaceFromToolCallNamesInMessage(message, allowedToolNames);
return message;
};
@@ -271,8 +307,8 @@ function wrapStreamTrimToolCallNames(
partial?: unknown;
message?: unknown;
};
trimWhitespaceFromToolCallNamesInMessage(event.partial);
trimWhitespaceFromToolCallNamesInMessage(event.message);
trimWhitespaceFromToolCallNamesInMessage(event.partial, allowedToolNames);
trimWhitespaceFromToolCallNamesInMessage(event.message, allowedToolNames);
}
return result;
},
@@ -288,13 +324,18 @@ function wrapStreamTrimToolCallNames(
return stream;
}
export function wrapStreamFnTrimToolCallNames(baseFn: StreamFn): StreamFn {
export function wrapStreamFnTrimToolCallNames(
baseFn: StreamFn,
allowedToolNames?: Set<string>,
): StreamFn {
return (model, context, options) => {
const maybeStream = baseFn(model, context, options);
if (maybeStream && typeof maybeStream === "object" && "then" in maybeStream) {
return Promise.resolve(maybeStream).then((stream) => wrapStreamTrimToolCallNames(stream));
return Promise.resolve(maybeStream).then((stream) =>
wrapStreamTrimToolCallNames(stream, allowedToolNames),
);
}
return wrapStreamTrimToolCallNames(maybeStream);
return wrapStreamTrimToolCallNames(maybeStream, allowedToolNames);
};
}
@@ -974,7 +1015,10 @@ export async function runEmbeddedAttempt(
// Some models emit tool names with surrounding whitespace (e.g. " read ").
// pi-agent-core dispatches tool calls with exact string matching, so normalize
// names on the live response stream before tool execution.
activeSession.agent.streamFn = wrapStreamFnTrimToolCallNames(activeSession.agent.streamFn);
activeSession.agent.streamFn = wrapStreamFnTrimToolCallNames(
activeSession.agent.streamFn,
allowedToolNames,
);
if (anthropicPayloadLogger) {
activeSession.agent.streamFn = anthropicPayloadLogger.wrapStreamFn(

View File

@@ -102,6 +102,28 @@ describe("installSessionToolResultGuard", () => {
expectPersistedRoles(sm, ["assistant", "toolResult"]);
});
it("backfills blank toolResult names from pending tool calls", () => {
const sm = SessionManager.inMemory();
installSessionToolResultGuard(sm);
sm.appendMessage(toolCallMessage);
sm.appendMessage(
asAppendMessage({
role: "toolResult",
toolCallId: "call_1",
toolName: " ",
content: [{ type: "text", text: "ok" }],
isError: false,
}),
);
const messages = expectPersistedRoles(sm, ["assistant", "toolResult"]) as Array<{
role: string;
toolName?: string;
}>;
expect(messages[1]?.toolName).toBe("read");
});
it("preserves ordering with multiple tool calls and partial results", () => {
const sm = SessionManager.inMemory();
const guard = installSessionToolResultGuard(sm);

View File

@@ -31,6 +31,42 @@ function capToolResultSize(msg: AgentMessage): AgentMessage {
});
}
function trimNonEmptyString(value: unknown): string | undefined {
if (typeof value !== "string") {
return undefined;
}
const trimmed = value.trim();
return trimmed || undefined;
}
function normalizePersistedToolResultName(
message: AgentMessage,
fallbackName?: string,
): AgentMessage {
if ((message as { role?: unknown }).role !== "toolResult") {
return message;
}
const toolResult = message as Extract<AgentMessage, { role: "toolResult" }>;
const rawToolName = (toolResult as { toolName?: unknown }).toolName;
const normalizedToolName = trimNonEmptyString(rawToolName);
if (normalizedToolName) {
if (rawToolName === normalizedToolName) {
return toolResult;
}
return { ...toolResult, toolName: normalizedToolName };
}
const normalizedFallback = trimNonEmptyString(fallbackName);
if (normalizedFallback) {
return { ...toolResult, toolName: normalizedFallback };
}
if (typeof rawToolName === "string") {
return { ...toolResult, toolName: "unknown" };
}
return toolResult;
}
export function installSessionToolResultGuard(
sessionManager: SessionManager,
opts?: {
@@ -150,9 +186,10 @@ export function installSessionToolResultGuard(
if (id) {
pending.delete(id);
}
const normalizedToolResult = normalizePersistedToolResultName(nextMessage, toolName);
// Apply hard size cap before persistence to prevent oversized tool results
// from consuming the entire context window on subsequent LLM calls.
const capped = capToolResultSize(persistMessage(nextMessage));
const capped = capToolResultSize(persistMessage(normalizedToolResult));
const persisted = applyBeforeWriteHook(
persistToolResult(capped, {
toolCallId: id ?? undefined,

View File

@@ -74,6 +74,29 @@ describe("sanitizeToolUseResultPairing", () => {
expect(out[3]?.role).toBe("user");
});
it("repairs blank tool result names from matching tool calls", () => {
const input = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
},
{
role: "toolResult",
toolCallId: "call_1",
toolName: " ",
content: [{ type: "text", text: "ok" }],
isError: false,
},
] as unknown as AgentMessage[];
const out = sanitizeToolUseResultPairing(input);
const toolResult = out.find((message) => message.role === "toolResult") as {
toolName?: string;
};
expect(toolResult?.toolName).toBe("read");
});
it("drops duplicate tool results for the same id within a span", () => {
const input = [
...buildDuplicateToolResultInput(),

View File

@@ -91,6 +91,38 @@ function makeMissingToolResult(params: {
} as Extract<AgentMessage, { role: "toolResult" }>;
}
function trimNonEmptyString(value: unknown): string | undefined {
if (typeof value !== "string") {
return undefined;
}
const trimmed = value.trim();
return trimmed || undefined;
}
function normalizeToolResultName(
message: Extract<AgentMessage, { role: "toolResult" }>,
fallbackName?: string,
): Extract<AgentMessage, { role: "toolResult" }> {
const rawToolName = (message as { toolName?: unknown }).toolName;
const normalizedToolName = trimNonEmptyString(rawToolName);
if (normalizedToolName) {
if (rawToolName === normalizedToolName) {
return message;
}
return { ...message, toolName: normalizedToolName };
}
const normalizedFallback = trimNonEmptyString(fallbackName);
if (normalizedFallback) {
return { ...message, toolName: normalizedFallback };
}
if (typeof rawToolName === "string") {
return { ...message, toolName: "unknown" };
}
return message;
}
export { makeMissingToolResult };
export type ToolCallInputRepairReport = {
@@ -291,6 +323,7 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
}
const toolCallIds = new Set(toolCalls.map((t) => t.id));
const toolCallNamesById = new Map(toolCalls.map((t) => [t.id, t.name] as const));
const spanResultsById = new Map<string, Extract<AgentMessage, { role: "toolResult" }>>();
const remainder: AgentMessage[] = [];
@@ -317,8 +350,15 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
changed = true;
continue;
}
const normalizedToolResult = normalizeToolResultName(
toolResult,
toolCallNamesById.get(id),
);
if (normalizedToolResult !== toolResult) {
changed = true;
}
if (!spanResultsById.has(id)) {
spanResultsById.set(id, toolResult);
spanResultsById.set(id, normalizedToolResult);
}
continue;
}