fix(agents): harden tool-name normalization and transcript repair
Landed from contributor PRs #30620 and #30735 by @Sid-Qin, plus #30881 by @liuxiaopai-ai. Co-authored-by: SidQin-cyber <sidqin0410@gmail.com> Co-authored-by: liuxiaopai-ai <73659136+liuxiaopai-ai@users.noreply.github.com>
This commit is contained in:
@@ -139,6 +139,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Dashboard/Sessions: allow authenticated Control UI clients to delete and patch sessions while still blocking regular webchat clients from session mutation RPCs, fixing Dashboard session delete failures. (#21264) Thanks @jskoiz.
|
||||
- TUI/Session model status: clear stale runtime model identity when model overrides change so `/model` updates are reflected immediately in `sessions.patch` responses and `sessions.list` status surfaces. (#28619) Thanks @lejean2000.
|
||||
- Agents/Session status: read thinking/verbose/reasoning levels from persisted session state in `session_status` output when resolved levels are not provided, so status reflects runtime toggles correctly. (#30129) Thanks @YuzuruS.
|
||||
- Agents/Tool-name recovery chain: normalize streamed alias/case tool names against the allowed set, preserve whitespace-only streamed placeholders to avoid collapsing to empty names, and repair/guard persisted blank `toolResult.toolName` values from matching tool calls to reduce repeated `Tool not found` loops in long sessions. Landed from contributor PRs #30620 and #30735 by @Sid-Qin, plus #30881 by @liuxiaopai-ai. Thanks @Sid-Qin and @liuxiaopai-ai.
|
||||
- TUI/SIGTERM shutdown: ignore `setRawMode EBADF` teardown errors during `SIGTERM` exit so long-running TUI sessions do not crash on terminal shutdown races, while still rethrowing unrelated stop errors. (#29430) Thanks @Cormazabal.
|
||||
- Memory/Hybrid recall: when strict hybrid scoring yields no hits, preserve keyword-backed matches using a text-weight floor so freshly indexed lexical canaries no longer disappear behind `minScore` filtering. (#29112) Thanks @ceo-nada.
|
||||
- Android/Notifications auth race: return `NOT_AUTHORIZED` when `POST_NOTIFICATIONS` is revoked between authorization precheck and delivery, instead of returning success while dropping the notification. (#30726) Thanks @obviyus.
|
||||
|
||||
@@ -177,6 +177,50 @@ describe("wrapStreamFnTrimToolCallNames", () => {
|
||||
expect(result).toBe(finalMessage);
|
||||
expect(baseFn).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
it("normalizes common tool aliases when the canonical name is allowed", async () => {
|
||||
const finalToolCall = { type: "toolCall", name: " BASH " };
|
||||
const finalMessage = { role: "assistant", content: [finalToolCall] };
|
||||
const baseFn = vi.fn(() =>
|
||||
createFakeStream({
|
||||
events: [],
|
||||
resultMessage: finalMessage,
|
||||
}),
|
||||
);
|
||||
|
||||
const wrappedFn = wrapStreamFnTrimToolCallNames(baseFn as never, new Set(["exec"]));
|
||||
const stream = wrappedFn({} as never, {} as never, {} as never) as Awaited<
|
||||
ReturnType<typeof wrappedFn>
|
||||
>;
|
||||
const result = await stream.result();
|
||||
|
||||
expect(finalToolCall.name).toBe("exec");
|
||||
expect(result).toBe(finalMessage);
|
||||
});
|
||||
|
||||
it("does not collapse whitespace-only tool names to empty strings", async () => {
|
||||
const partialToolCall = { type: "toolCall", name: " " };
|
||||
const finalToolCall = { type: "toolCall", name: "\t " };
|
||||
const event = {
|
||||
type: "toolcall_delta",
|
||||
partial: { role: "assistant", content: [partialToolCall] },
|
||||
};
|
||||
const finalMessage = { role: "assistant", content: [finalToolCall] };
|
||||
const baseFn = vi.fn(() => createFakeStream({ events: [event], resultMessage: finalMessage }));
|
||||
|
||||
const wrappedFn = wrapStreamFnTrimToolCallNames(baseFn as never);
|
||||
const stream = wrappedFn({} as never, {} as never, {} as never) as Awaited<
|
||||
ReturnType<typeof wrappedFn>
|
||||
>;
|
||||
|
||||
for await (const _item of stream) {
|
||||
// drain
|
||||
}
|
||||
await stream.result();
|
||||
|
||||
expect(partialToolCall.name).toBe(" ");
|
||||
expect(finalToolCall.name).toBe("\t ");
|
||||
expect(baseFn).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isOllamaCompatProvider", () => {
|
||||
|
||||
@@ -75,6 +75,7 @@ import { buildSystemPromptParams } from "../../system-prompt-params.js";
|
||||
import { buildSystemPromptReport } from "../../system-prompt-report.js";
|
||||
import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js";
|
||||
import { resolveEffectiveToolFsWorkspaceOnly } from "../../tool-fs-policy.js";
|
||||
import { normalizeToolName } from "../../tool-policy.js";
|
||||
import { resolveTranscriptPolicy } from "../../transcript-policy.js";
|
||||
import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
|
||||
import { isRunnerAbortError } from "../abort.js";
|
||||
@@ -226,7 +227,41 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
|
||||
});
|
||||
}
|
||||
|
||||
function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void {
|
||||
function normalizeToolCallNameForDispatch(rawName: string, allowedToolNames?: Set<string>): string {
|
||||
const trimmed = rawName.trim();
|
||||
if (!trimmed) {
|
||||
// Keep whitespace-only placeholders unchanged so they do not collapse to
|
||||
// empty names (which can later surface as toolName="" loops).
|
||||
return rawName;
|
||||
}
|
||||
if (!allowedToolNames || allowedToolNames.size === 0) {
|
||||
return trimmed;
|
||||
}
|
||||
if (allowedToolNames.has(trimmed)) {
|
||||
return trimmed;
|
||||
}
|
||||
const normalized = normalizeToolName(trimmed);
|
||||
if (allowedToolNames.has(normalized)) {
|
||||
return normalized;
|
||||
}
|
||||
const folded = trimmed.toLowerCase();
|
||||
let caseInsensitiveMatch: string | null = null;
|
||||
for (const name of allowedToolNames) {
|
||||
if (name.toLowerCase() !== folded) {
|
||||
continue;
|
||||
}
|
||||
if (caseInsensitiveMatch && caseInsensitiveMatch !== name) {
|
||||
return trimmed;
|
||||
}
|
||||
caseInsensitiveMatch = name;
|
||||
}
|
||||
return caseInsensitiveMatch ?? trimmed;
|
||||
}
|
||||
|
||||
function trimWhitespaceFromToolCallNamesInMessage(
|
||||
message: unknown,
|
||||
allowedToolNames?: Set<string>,
|
||||
): void {
|
||||
if (!message || typeof message !== "object") {
|
||||
return;
|
||||
}
|
||||
@@ -242,20 +277,21 @@ function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void {
|
||||
if (typedBlock.type !== "toolCall" || typeof typedBlock.name !== "string") {
|
||||
continue;
|
||||
}
|
||||
const trimmed = typedBlock.name.trim();
|
||||
if (trimmed !== typedBlock.name) {
|
||||
typedBlock.name = trimmed;
|
||||
const normalized = normalizeToolCallNameForDispatch(typedBlock.name, allowedToolNames);
|
||||
if (normalized !== typedBlock.name) {
|
||||
typedBlock.name = normalized;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function wrapStreamTrimToolCallNames(
|
||||
stream: ReturnType<typeof streamSimple>,
|
||||
allowedToolNames?: Set<string>,
|
||||
): ReturnType<typeof streamSimple> {
|
||||
const originalResult = stream.result.bind(stream);
|
||||
stream.result = async () => {
|
||||
const message = await originalResult();
|
||||
trimWhitespaceFromToolCallNamesInMessage(message);
|
||||
trimWhitespaceFromToolCallNamesInMessage(message, allowedToolNames);
|
||||
return message;
|
||||
};
|
||||
|
||||
@@ -271,8 +307,8 @@ function wrapStreamTrimToolCallNames(
|
||||
partial?: unknown;
|
||||
message?: unknown;
|
||||
};
|
||||
trimWhitespaceFromToolCallNamesInMessage(event.partial);
|
||||
trimWhitespaceFromToolCallNamesInMessage(event.message);
|
||||
trimWhitespaceFromToolCallNamesInMessage(event.partial, allowedToolNames);
|
||||
trimWhitespaceFromToolCallNamesInMessage(event.message, allowedToolNames);
|
||||
}
|
||||
return result;
|
||||
},
|
||||
@@ -288,13 +324,18 @@ function wrapStreamTrimToolCallNames(
|
||||
return stream;
|
||||
}
|
||||
|
||||
export function wrapStreamFnTrimToolCallNames(baseFn: StreamFn): StreamFn {
|
||||
export function wrapStreamFnTrimToolCallNames(
|
||||
baseFn: StreamFn,
|
||||
allowedToolNames?: Set<string>,
|
||||
): StreamFn {
|
||||
return (model, context, options) => {
|
||||
const maybeStream = baseFn(model, context, options);
|
||||
if (maybeStream && typeof maybeStream === "object" && "then" in maybeStream) {
|
||||
return Promise.resolve(maybeStream).then((stream) => wrapStreamTrimToolCallNames(stream));
|
||||
return Promise.resolve(maybeStream).then((stream) =>
|
||||
wrapStreamTrimToolCallNames(stream, allowedToolNames),
|
||||
);
|
||||
}
|
||||
return wrapStreamTrimToolCallNames(maybeStream);
|
||||
return wrapStreamTrimToolCallNames(maybeStream, allowedToolNames);
|
||||
};
|
||||
}
|
||||
|
||||
@@ -974,7 +1015,10 @@ export async function runEmbeddedAttempt(
|
||||
// Some models emit tool names with surrounding whitespace (e.g. " read ").
|
||||
// pi-agent-core dispatches tool calls with exact string matching, so normalize
|
||||
// names on the live response stream before tool execution.
|
||||
activeSession.agent.streamFn = wrapStreamFnTrimToolCallNames(activeSession.agent.streamFn);
|
||||
activeSession.agent.streamFn = wrapStreamFnTrimToolCallNames(
|
||||
activeSession.agent.streamFn,
|
||||
allowedToolNames,
|
||||
);
|
||||
|
||||
if (anthropicPayloadLogger) {
|
||||
activeSession.agent.streamFn = anthropicPayloadLogger.wrapStreamFn(
|
||||
|
||||
@@ -102,6 +102,28 @@ describe("installSessionToolResultGuard", () => {
|
||||
expectPersistedRoles(sm, ["assistant", "toolResult"]);
|
||||
});
|
||||
|
||||
it("backfills blank toolResult names from pending tool calls", () => {
|
||||
const sm = SessionManager.inMemory();
|
||||
installSessionToolResultGuard(sm);
|
||||
|
||||
sm.appendMessage(toolCallMessage);
|
||||
sm.appendMessage(
|
||||
asAppendMessage({
|
||||
role: "toolResult",
|
||||
toolCallId: "call_1",
|
||||
toolName: " ",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
isError: false,
|
||||
}),
|
||||
);
|
||||
|
||||
const messages = expectPersistedRoles(sm, ["assistant", "toolResult"]) as Array<{
|
||||
role: string;
|
||||
toolName?: string;
|
||||
}>;
|
||||
expect(messages[1]?.toolName).toBe("read");
|
||||
});
|
||||
|
||||
it("preserves ordering with multiple tool calls and partial results", () => {
|
||||
const sm = SessionManager.inMemory();
|
||||
const guard = installSessionToolResultGuard(sm);
|
||||
|
||||
@@ -31,6 +31,42 @@ function capToolResultSize(msg: AgentMessage): AgentMessage {
|
||||
});
|
||||
}
|
||||
|
||||
function trimNonEmptyString(value: unknown): string | undefined {
|
||||
if (typeof value !== "string") {
|
||||
return undefined;
|
||||
}
|
||||
const trimmed = value.trim();
|
||||
return trimmed || undefined;
|
||||
}
|
||||
|
||||
function normalizePersistedToolResultName(
|
||||
message: AgentMessage,
|
||||
fallbackName?: string,
|
||||
): AgentMessage {
|
||||
if ((message as { role?: unknown }).role !== "toolResult") {
|
||||
return message;
|
||||
}
|
||||
const toolResult = message as Extract<AgentMessage, { role: "toolResult" }>;
|
||||
const rawToolName = (toolResult as { toolName?: unknown }).toolName;
|
||||
const normalizedToolName = trimNonEmptyString(rawToolName);
|
||||
if (normalizedToolName) {
|
||||
if (rawToolName === normalizedToolName) {
|
||||
return toolResult;
|
||||
}
|
||||
return { ...toolResult, toolName: normalizedToolName };
|
||||
}
|
||||
|
||||
const normalizedFallback = trimNonEmptyString(fallbackName);
|
||||
if (normalizedFallback) {
|
||||
return { ...toolResult, toolName: normalizedFallback };
|
||||
}
|
||||
|
||||
if (typeof rawToolName === "string") {
|
||||
return { ...toolResult, toolName: "unknown" };
|
||||
}
|
||||
return toolResult;
|
||||
}
|
||||
|
||||
export function installSessionToolResultGuard(
|
||||
sessionManager: SessionManager,
|
||||
opts?: {
|
||||
@@ -150,9 +186,10 @@ export function installSessionToolResultGuard(
|
||||
if (id) {
|
||||
pending.delete(id);
|
||||
}
|
||||
const normalizedToolResult = normalizePersistedToolResultName(nextMessage, toolName);
|
||||
// Apply hard size cap before persistence to prevent oversized tool results
|
||||
// from consuming the entire context window on subsequent LLM calls.
|
||||
const capped = capToolResultSize(persistMessage(nextMessage));
|
||||
const capped = capToolResultSize(persistMessage(normalizedToolResult));
|
||||
const persisted = applyBeforeWriteHook(
|
||||
persistToolResult(capped, {
|
||||
toolCallId: id ?? undefined,
|
||||
|
||||
@@ -74,6 +74,29 @@ describe("sanitizeToolUseResultPairing", () => {
|
||||
expect(out[3]?.role).toBe("user");
|
||||
});
|
||||
|
||||
it("repairs blank tool result names from matching tool calls", () => {
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
|
||||
},
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_1",
|
||||
toolName: " ",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
isError: false,
|
||||
},
|
||||
] as unknown as AgentMessage[];
|
||||
|
||||
const out = sanitizeToolUseResultPairing(input);
|
||||
const toolResult = out.find((message) => message.role === "toolResult") as {
|
||||
toolName?: string;
|
||||
};
|
||||
|
||||
expect(toolResult?.toolName).toBe("read");
|
||||
});
|
||||
|
||||
it("drops duplicate tool results for the same id within a span", () => {
|
||||
const input = [
|
||||
...buildDuplicateToolResultInput(),
|
||||
|
||||
@@ -91,6 +91,38 @@ function makeMissingToolResult(params: {
|
||||
} as Extract<AgentMessage, { role: "toolResult" }>;
|
||||
}
|
||||
|
||||
function trimNonEmptyString(value: unknown): string | undefined {
|
||||
if (typeof value !== "string") {
|
||||
return undefined;
|
||||
}
|
||||
const trimmed = value.trim();
|
||||
return trimmed || undefined;
|
||||
}
|
||||
|
||||
function normalizeToolResultName(
|
||||
message: Extract<AgentMessage, { role: "toolResult" }>,
|
||||
fallbackName?: string,
|
||||
): Extract<AgentMessage, { role: "toolResult" }> {
|
||||
const rawToolName = (message as { toolName?: unknown }).toolName;
|
||||
const normalizedToolName = trimNonEmptyString(rawToolName);
|
||||
if (normalizedToolName) {
|
||||
if (rawToolName === normalizedToolName) {
|
||||
return message;
|
||||
}
|
||||
return { ...message, toolName: normalizedToolName };
|
||||
}
|
||||
|
||||
const normalizedFallback = trimNonEmptyString(fallbackName);
|
||||
if (normalizedFallback) {
|
||||
return { ...message, toolName: normalizedFallback };
|
||||
}
|
||||
|
||||
if (typeof rawToolName === "string") {
|
||||
return { ...message, toolName: "unknown" };
|
||||
}
|
||||
return message;
|
||||
}
|
||||
|
||||
export { makeMissingToolResult };
|
||||
|
||||
export type ToolCallInputRepairReport = {
|
||||
@@ -291,6 +323,7 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
||||
}
|
||||
|
||||
const toolCallIds = new Set(toolCalls.map((t) => t.id));
|
||||
const toolCallNamesById = new Map(toolCalls.map((t) => [t.id, t.name] as const));
|
||||
|
||||
const spanResultsById = new Map<string, Extract<AgentMessage, { role: "toolResult" }>>();
|
||||
const remainder: AgentMessage[] = [];
|
||||
@@ -317,8 +350,15 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
||||
changed = true;
|
||||
continue;
|
||||
}
|
||||
const normalizedToolResult = normalizeToolResultName(
|
||||
toolResult,
|
||||
toolCallNamesById.get(id),
|
||||
);
|
||||
if (normalizedToolResult !== toolResult) {
|
||||
changed = true;
|
||||
}
|
||||
if (!spanResultsById.has(id)) {
|
||||
spanResultsById.set(id, toolResult);
|
||||
spanResultsById.set(id, normalizedToolResult);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user