Files
openclaw/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
2026-03-02 07:13:10 +00:00

587 lines
18 KiB
TypeScript

import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { beforeEach, describe, expect, it, vi } from "vitest";
import * as helpers from "./pi-embedded-helpers.js";
import {
expectGoogleModelApiFullSanitizeCall,
loadSanitizeSessionHistoryWithCleanMocks,
makeMockSessionManager,
makeInMemorySessionManager,
makeModelSnapshotEntry,
makeReasoningAssistantMessages,
makeSimpleUserMessages,
sanitizeSnapshotChangedOpenAIReasoning,
type SanitizeSessionHistoryFn,
sanitizeWithOpenAIResponses,
TEST_SESSION_ID,
} from "./pi-embedded-runner.sanitize-session-history.test-harness.js";
import { makeZeroUsageSnapshot } from "./usage.js";
vi.mock("./pi-embedded-helpers.js", async () => ({
...(await vi.importActual("./pi-embedded-helpers.js")),
isGoogleModelApi: vi.fn(),
sanitizeSessionMessagesImages: vi.fn(async (msgs) => msgs),
}));
let sanitizeSessionHistory: SanitizeSessionHistoryFn;
// We don't mock session-transcript-repair.js as it is a pure function and complicates mocking.
// We rely on the real implementation which should pass through our simple messages.
describe("sanitizeSessionHistory", () => {
const mockSessionManager = makeMockSessionManager();
const mockMessages = makeSimpleUserMessages();
const setNonGoogleModelApi = () => {
vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false);
};
const sanitizeGithubCopilotHistory = async (params: {
messages: AgentMessage[];
modelApi?: string;
modelId?: string;
}) =>
sanitizeSessionHistory({
messages: params.messages,
modelApi: params.modelApi ?? "openai-completions",
provider: "github-copilot",
modelId: params.modelId ?? "claude-opus-4.6",
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});
const getAssistantMessage = (messages: AgentMessage[]) => {
expect(messages[1]?.role).toBe("assistant");
return messages[1] as Extract<AgentMessage, { role: "assistant" }>;
};
const getAssistantContentTypes = (messages: AgentMessage[]) =>
getAssistantMessage(messages).content.map((block: { type: string }) => block.type);
const makeThinkingAndTextAssistantMessages = (
thinkingSignature: string = "some_sig",
): AgentMessage[] =>
[
{ role: "user", content: "hello" },
{
role: "assistant",
content: [
{
type: "thinking",
thinking: "internal",
thinkingSignature,
},
{ type: "text", text: "hi" },
],
},
] as unknown as AgentMessage[];
const makeUsage = (input: number, output: number, totalTokens: number) => ({
input,
output,
cacheRead: 0,
cacheWrite: 0,
totalTokens,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
});
const makeAssistantUsageMessage = (params: {
text: string;
usage: ReturnType<typeof makeUsage>;
timestamp?: number;
}) =>
({
role: "assistant",
content: [{ type: "text", text: params.text }],
stopReason: "stop",
...(typeof params.timestamp === "number" ? { timestamp: params.timestamp } : {}),
usage: params.usage,
}) as unknown as AgentMessage;
const makeCompactionSummaryMessage = (tokensBefore: number, timestamp: string) =>
({
role: "compactionSummary",
summary: "compressed",
tokensBefore,
timestamp,
}) as unknown as AgentMessage;
const sanitizeOpenAIHistory = async (
messages: AgentMessage[],
overrides: Partial<Parameters<SanitizeSessionHistoryFn>[0]> = {},
) =>
sanitizeSessionHistory({
messages,
modelApi: "openai-responses",
provider: "openai",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
...overrides,
});
const getAssistantMessages = (messages: AgentMessage[]) =>
messages.filter((message) => message.role === "assistant") as Array<
AgentMessage & { usage?: unknown; content?: unknown }
>;
beforeEach(async () => {
sanitizeSessionHistory = await loadSanitizeSessionHistoryWithCleanMocks();
});
it("sanitizes tool call ids for Google model APIs", async () => {
await expectGoogleModelApiFullSanitizeCall({
sanitizeSessionHistory,
messages: mockMessages,
sessionManager: mockSessionManager,
});
});
it("sanitizes tool call ids with strict9 for Mistral models", async () => {
setNonGoogleModelApi();
await sanitizeSessionHistory({
messages: mockMessages,
modelApi: "openai-responses",
provider: "openrouter",
modelId: "mistralai/devstral-2512:free",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith(
mockMessages,
"session:history",
expect.objectContaining({
sanitizeMode: "full",
sanitizeToolCallIds: true,
toolCallIdMode: "strict9",
}),
);
});
it("sanitizes tool call ids for Anthropic APIs", async () => {
setNonGoogleModelApi();
await sanitizeSessionHistory({
messages: mockMessages,
modelApi: "anthropic-messages",
provider: "anthropic",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith(
mockMessages,
"session:history",
expect.objectContaining({ sanitizeMode: "full", sanitizeToolCallIds: true }),
);
});
it("does not sanitize tool call ids for openai-responses", async () => {
setNonGoogleModelApi();
await sanitizeWithOpenAIResponses({
sanitizeSessionHistory,
messages: mockMessages,
sessionManager: mockSessionManager,
});
expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith(
mockMessages,
"session:history",
expect.objectContaining({ sanitizeMode: "images-only", sanitizeToolCallIds: false }),
);
});
it("annotates inter-session user messages before context sanitization", async () => {
setNonGoogleModelApi();
const messages: AgentMessage[] = [
{
role: "user",
content: "forwarded instruction",
provenance: {
kind: "inter_session",
sourceSessionKey: "agent:main:req",
sourceTool: "sessions_send",
},
} as unknown as AgentMessage,
];
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-responses",
provider: "openai",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
const first = result[0] as Extract<AgentMessage, { role: "user" }>;
expect(first.role).toBe("user");
expect(typeof first.content).toBe("string");
expect(first.content as string).toContain("[Inter-session message]");
expect(first.content as string).toContain("sourceSession=agent:main:req");
});
it("drops stale assistant usage snapshots kept before latest compaction summary", async () => {
vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false);
const messages = [
{ role: "user", content: "old context" },
makeAssistantUsageMessage({
text: "old answer",
usage: makeUsage(191_919, 2_000, 193_919),
}),
makeCompactionSummaryMessage(191_919, new Date().toISOString()),
] as unknown as AgentMessage[];
const result = await sanitizeOpenAIHistory(messages);
const staleAssistant = result.find((message) => message.role === "assistant") as
| (AgentMessage & { usage?: unknown })
| undefined;
expect(staleAssistant).toBeDefined();
expect(staleAssistant?.usage).toEqual(makeZeroUsageSnapshot());
});
it("preserves fresh assistant usage snapshots created after latest compaction summary", async () => {
vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false);
const messages = [
makeAssistantUsageMessage({
text: "pre-compaction answer",
usage: makeUsage(120_000, 3_000, 123_000),
}),
makeCompactionSummaryMessage(123_000, new Date().toISOString()),
{ role: "user", content: "new question" },
makeAssistantUsageMessage({
text: "fresh answer",
usage: makeUsage(1_000, 250, 1_250),
}),
] as unknown as AgentMessage[];
const result = await sanitizeOpenAIHistory(messages);
const assistants = getAssistantMessages(result);
expect(assistants).toHaveLength(2);
expect(assistants[0]?.usage).toEqual(makeZeroUsageSnapshot());
expect(assistants[1]?.usage).toBeDefined();
});
it("drops stale usage when compaction summary appears before kept assistant messages", async () => {
vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false);
const compactionTs = Date.parse("2026-02-26T12:00:00.000Z");
const messages = [
makeCompactionSummaryMessage(191_919, new Date(compactionTs).toISOString()),
makeAssistantUsageMessage({
text: "kept pre-compaction answer",
timestamp: compactionTs - 1_000,
usage: makeUsage(191_919, 2_000, 193_919),
}),
] as unknown as AgentMessage[];
const result = await sanitizeOpenAIHistory(messages);
const assistant = result.find((message) => message.role === "assistant") as
| (AgentMessage & { usage?: unknown })
| undefined;
expect(assistant?.usage).toEqual(makeZeroUsageSnapshot());
});
it("keeps fresh usage after compaction timestamp in summary-first ordering", async () => {
vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false);
const compactionTs = Date.parse("2026-02-26T12:00:00.000Z");
const messages = [
makeCompactionSummaryMessage(123_000, new Date(compactionTs).toISOString()),
makeAssistantUsageMessage({
text: "kept pre-compaction answer",
timestamp: compactionTs - 2_000,
usage: makeUsage(120_000, 3_000, 123_000),
}),
{ role: "user", content: "new question", timestamp: compactionTs + 1_000 },
makeAssistantUsageMessage({
text: "fresh answer",
timestamp: compactionTs + 2_000,
usage: makeUsage(1_000, 250, 1_250),
}),
] as unknown as AgentMessage[];
const result = await sanitizeOpenAIHistory(messages);
const assistants = getAssistantMessages(result);
const keptAssistant = assistants.find((message) =>
JSON.stringify(message.content).includes("kept pre-compaction answer"),
);
const freshAssistant = assistants.find((message) =>
JSON.stringify(message.content).includes("fresh answer"),
);
expect(keptAssistant?.usage).toEqual(makeZeroUsageSnapshot());
expect(freshAssistant?.usage).toBeDefined();
});
it("keeps reasoning-only assistant messages for openai-responses", async () => {
setNonGoogleModelApi();
const messages = [
{ role: "user", content: "hello" },
{
role: "assistant",
stopReason: "aborted",
content: [
{
type: "thinking",
thinking: "reasoning",
thinkingSignature: "sig",
},
],
},
] as unknown as AgentMessage[];
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-responses",
provider: "openai",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
expect(result).toHaveLength(2);
expect(result[1]?.role).toBe("assistant");
});
it("synthesizes missing tool results for openai-responses after repair", async () => {
const messages = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
},
] as unknown as AgentMessage[];
const result = await sanitizeOpenAIHistory(messages);
// repairToolUseResultPairing now runs for all providers (including OpenAI)
// to fix orphaned function_call_output items that OpenAI would reject.
expect(result).toHaveLength(2);
expect(result[0]?.role).toBe("assistant");
expect(result[1]?.role).toBe("toolResult");
});
it("drops malformed tool calls missing input or arguments", async () => {
const messages = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "read" }],
},
{ role: "user", content: "hello" },
] as unknown as AgentMessage[];
const result = await sanitizeOpenAIHistory(messages, { sessionId: "test-session" });
expect(result.map((msg) => msg.role)).toEqual(["user"]);
});
it("drops malformed tool calls with invalid/overlong names", async () => {
const messages = [
{
role: "assistant",
content: [
{
type: "toolCall",
id: "call_bad",
name: 'toolu_01mvznfebfuu <|tool_call_argument_begin|> {"command"',
arguments: {},
},
{ type: "toolCall", id: "call_long", name: `read_${"x".repeat(80)}`, arguments: {} },
],
},
{ role: "user", content: "hello" },
] as unknown as AgentMessage[];
const result = await sanitizeOpenAIHistory(messages);
expect(result.map((msg) => msg.role)).toEqual(["user"]);
});
it("drops tool calls that are not in the allowed tool set", async () => {
const messages = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "write", arguments: {} }],
},
] as unknown as AgentMessage[];
const result = await sanitizeOpenAIHistory(messages, {
allowedToolNames: ["read"],
});
expect(result).toEqual([]);
});
it("downgrades orphaned openai reasoning even when the model has not changed", async () => {
const sessionEntries = [
makeModelSnapshotEntry({
provider: "openai",
modelApi: "openai-responses",
modelId: "gpt-5.2-codex",
}),
];
const sessionManager = makeInMemorySessionManager(sessionEntries);
const messages = makeReasoningAssistantMessages({ thinkingSignature: "json" });
const result = await sanitizeWithOpenAIResponses({
sanitizeSessionHistory,
messages,
modelId: "gpt-5.2-codex",
sessionManager,
});
expect(result).toEqual([]);
});
it("downgrades orphaned openai reasoning when the model changes too", async () => {
const result = await sanitizeSnapshotChangedOpenAIReasoning({
sanitizeSessionHistory,
});
expect(result).toEqual([]);
});
it("drops orphaned toolResult entries when switching from openai history to anthropic", async () => {
const sessionEntries = [
makeModelSnapshotEntry({
provider: "openai",
modelApi: "openai-responses",
modelId: "gpt-5.2",
}),
];
const sessionManager = makeInMemorySessionManager(sessionEntries);
const messages = [
{
role: "assistant",
content: [{ type: "toolCall", id: "tool_abc123", name: "read", arguments: {} }],
},
{
role: "toolResult",
toolCallId: "tool_abc123",
toolName: "read",
content: [{ type: "text", text: "ok" }],
} as unknown as AgentMessage,
{ role: "user", content: "continue" },
{
role: "toolResult",
toolCallId: "tool_01VihkDRptyLpX1ApUPe7ooU",
toolName: "read",
content: [{ type: "text", text: "stale result" }],
} as unknown as AgentMessage,
] as unknown as AgentMessage[];
const result = await sanitizeSessionHistory({
messages,
modelApi: "anthropic-messages",
provider: "anthropic",
modelId: "claude-opus-4-6",
sessionManager,
sessionId: TEST_SESSION_ID,
});
expect(result.map((msg) => msg.role)).toEqual(["assistant", "toolResult", "user"]);
expect(
result.some(
(msg) =>
msg.role === "toolResult" &&
(msg as { toolCallId?: string }).toolCallId === "tool_01VihkDRptyLpX1ApUPe7ooU",
),
).toBe(false);
});
it("drops assistant thinking blocks for github-copilot models", async () => {
setNonGoogleModelApi();
const messages = makeThinkingAndTextAssistantMessages("reasoning_text");
const result = await sanitizeGithubCopilotHistory({ messages });
const assistant = getAssistantMessage(result);
expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
});
it("preserves assistant turn when all content is thinking blocks (github-copilot)", async () => {
setNonGoogleModelApi();
const messages = [
{ role: "user", content: "hello" },
{
role: "assistant",
content: [
{
type: "thinking",
thinking: "some reasoning",
thinkingSignature: "reasoning_text",
},
],
},
{ role: "user", content: "follow up" },
] as unknown as AgentMessage[];
const result = await sanitizeGithubCopilotHistory({ messages });
// Assistant turn should be preserved (not dropped) to maintain turn alternation
expect(result).toHaveLength(3);
const assistant = getAssistantMessage(result);
expect(assistant.content).toEqual([{ type: "text", text: "" }]);
});
it("preserves tool_use blocks when dropping thinking blocks (github-copilot)", async () => {
setNonGoogleModelApi();
const messages = [
{ role: "user", content: "read a file" },
{
role: "assistant",
content: [
{
type: "thinking",
thinking: "I should use the read tool",
thinkingSignature: "reasoning_text",
},
{ type: "toolCall", id: "tool_123", name: "read", arguments: { path: "/tmp/test" } },
{ type: "text", text: "Let me read that file." },
],
},
] as unknown as AgentMessage[];
const result = await sanitizeGithubCopilotHistory({ messages });
const types = getAssistantContentTypes(result);
expect(types).toContain("toolCall");
expect(types).toContain("text");
expect(types).not.toContain("thinking");
});
it("does not drop thinking blocks for non-copilot providers", async () => {
setNonGoogleModelApi();
const messages = makeThinkingAndTextAssistantMessages();
const result = await sanitizeSessionHistory({
messages,
modelApi: "anthropic-messages",
provider: "anthropic",
modelId: "claude-opus-4-6",
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});
const types = getAssistantContentTypes(result);
expect(types).toContain("thinking");
});
it("does not drop thinking blocks for non-claude copilot models", async () => {
setNonGoogleModelApi();
const messages = makeThinkingAndTextAssistantMessages();
const result = await sanitizeGithubCopilotHistory({ messages, modelId: "gpt-5.2" });
const types = getAssistantContentTypes(result);
expect(types).toContain("thinking");
});
});