Files
openclaw/src/agents/pi-embedded-runner/usage-reporting.test.ts
artale a62ff19a66 fix(agent): isolate last-turn total in token usage reporting (#17016)
recordAssistantUsage accumulated cacheRead across the entire multi-turn
run, and totalTokens was clamped to contextTokens. This caused
session_status to report 100% context usage regardless of actual load.

Changes:
- run.ts: capture lastTurnTotal from the most recent model call and
  inject it into the normalized usage before it reaches agentMeta.
- usage-reporting.test.ts: verify usage.total reflects current turn,
  not accumulated total.

Fixes #17016
2026-02-17 00:00:12 +01:00

118 lines
3.9 KiB
TypeScript

import "./run.overflow-compaction.mocks.shared.js";
import { beforeEach, describe, expect, it, vi } from "vitest";
// Stub for ../auth-profiles.js: the cooldown check always answers false and
// the three mark* calls are async no-ops.
vi.mock("../auth-profiles.js", () => {
  const isProfileInCooldown = vi.fn(() => false);
  const markAuthProfileFailure = vi.fn(async () => {});
  const markAuthProfileGood = vi.fn(async () => {});
  const markAuthProfileUsed = vi.fn(async () => {});

  return {
    isProfileInCooldown,
    markAuthProfileFailure,
    markAuthProfileGood,
    markAuthProfileUsed,
  };
});
// Stub for ../usage.js with lightweight stand-ins for the two helpers.
vi.mock("../usage.js", () => {
  type PromptUsage = { input?: number; cacheRead?: number; cacheWrite?: number };

  // Object-shaped usage is handed back untouched; anything else becomes undefined.
  const normalizeUsage = vi.fn((usage?: unknown) => {
    if (usage && typeof usage === "object") {
      return usage;
    }
    return undefined;
  });

  // Adds input + cacheRead + cacheWrite (missing parts count as 0) and only
  // reports the sum when it is strictly positive.
  const derivePromptTokens = vi.fn((usage?: PromptUsage) => {
    if (!usage) {
      return undefined;
    }
    const promptTokens = [usage.input, usage.cacheRead, usage.cacheWrite].reduce(
      (running: number, part) => running + (part ?? 0),
      0,
    );
    if (promptTokens > 0) {
      return promptTokens;
    }
    return undefined;
  });

  return { normalizeUsage, derivePromptTokens };
});
// Stub for ../workspace-run.js.
vi.mock("../workspace-run.js", () => {
  // Echoes the requested workspace directory back with no fallback and a
  // fixed agent id of "main".
  const resolveRunWorkspaceDir = vi.fn((request: { workspaceDir: string }) => {
    return {
      workspaceDir: request.workspaceDir,
      usedFallback: false,
      fallbackReason: undefined,
      agentId: "main",
    };
  });

  // Returns the identifier unchanged, or an empty string when it is nullish.
  const redactRunIdentifier = vi.fn((value?: string) => value ?? "");

  return { resolveRunWorkspaceDir, redactRunIdentifier };
});
// Stub for ../pi-embedded-helpers.js. Every helper returns a fixed neutral
// value ("" / false / null) except isLikelyContextOverflowError, which does a
// case-insensitive substring check on the message.
vi.mock("../pi-embedded-helpers.js", () => {
  const isLikelyContextOverflowError = vi.fn((msg?: string) => {
    const overflowMarkers = ["request_too_large", "context window exceeded"];
    const normalized = (msg ?? "").toLowerCase();
    return overflowMarkers.some((marker) => normalized.includes(marker));
  });

  return {
    // Formatters: always produce empty text.
    formatBillingErrorMessage: vi.fn(() => ""),
    formatAssistantErrorText: vi.fn(() => ""),

    // Boolean predicates: always answer false.
    isAuthAssistantError: vi.fn(() => false),
    isBillingAssistantError: vi.fn(() => false),
    isCompactionFailureError: vi.fn(() => false),
    isFailoverAssistantError: vi.fn(() => false),
    isFailoverErrorMessage: vi.fn(() => false),
    isRateLimitAssistantError: vi.fn(() => false),
    isTimeoutErrorMessage: vi.fn(() => false),

    // The one helper with real matching logic.
    isLikelyContextOverflowError,

    // Classifiers / parsers / pickers: always return null.
    classifyFailoverReason: vi.fn(() => null),
    parseImageSizeError: vi.fn(() => null),
    parseImageDimensionError: vi.fn(() => null),
    pickFallbackThinkingLevel: vi.fn(() => null),
  };
});
import { runEmbeddedPiAgent } from "./run.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
// Mock-typed handle on runEmbeddedAttempt so the test can queue its result.
// NOTE(review): presumably ./run/attempt.js is replaced by the shared mocks
// module imported at the top of this file — confirm.
const mockedRunEmbeddedAttempt = vi.mocked(runEmbeddedAttempt);

describe("runEmbeddedPiAgent usage reporting", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("reports total usage from the last turn instead of accumulated total", async () => {
    // Scenario: a single attempt that spans two model turns.
    //   turn 1 -> input 100, output 50, total 150
    //   turn 2 -> input 150, output 50, total 200
    //
    // lastAssistant.usage carries only the final turn (turn 2).
    const lastTurnUsage = { input: 150, output: 50, total: 200 };
    // attemptUsage carries the sum over both turns:
    //   input 100 + 150 = 250, output 50 + 50 = 100, total 150 + 200 = 350.
    const accumulatedUsage = { input: 250, output: 100, total: 350 };

    mockedRunEmbeddedAttempt.mockResolvedValueOnce({
      aborted: false,
      promptError: null,
      timedOut: false,
      sessionIdUsed: "test-session",
      assistantTexts: ["Response 1", "Response 2"],
      lastAssistant: {
        usage: lastTurnUsage,
        stopReason: "end_turn",
      },
      attemptUsage: accumulatedUsage,
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
    } as any);

    const { meta } = await runEmbeddedPiAgent({
      sessionId: "test-session",
      sessionKey: "test-key",
      sessionFile: "/tmp/session.json",
      workspaceDir: "/tmp/workspace",
      prompt: "hello",
      timeoutMs: 30000,
      runId: "run-1",
    });

    // Usage must be present in the agent metadata.
    const reportedUsage = meta.agentMeta.usage;
    expect(reportedUsage).toBeDefined();

    // The reported total has to be the last turn's 200; a regression to the
    // accumulated figure would likely surface here as 350.
    expect(reportedUsage?.total).toBe(200);
  });
});