diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f7d00fbe..6cac218f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -112,6 +112,7 @@ Docs: https://docs.openclaw.ai - Providers/Copilot: add `claude-sonnet-4.6` and `claude-sonnet-4.5` to the default GitHub Copilot model catalog and add coverage for model-list/definition helpers. (#20270, fixes #20091) Thanks @Clawborn. - Auto-reply/WebChat: avoid defaulting inbound runtime channel labels to unrelated providers (for example `whatsapp`) for webchat sessions so channel-specific formatting guidance stays accurate. (#21534) Thanks @lbo728. - Status: include persisted `cacheRead`/`cacheWrite` in session summaries so compact `/status` output consistently shows cache hit percentages from real session data. +- Sessions/Usage: persist `totalTokens` from `promptTokens` snapshots even when providers omit structured usage payloads, so session history/status no longer regress to `unknown` token utilization for otherwise successful runs. (#21819) Thanks @zymclaw. - Heartbeat/Cron: restore interval heartbeat behavior so missing `HEARTBEAT.md` no longer suppresses runs (only effectively empty files skip), preserving prompt-driven and tagged-cron execution paths. - WhatsApp/Cron/Heartbeat: enforce allowlisted routing for implicit scheduled/system delivery by merging pairing-store + configured `allowFrom` recipients, selecting authorized recipients when last-route context points to a non-allowlisted chat, and preventing heartbeat fan-out to recent unauthorized chats. - Heartbeat/Active hours: constrain active-hours `24` sentinel parsing to `24:00` in time validation so invalid values like `24:30` are rejected early. (#21410) thanks @adhitShet. 
diff --git a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts index a1ad2d0a9..3d19d8d29 100644 --- a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts +++ b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts @@ -960,6 +960,43 @@ describe("runReplyAgent messaging tool suppression", () => { expect(store[sessionKey]?.totalTokensFresh).toBe(true); expect(store[sessionKey]?.model).toBe("claude-opus-4-5"); }); + + it("persists totalTokens from promptTokens when provider omits usage", async () => { + const storePath = path.join( + await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-session-store-")), + "sessions.json", + ); + const sessionKey = "main"; + const entry: SessionEntry = { + sessionId: "session", + updatedAt: Date.now(), + inputTokens: 111, + outputTokens: 22, + }; + await saveSessionStore(storePath, { [sessionKey]: entry }); + + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "hello world!" 
}], + messagingToolSentTexts: ["different message"], + messagingToolSentTargets: [{ tool: "slack", provider: "slack", to: "channel:C1" }], + meta: { + agentMeta: { + promptTokens: 41_000, + model: "claude-opus-4-5", + provider: "anthropic", + }, + }, + }); + + const result = await createRun("slack", { storePath, sessionKey }); + + expect(result).toBeUndefined(); + const store = loadSessionStore(storePath, { skipCache: true }); + expect(store[sessionKey]?.totalTokens).toBe(41_000); + expect(store[sessionKey]?.totalTokensFresh).toBe(true); + expect(store[sessionKey]?.inputTokens).toBe(111); + expect(store[sessionKey]?.outputTokens).toBe(22); + }); }); describe("runReplyAgent reminder commitment guard", () => { diff --git a/src/auto-reply/reply/session-usage.ts b/src/auto-reply/reply/session-usage.ts index d1945a5ec..2d7b6e7f9 100644 --- a/src/auto-reply/reply/session-usage.ts +++ b/src/auto-reply/reply/session-usage.ts @@ -57,25 +57,25 @@ export async function persistSessionUsageUpdate(params: { } const label = params.logLabel ? `${params.logLabel} ` : ""; - if (hasNonzeroUsage(params.usage)) { + const hasUsage = hasNonzeroUsage(params.usage); + const hasPromptTokens = + typeof params.promptTokens === "number" && + Number.isFinite(params.promptTokens) && + params.promptTokens > 0; + const hasFreshContextSnapshot = Boolean(params.lastCallUsage) || hasPromptTokens; + + if (hasUsage || hasFreshContextSnapshot) { try { await updateSessionStoreEntry({ storePath, sessionKey, update: async (entry) => { - const input = params.usage?.input ?? 0; - const output = params.usage?.output ?? 0; const resolvedContextTokens = params.contextTokensUsed ?? entry.contextTokens; - const hasPromptTokens = - typeof params.promptTokens === "number" && - Number.isFinite(params.promptTokens) && - params.promptTokens > 0; - const hasFreshContextSnapshot = Boolean(params.lastCallUsage) || hasPromptTokens; // Use last-call usage for totalTokens when available. 
The accumulated // `usage.input` sums input tokens from every API call in the run // (tool-use loops, compaction retries), overstating actual context. // `lastCallUsage` reflects only the final API call — the true context. - const usageForContext = params.lastCallUsage ?? params.usage; + const usageForContext = params.lastCallUsage ?? (hasUsage ? params.usage : undefined); const totalTokens = hasFreshContextSnapshot ? deriveSessionTotalTokens({ usage: usageForContext, @@ -84,19 +84,22 @@ export async function persistSessionUsageUpdate(params: { }) : undefined; const patch: Partial<SessionEntry> = { - inputTokens: input, - outputTokens: output, - cacheRead: params.usage?.cacheRead ?? 0, - cacheWrite: params.usage?.cacheWrite ?? 0, - // Missing a last-call snapshot means context utilization is stale/unknown. - totalTokens, - totalTokensFresh: typeof totalTokens === "number", modelProvider: params.providerUsed ?? entry.modelProvider, model: params.modelUsed ?? entry.model, contextTokens: resolvedContextTokens, systemPromptReport: params.systemPromptReport ?? entry.systemPromptReport, updatedAt: Date.now(), }; + if (hasUsage) { + patch.inputTokens = params.usage?.input ?? 0; + patch.outputTokens = params.usage?.output ?? 0; + patch.cacheRead = params.usage?.cacheRead ?? 0; + patch.cacheWrite = params.usage?.cacheWrite ?? 0; + } + // Missing a last-call snapshot (and promptTokens fallback) means + // context utilization is stale/unknown. 
+ patch.totalTokens = totalTokens; + patch.totalTokensFresh = typeof totalTokens === "number"; return applyCliSessionIdToSessionPatch(params, entry, patch); }, }); diff --git a/src/auto-reply/reply/session.test.ts b/src/auto-reply/reply/session.test.ts index 181934f98..5ac167fd6 100644 --- a/src/auto-reply/reply/session.test.ts +++ b/src/auto-reply/reply/session.test.ts @@ -1138,6 +1138,35 @@ describe("persistSessionUsageUpdate", () => { expect(stored[sessionKey].totalTokensFresh).toBe(true); }); + it("persists totalTokens from promptTokens when usage is unavailable", async () => { + const storePath = await createStorePath("openclaw-usage-"); + const sessionKey = "main"; + await seedSessionStore({ + storePath, + sessionKey, + entry: { + sessionId: "s1", + updatedAt: Date.now(), + inputTokens: 1_234, + outputTokens: 456, + }, + }); + + await persistSessionUsageUpdate({ + storePath, + sessionKey, + usage: undefined, + promptTokens: 39_000, + contextTokensUsed: 200_000, + }); + + const stored = JSON.parse(await fs.readFile(storePath, "utf-8")); + expect(stored[sessionKey].totalTokens).toBe(39_000); + expect(stored[sessionKey].totalTokensFresh).toBe(true); + expect(stored[sessionKey].inputTokens).toBe(1_234); + expect(stored[sessionKey].outputTokens).toBe(456); + }); + it("keeps non-clamped lastCallUsage totalTokens when exceeding context window", async () => { const storePath = await createStorePath("openclaw-usage-"); const sessionKey = "main";