agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman
Kai authored 2026-03-05 04:02:22 +08:00, committed by GitHub
parent 53b2479eed
commit 4242c5152f
2 changed files with 64 additions and 9 deletions


@@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
+- Agents/session usage tracking: preserve accumulated usage metadata on embedded Pi runner error exits so failed turns still update session `totalTokens` from real usage instead of stale prior values. (#34275) thanks @RealKai42.
 - Nodes/system.run approval hardening: use explicit argv-mutation signaling when regenerating prepared `rawCommand`, and cover the `system.run.prepare -> system.run` handoff so direct PATH-based `nodes.run` commands no longer fail with `rawCommand does not match command`. (#33137) thanks @Sid-Qin.
 - Models/custom provider headers: propagate `models.providers.<name>.headers` across inline, fallback, and registry-found model resolution so header-authenticated proxies consistently receive configured request headers. (#27490) thanks @Sid-Qin.
 - Ollama/custom provider headers: forward resolved model headers into native Ollama stream requests so header-authenticated Ollama proxies receive configured request headers. (#24337) thanks @echoVic.
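
Note: the failure mode the first bullet describes can be sketched as follows. The `SessionRecord` shape and `applyRunResult` function are hypothetical illustrations of the session-update step, not actual OpenClaw APIs:

```ts
// Hypothetical sketch of the session-update step this fix targets.
interface SessionRecord {
  totalTokens: number;
}

interface RunMeta {
  agentMeta?: { usage?: { total?: number } };
}

function applyRunResult(session: SessionRecord, meta: RunMeta): void {
  const total = meta.agentMeta?.usage?.total;
  // Before the fix: error returns carried no usage, so `total` was undefined
  // and the session kept the totalTokens from the last successful run.
  // After the fix: error returns include accumulated usage, so failed turns
  // still refresh totalTokens from real API-reported numbers.
  if (typeof total === "number" && total > 0) {
    session.totalTokens = total;
  }
}
```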


@@ -200,6 +200,43 @@ function resolveActiveErrorContext(params: {
   };
 }
+/**
+ * Build agentMeta for error return paths, preserving accumulated usage so that
+ * session totalTokens reflects the actual context size rather than going stale.
+ * Without this, error returns omit usage and the session keeps whatever
+ * totalTokens was set by the previous successful run.
+ */
+function buildErrorAgentMeta(params: {
+  sessionId: string;
+  provider: string;
+  model: string;
+  usageAccumulator: UsageAccumulator;
+  lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
+  lastAssistant?: { usage?: unknown } | null;
+  /** API-reported total from the most recent call, mirroring the success path correction. */
+  lastTurnTotal?: number;
+}): EmbeddedPiAgentMeta {
+  const usage = toNormalizedUsage(params.usageAccumulator);
+  // Apply the same lastTurnTotal correction the success path uses so
+  // usage.total reflects the API-reported context size, not accumulated totals.
+  if (usage && params.lastTurnTotal && params.lastTurnTotal > 0) {
+    usage.total = params.lastTurnTotal;
+  }
+  const lastCallUsage = params.lastAssistant
+    ? normalizeUsage(params.lastAssistant.usage as UsageLike)
+    : undefined;
+  const promptTokens = derivePromptTokens(params.lastRunPromptUsage);
+  return {
+    sessionId: params.sessionId,
+    provider: params.provider,
+    model: params.model,
+    // Only include usage fields when we have actual data from prior API calls.
+    ...(usage ? { usage } : {}),
+    ...(lastCallUsage ? { lastCallUsage } : {}),
+    ...(promptTokens ? { promptTokens } : {}),
+  };
+}
+
 export async function runEmbeddedPiAgent(
   params: RunEmbeddedPiAgentParams,
 ): Promise<EmbeddedPiRunResult> {
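
Note: the conditional spreads at the end of `buildErrorAgentMeta` mean callers never see `usage: undefined`; the keys are simply absent when no API call has completed. A minimal sketch of that pattern in isolation, with simplified stand-in types:

```ts
// Simplified stand-in for the conditional-spread pattern used above.
interface Meta {
  sessionId: string;
  usage?: { total: number };
}

function buildMeta(sessionId: string, usage?: { total: number }): Meta {
  return {
    sessionId,
    // Spreading a conditional object omits the key entirely when absent,
    // unlike `usage: usage`, which would set the key to undefined.
    ...(usage ? { usage } : {}),
  };
}

console.log("usage" in buildMeta("s1")); // false — key omitted
console.log("usage" in buildMeta("s1", { total: 42 })); // true
```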
@@ -678,6 +715,8 @@
   };
   try {
     let authRetryPending = false;
+    // Hoisted so the retry-limit error path can use the most recent API total.
+    let lastTurnTotal: number | undefined;
     while (true) {
       if (runLoopIterations >= MAX_RUN_LOOP_ITERATIONS) {
         const message =
@@ -699,11 +738,14 @@
         ],
         meta: {
           durationMs: Date.now() - started,
-          agentMeta: {
+          agentMeta: buildErrorAgentMeta({
             sessionId: params.sessionId,
             provider,
             model: model.id,
-          },
+            usageAccumulator,
+            lastRunPromptUsage,
+            lastTurnTotal,
+          }),
           error: { kind: "retry_limit", message },
         },
       };
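
Note: the `lastTurnTotal` correction applied inside `buildErrorAgentMeta` pins `usage.total` to the last API-reported total rather than the tool-loop sum. For intuition, with made-up numbers purely for illustration:

```ts
// Illustrative numbers only: a multi-call tool loop.
const accumulated = { input: 9000, output: 1500, total: 10500 }; // summed across calls
const lastTurnTotal = 4200; // API-reported total of the final call

// Accumulated totals overcount the live context: each loop iteration
// re-sends the growing conversation, so summing per-call totals
// double-counts it. The correction keeps the last API-reported figure.
const usage = {
  ...accumulated,
  total: lastTurnTotal > 0 ? lastTurnTotal : accumulated.total,
};
console.log(usage.total); // 4200
```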
@@ -806,7 +848,7 @@
       // Keep prompt size from the latest model call so session totalTokens
       // reflects current context usage, not accumulated tool-loop usage.
       lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
-      const lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
+      lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
       const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
       autoCompactionCount += attemptCompactionCount;
       const activeErrorContext = resolveActiveErrorContext({
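
Note: the switch from `const` to a plain assignment matters because the declaration now lives above the `while (true)` loop (see the `@@ -678` hunk), so the value written here survives into the error return paths below. A simplified illustration of the hoisting plus the nullish-coalescing fallback, with assumed usage shapes:

```ts
// Assumed shape; the real normalizeUsage result is richer than this.
interface Usage {
  total?: number;
}

let lastTurnTotal: number | undefined; // hoisted above the retry loop

function recordTurn(lastAssistantUsage?: Usage, attemptUsage?: Usage): void {
  // Prefer the per-assistant-message total; fall back to the attempt-level
  // total when the provider reports usage only for the whole attempt.
  lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
}

recordTurn(undefined, { total: 1200 });
console.log(lastTurnTotal); // 1200 — attempt-level fallback used
```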
@@ -998,11 +1040,15 @@
         ],
         meta: {
           durationMs: Date.now() - started,
-          agentMeta: {
+          agentMeta: buildErrorAgentMeta({
            sessionId: sessionIdUsed,
            provider,
            model: model.id,
-          },
+            usageAccumulator,
+            lastRunPromptUsage,
+            lastAssistant,
+            lastTurnTotal,
+          }),
           systemPromptReport: attempt.systemPromptReport,
           error: { kind, message: errorText },
         },
@@ -1028,11 +1074,15 @@
         ],
         meta: {
           durationMs: Date.now() - started,
-          agentMeta: {
+          agentMeta: buildErrorAgentMeta({
            sessionId: sessionIdUsed,
            provider,
            model: model.id,
-          },
+            usageAccumulator,
+            lastRunPromptUsage,
+            lastAssistant,
+            lastTurnTotal,
+          }),
           systemPromptReport: attempt.systemPromptReport,
           error: { kind: "role_ordering", message: errorText },
         },
@@ -1056,11 +1106,15 @@
         ],
         meta: {
           durationMs: Date.now() - started,
-          agentMeta: {
+          agentMeta: buildErrorAgentMeta({
            sessionId: sessionIdUsed,
            provider,
            model: model.id,
-          },
+            usageAccumulator,
+            lastRunPromptUsage,
+            lastAssistant,
+            lastTurnTotal,
+          }),
           systemPromptReport: attempt.systemPromptReport,
           error: { kind: "image_size", message: errorText },
         },