From 31537c669a01e4df28fb734e7ab2b09827097832 Mon Sep 17 00:00:00 2001 From: Marcus Castro Date: Fri, 13 Feb 2026 16:55:16 -0300 Subject: [PATCH] fix: archive old transcript files on /new and /reset (#14949) Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: 4724df7dea247970b909ef8d293ba4a612b7b1b4 Co-authored-by: mcaxtr <7562095+mcaxtr@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras --- CHANGELOG.md | 1 + src/auto-reply/reply/session-resets.test.ts | 43 ++++++++++ src/auto-reply/reply/session.ts | 12 +++ src/gateway/server-methods/sessions.ts | 59 +++++++++----- ...ions.gateway-server-sessions-a.e2e.test.ts | 2 + src/gateway/session-utils.fs.test.ts | 78 +++++++++++++++++++ src/gateway/session-utils.fs.ts | 34 +++++++- src/gateway/session-utils.ts | 1 + 8 files changed, 211 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19b09b036..ae4fe6235 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ Docs: https://docs.openclaw.ai - Outbound/Threading: pass `replyTo` and `threadId` from `message send` tool actions through the core outbound send path to channel adapters, preserving thread/reply routing. (#14948) Thanks @mcaxtr. - Sessions/Agents: pass `agentId` when resolving existing transcript paths in reply runs so non-default agents and heartbeat/chat handlers no longer fail with `Session file path must be within sessions directory`. (#15141) Thanks @Goldenmonstew. - Sessions/Agents: pass `agentId` through status and usage transcript-resolution paths (auto-reply, gateway usage APIs, and session cost/log loaders) so non-default agents can resolve absolute session files without path-validation failures. (#15103) Thanks @jalehman. +- Sessions: archive previous transcript files on `/new` and `/reset` session resets (including gateway `sessions.reset`) so stale transcripts do not accumulate on disk. (#14869) Thanks @mcaxtr. - Signal/Install: auto-install `signal-cli` via Homebrew on non-x64 Linux architectures, avoiding x86_64 native binary `Exec format error` failures on arm64/arm hosts. (#15443) Thanks @jogvan-k. - Discord: avoid misrouting numeric guild allowlist entries to `/channels/` by prefixing guild-only inputs with `guild:` during resolution. (#12326) Thanks @headswim. - Config: preserve `${VAR}` env references when writing config files so `openclaw config set/apply/patch` does not persist secrets to disk. Thanks @thewilloftheshadow. diff --git a/src/auto-reply/reply/session-resets.test.ts b/src/auto-reply/reply/session-resets.test.ts index 52b9d59d4..3c4810388 100644 --- a/src/auto-reply/reply/session-resets.test.ts +++ b/src/auto-reply/reply/session-resets.test.ts @@ -583,6 +583,49 @@ describe("initSessionState preserves behavior overrides across /new and /reset", expect(result.sessionEntry.ttsAuto).toBe("on"); }); + it("archives previous transcript file on /new reset", async () => { + const storePath = await createStorePath("openclaw-reset-archive-"); + const sessionKey = "agent:main:telegram:dm:user-archive"; + const existingSessionId = "existing-session-archive"; + await seedSessionStoreWithOverrides({ + storePath, + sessionKey, + sessionId: existingSessionId, + overrides: {}, + }); + const transcriptPath = path.join(path.dirname(storePath), `${existingSessionId}.jsonl`); + await fs.writeFile( + transcriptPath, + `${JSON.stringify({ message: { role: "user", content: "hello" } })}\n`, + "utf-8", + ); + + const cfg = { + session: { store: storePath, idleMinutes: 999 }, + } as OpenClawConfig; + + const result = await initSessionState({ + ctx: { + Body: "/new", + RawBody: "/new", + CommandBody: "/new", + From: "user-archive", + To: "bot", + ChatType: "direct", + SessionKey: sessionKey, + Provider: "telegram", + Surface: "telegram", + }, + cfg, + commandAuthorized: true, + }); + + expect(result.isNewSession).toBe(true); + expect(result.resetTriggered).toBe(true); + const files = await fs.readdir(path.dirname(storePath)); + expect(files.some((f) => f.startsWith(`${existingSessionId}.jsonl.reset.`))).toBe(true); + }); + it("idle-based new session does NOT preserve overrides (no entry to read)", async () => { const storePath = await createStorePath("openclaw-idle-no-preserve-"); const sessionKey = "agent:main:telegram:dm:new-user"; diff --git a/src/auto-reply/reply/session.ts b/src/auto-reply/reply/session.ts index 1f46b0f3a..5979c3966 100644 --- a/src/auto-reply/reply/session.ts +++ b/src/auto-reply/reply/session.ts @@ -26,6 +26,7 @@ import { type SessionScope, updateSessionStore, } from "../../config/sessions.js"; +import { archiveSessionTranscripts } from "../../gateway/session-utils.fs.js"; import { deliverSessionMaintenanceWarning } from "../../infra/session-maintenance-warning.js"; import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js"; import { normalizeMainKey } from "../../routing/session-key.js"; @@ -380,6 +381,17 @@ export async function initSessionState(params: { }, ); + // Archive old transcript so it doesn't accumulate on disk (#14869). + if (previousSessionEntry?.sessionId) { + archiveSessionTranscripts({ + sessionId: previousSessionEntry.sessionId, + storePath, + sessionFile: previousSessionEntry.sessionFile, + agentId, + reason: "reset", + }); + } + const sessionCtx: TemplateContext = { ...ctx, // Keep BodyStripped aligned with Body (best default for agent prompts). diff --git a/src/gateway/server-methods/sessions.ts b/src/gateway/server-methods/sessions.ts index 9dbe051a7..eb6618989 100644 --- a/src/gateway/server-methods/sessions.ts +++ b/src/gateway/server-methods/sessions.ts @@ -28,6 +28,7 @@ import { } from "../protocol/index.js"; import { archiveFileOnDisk, + archiveSessionTranscripts, listSessionsFromStore, loadCombinedSessionStoreForGateway, loadSessionEntry, @@ -68,6 +69,25 @@ function migrateAndPruneSessionStoreKey(params: { return { target, primaryKey, entry: params.store[primaryKey] }; } +function archiveSessionTranscriptsForSession(params: { + sessionId: string | undefined; + storePath: string; + sessionFile?: string; + agentId?: string; + reason: "reset" | "deleted"; +}): string[] { + if (!params.sessionId) { + return []; + } + return archiveSessionTranscripts({ + sessionId: params.sessionId, + storePath: params.storePath, + sessionFile: params.sessionFile, + agentId: params.agentId, + reason: params.reason, + }); +} + export const sessionsHandlers: GatewayRequestHandlers = { "sessions.list": ({ params, respond }) => { if (!validateSessionsListParams(params)) { @@ -259,9 +279,13 @@ export const sessionsHandlers: GatewayRequestHandlers = { const cfg = loadConfig(); const target = resolveGatewaySessionStoreTarget({ cfg, key }); const storePath = target.storePath; + let oldSessionId: string | undefined; + let oldSessionFile: string | undefined; const next = await updateSessionStore(storePath, (store) => { const { primaryKey } = migrateAndPruneSessionStoreKey({ cfg, key, store }); const entry = store[primaryKey]; + oldSessionId = entry?.sessionId; + oldSessionFile = entry?.sessionFile; const now = Date.now(); const nextEntry: SessionEntry = { sessionId: randomUUID(), @@ -289,6 +313,14 @@ export const sessionsHandlers: GatewayRequestHandlers = { store[primaryKey] = nextEntry; return nextEntry; }); + // Archive old transcript so it doesn't accumulate on disk (#14869). + archiveSessionTranscriptsForSession({ + sessionId: oldSessionId, + storePath, + sessionFile: oldSessionFile, + agentId: target.agentId, + reason: "reset", + }); respond(true, { ok: true, key: target.canonicalKey, entry: next }, undefined); }, "sessions.delete": async ({ params, respond }) => { @@ -357,24 +389,15 @@ export const sessionsHandlers: GatewayRequestHandlers = { } }); - const archived: string[] = []; - if (deleteTranscript && sessionId) { - for (const candidate of resolveSessionTranscriptCandidates( - sessionId, - storePath, - entry?.sessionFile, - target.agentId, - )) { - if (!fs.existsSync(candidate)) { - continue; - } - try { - archived.push(archiveFileOnDisk(candidate, "deleted")); - } catch { - // Best-effort. - } - } - } + const archived = deleteTranscript + ? archiveSessionTranscriptsForSession({ + sessionId, + storePath, + sessionFile: entry?.sessionFile, + agentId: target.agentId, + reason: "deleted", + }) + : []; respond(true, { ok: true, key: target.canonicalKey, deleted: existed, archived }, undefined); }, diff --git a/src/gateway/server.sessions.gateway-server-sessions-a.e2e.test.ts b/src/gateway/server.sessions.gateway-server-sessions-a.e2e.test.ts index d7b2c1f3f..1eb83fcf7 100644 --- a/src/gateway/server.sessions.gateway-server-sessions-a.e2e.test.ts +++ b/src/gateway/server.sessions.gateway-server-sessions-a.e2e.test.ts @@ -361,6 +361,8 @@ describe("gateway server sessions", () => { expect(reset.ok).toBe(true); expect(reset.payload?.key).toBe("agent:main:main"); expect(reset.payload?.entry.sessionId).not.toBe("sess-main"); + const filesAfterReset = await fs.readdir(dir); + expect(filesAfterReset.some((f) => f.startsWith("sess-main.jsonl.reset."))).toBe(true); const badThinking = await rpcReq(ws, "sessions.patch", { key: "agent:main:main", diff --git a/src/gateway/session-utils.fs.test.ts b/src/gateway/session-utils.fs.test.ts index 0924f2fe7..0e9346f30 100644 --- a/src/gateway/session-utils.fs.test.ts +++ b/src/gateway/session-utils.fs.test.ts @@ -3,6 +3,7 @@ import os from "node:os"; import path from "node:path"; import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; import { + archiveSessionTranscripts, readFirstUserMessageFromTranscript, readLastMessagePreviewFromTranscript, readSessionMessages, @@ -553,3 +554,80 @@ describe("resolveSessionTranscriptCandidates safety", () => { expect(normalizedCandidates).toContain(expectedFallback); }); }); + +describe("archiveSessionTranscripts", () => { + let tmpDir: string; + let storePath: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-archive-test-")); + storePath = path.join(tmpDir, "sessions.json"); + vi.stubEnv("OPENCLAW_HOME", tmpDir); + }); + + afterEach(() => { + vi.unstubAllEnvs(); + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + test("archives existing transcript file and returns archived path", () => { + const sessionId = "sess-archive-1"; + const transcriptPath = path.join(tmpDir, `${sessionId}.jsonl`); + fs.writeFileSync(transcriptPath, '{"type":"session"}\n', "utf-8"); + + const archived = archiveSessionTranscripts({ + sessionId, + storePath, + reason: "reset", + }); + + expect(archived).toHaveLength(1); + expect(archived[0]).toContain(".reset."); + expect(fs.existsSync(transcriptPath)).toBe(false); + expect(fs.existsSync(archived[0])).toBe(true); + }); + + test("archives transcript found via explicit sessionFile path", () => { + const sessionId = "sess-archive-2"; + const customPath = path.join(tmpDir, "custom-transcript.jsonl"); + fs.writeFileSync(customPath, '{"type":"session"}\n', "utf-8"); + + const archived = archiveSessionTranscripts({ + sessionId, + storePath: undefined, + sessionFile: customPath, + reason: "reset", + }); + + expect(archived).toHaveLength(1); + expect(fs.existsSync(customPath)).toBe(false); + expect(fs.existsSync(archived[0])).toBe(true); + }); + + test("returns empty array when no transcript files exist", () => { + const archived = archiveSessionTranscripts({ + sessionId: "nonexistent-session", + storePath, + reason: "reset", + }); + + expect(archived).toEqual([]); + }); + + test("skips files that do not exist and archives only existing ones", () => { + const sessionId = "sess-archive-3"; + const transcriptPath = path.join(tmpDir, `${sessionId}.jsonl`); + fs.writeFileSync(transcriptPath, '{"type":"session"}\n', "utf-8"); + + const archived = archiveSessionTranscripts({ + sessionId, + storePath, + sessionFile: "/nonexistent/path/file.jsonl", + reason: "deleted", + }); + + expect(archived).toHaveLength(1); + expect(archived[0]).toContain(".deleted."); + expect(fs.existsSync(transcriptPath)).toBe(false); + }); +}); diff --git a/src/gateway/session-utils.fs.ts b/src/gateway/session-utils.fs.ts index 87ea63170..c919214d4 100644 --- a/src/gateway/session-utils.fs.ts +++ b/src/gateway/session-utils.fs.ts @@ -102,13 +102,45 @@ export function resolveSessionTranscriptCandidates( return Array.from(new Set(candidates)); } -export function archiveFileOnDisk(filePath: string, reason: string): string { +export type ArchiveFileReason = "bak" | "reset" | "deleted"; + +export function archiveFileOnDisk(filePath: string, reason: ArchiveFileReason): string { const ts = new Date().toISOString().replaceAll(":", "-"); const archived = `${filePath}.${reason}.${ts}`; fs.renameSync(filePath, archived); return archived; } +/** + * Archives all transcript files for a given session. + * Best-effort: silently skips files that don't exist or fail to rename. + */ +export function archiveSessionTranscripts(opts: { + sessionId: string; + storePath: string | undefined; + sessionFile?: string; + agentId?: string; + reason: "reset" | "deleted"; +}): string[] { + const archived: string[] = []; + for (const candidate of resolveSessionTranscriptCandidates( + opts.sessionId, + opts.storePath, + opts.sessionFile, + opts.agentId, + )) { + if (!fs.existsSync(candidate)) { + continue; + } + try { + archived.push(archiveFileOnDisk(candidate, opts.reason)); + } catch { + // Best-effort. + } + } + return archived; +} + function jsonUtf8Bytes(value: unknown): number { try { return Buffer.byteLength(JSON.stringify(value), "utf8"); diff --git a/src/gateway/session-utils.ts b/src/gateway/session-utils.ts index 1c51a91e1..fe13f78b0 100644 --- a/src/gateway/session-utils.ts +++ b/src/gateway/session-utils.ts @@ -40,6 +40,7 @@ import { export { archiveFileOnDisk, + archiveSessionTranscripts, capArrayByJsonBytes, readFirstUserMessageFromTranscript, readLastMessagePreviewFromTranscript,