import { describe, expect, it } from "vitest";
import type { SessionState } from "../logging/diagnostic-session-state.js";
import {
  CRITICAL_THRESHOLD,
  GLOBAL_CIRCUIT_BREAKER_THRESHOLD,
  TOOL_CALL_HISTORY_SIZE,
  WARNING_THRESHOLD,
  detectToolCallLoop,
  getToolCallStats,
  hashToolCall,
  recordToolCall,
  recordToolCallOutcome,
} from "./tool-loop-detection.js";

/**
 * Builds a fresh session state for a test case.
 *
 * Only `lastActivity`, `state` and `queueDepth` are populated; the tool-call
 * history is left unset so each test starts from an empty history.
 */
function createState(): SessionState {
  return {
    lastActivity: Date.now(),
    state: "processing",
    queueDepth: 0,
  };
}

/**
 * Records a tool call and immediately records its outcome.
 *
 * @param state - Session state whose history receives the call.
 * @param toolName - Name of the tool being called.
 * @param params - Parameters passed to the tool.
 * @param result - Result reported as the outcome of the call.
 * @param index - Used to derive the call id as `${toolName}-${index}`.
 */
function recordSuccessfulCall(
  state: SessionState,
  toolName: string,
  params: unknown,
  result: unknown,
  index: number,
): void {
  const toolCallId = `${toolName}-${index}`;
  recordToolCall(state, toolName, params, toolCallId);
  recordToolCallOutcome(state, {
    toolName,
    toolParams: params,
    toolCallId,
    result,
  });
}

describe("tool-loop-detection", () => {
  describe("hashToolCall", () => {
    it("creates consistent hash for same tool and params", () => {
      const hash1 = hashToolCall("read", { path: "/file.txt" });
      const hash2 = hashToolCall("read", { path: "/file.txt" });
      expect(hash1).toBe(hash2);
    });

    it("creates different hashes for different params", () => {
      const hash1 = hashToolCall("read", { path: "/file1.txt" });
      const hash2 = hashToolCall("read", { path: "/file2.txt" });
      expect(hash1).not.toBe(hash2);
    });

    it("creates different hashes for different tools", () => {
      const hash1 = hashToolCall("read", { path: "/file.txt" });
      const hash2 = hashToolCall("write", { path: "/file.txt" });
      expect(hash1).not.toBe(hash2);
    });

    it("handles non-object params", () => {
      expect(() => hashToolCall("tool", "string-param")).not.toThrow();
      expect(() => hashToolCall("tool", 123)).not.toThrow();
      expect(() => hashToolCall("tool", null)).not.toThrow();
    });

    it("produces deterministic hashes regardless of key order", () => {
      const hash1 = hashToolCall("tool", { a: 1, b: 2 });
      const hash2 = hashToolCall("tool", { b: 2, a: 1 });
      expect(hash1).toBe(hash2);
    });

    it("keeps hashes fixed-size even for large params", () => {
      const payload = { data: "x".repeat(20_000) };
      const hash = hashToolCall("read", payload);
      expect(hash.startsWith("read:")).toBe(true);
      // Expected shape: "<toolName>:" prefix followed by a 64-character digest.
      expect(hash.length).toBe("read:".length + 64);
    });
  });

  describe("recordToolCall", () => {
    it("adds tool call to empty history", () => {
      const state = createState();
      recordToolCall(state, "read", { path: "/file.txt" }, "call-1");

      expect(state.toolCallHistory).toHaveLength(1);
      expect(state.toolCallHistory?.[0]?.toolName).toBe("read");
      expect(state.toolCallHistory?.[0]?.toolCallId).toBe("call-1");
    });

    it("maintains sliding window of last N calls", () => {
      const state = createState();
      for (let i = 0; i < TOOL_CALL_HISTORY_SIZE + 10; i += 1) {
        recordToolCall(state, "tool", { iteration: i }, `call-${i}`);
      }

      expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE);

      // Ten calls overflowed the window, so iteration 10 should now be oldest.
      const oldestCall = state.toolCallHistory?.[0];
      expect(oldestCall?.argsHash).toBe(hashToolCall("tool", { iteration: 10 }));
    });

    it("records timestamp for each call", () => {
      const state = createState();
      const before = Date.now();
      recordToolCall(state, "tool", { arg: 1 }, "call-ts");
      const after = Date.now();

      const timestamp = state.toolCallHistory?.[0]?.timestamp ?? 0;
      expect(timestamp).toBeGreaterThanOrEqual(before);
      expect(timestamp).toBeLessThanOrEqual(after);
    });
  });

  describe("detectToolCallLoop", () => {
    it("does not flag unique tool calls", () => {
      const state = createState();
      for (let i = 0; i < 15; i += 1) {
        recordToolCall(state, "read", { path: `/file${i}.txt` }, `call-${i}`);
      }

      const result = detectToolCallLoop(state, "read", { path: "/new-file.txt" });
      expect(result.stuck).toBe(false);
    });

    it("warns on generic repeated tool+args calls", () => {
      const state = createState();
      for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
        recordToolCall(state, "read", { path: "/same.txt" }, `warn-${i}`);
      }

      const result = detectToolCallLoop(state, "read", { path: "/same.txt" });
      expect(result.stuck).toBe(true);
      if (result.stuck) {
        expect(result.level).toBe("warning");
        expect(result.detector).toBe("generic_repeat");
        expect(result.count).toBe(WARNING_THRESHOLD);
        expect(result.message).toContain("WARNING");
        expect(result.message).toContain(`${WARNING_THRESHOLD} times`);
      }
    });

    it("keeps generic loops warn-only below global breaker threshold", () => {
      const state = createState();
      const params = { path: "/same.txt" };
      const result = {
        content: [{ type: "text", text: "same output" }],
        details: { ok: true },
      };

      for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
        recordSuccessfulCall(state, "read", params, result, i);
      }

      const loopResult = detectToolCallLoop(state, "read", params);
      expect(loopResult.stuck).toBe(true);
      if (loopResult.stuck) {
        expect(loopResult.level).toBe("warning");
      }
    });

    it("warns for known polling no-progress loops", () => {
      const state = createState();
      const params = { action: "poll", sessionId: "sess-1" };
      const result = {
        content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
        details: { status: "running", aggregated: "steady" },
      };

      for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
        recordSuccessfulCall(state, "process", params, result, i);
      }

      const loopResult = detectToolCallLoop(state, "process", params);
      expect(loopResult.stuck).toBe(true);
      if (loopResult.stuck) {
        expect(loopResult.level).toBe("warning");
        expect(loopResult.detector).toBe("known_poll_no_progress");
        expect(loopResult.message).toContain("no progress");
      }
    });

    it("blocks known polling no-progress loops at critical threshold", () => {
      const state = createState();
      const params = { action: "poll", sessionId: "sess-1" };
      const result = {
        content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
        details: { status: "running", aggregated: "steady" },
      };

      for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
        recordSuccessfulCall(state, "process", params, result, i);
      }

      const loopResult = detectToolCallLoop(state, "process", params);
      expect(loopResult.stuck).toBe(true);
      if (loopResult.stuck) {
        expect(loopResult.level).toBe("critical");
        expect(loopResult.detector).toBe("known_poll_no_progress");
        expect(loopResult.message).toContain("CRITICAL");
      }
    });

    it("does not block known polling when output progresses", () => {
      const state = createState();
      const params = { action: "poll", sessionId: "sess-1" };

      // Every poll returns different text, so each outcome differs from the last.
      for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
        const result = {
          content: [{ type: "text", text: `line ${i}` }],
          details: { status: "running", aggregated: `line ${i}` },
        };
        recordSuccessfulCall(state, "process", params, result, i);
      }

      const loopResult = detectToolCallLoop(state, "process", params);
      expect(loopResult.stuck).toBe(false);
    });

    it("blocks any tool with global no-progress breaker at 30", () => {
      const state = createState();
      const params = { path: "/same.txt" };
      const result = {
        content: [{ type: "text", text: "same output" }],
        details: { ok: true },
      };

      for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
        recordSuccessfulCall(state, "read", params, result, i);
      }

      const loopResult = detectToolCallLoop(state, "read", params);
      expect(loopResult.stuck).toBe(true);
      if (loopResult.stuck) {
        expect(loopResult.level).toBe("critical");
        expect(loopResult.detector).toBe("global_circuit_breaker");
        expect(loopResult.message).toContain("global circuit breaker");
      }
    });

    it("warns on ping-pong alternating patterns", () => {
      const state = createState();
      const readParams = { path: "/a.txt" };
      const listParams = { dir: "/workspace" };

      // Record one fewer than the threshold; the call being checked below is
      // expected to bring the reported count up to WARNING_THRESHOLD.
      for (let i = 0; i < WARNING_THRESHOLD - 1; i += 1) {
        if (i % 2 === 0) {
          recordToolCall(state, "read", readParams, `read-${i}`);
        } else {
          recordToolCall(state, "list", listParams, `list-${i}`);
        }
      }

      const loopResult = detectToolCallLoop(state, "list", listParams);
      expect(loopResult.stuck).toBe(true);
      if (loopResult.stuck) {
        expect(loopResult.level).toBe("warning");
        expect(loopResult.detector).toBe("ping_pong");
        expect(loopResult.count).toBe(WARNING_THRESHOLD);
        expect(loopResult.message).toContain("ping-pong loop");
      }
    });

    it("blocks ping-pong alternating patterns at critical threshold", () => {
      const state = createState();
      const readParams = { path: "/a.txt" };
      const listParams = { dir: "/workspace" };

      // Both sides of the alternation return the same output every time.
      for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
        if (i % 2 === 0) {
          recordSuccessfulCall(
            state,
            "read",
            readParams,
            { content: [{ type: "text", text: "read stable" }], details: { ok: true } },
            i,
          );
        } else {
          recordSuccessfulCall(
            state,
            "list",
            listParams,
            { content: [{ type: "text", text: "list stable" }], details: { ok: true } },
            i,
          );
        }
      }

      const loopResult = detectToolCallLoop(state, "list", listParams);
      expect(loopResult.stuck).toBe(true);
      if (loopResult.stuck) {
        expect(loopResult.level).toBe("critical");
        expect(loopResult.detector).toBe("ping_pong");
        expect(loopResult.count).toBe(CRITICAL_THRESHOLD);
        expect(loopResult.message).toContain("CRITICAL");
        expect(loopResult.message).toContain("ping-pong loop");
      }
    });

    it("does not block ping-pong at critical threshold when outcomes are progressing", () => {
      const state = createState();
      const readParams = { path: "/a.txt" };
      const listParams = { dir: "/workspace" };

      // Same alternation as the blocking case, but every outcome is different.
      for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
        if (i % 2 === 0) {
          recordSuccessfulCall(
            state,
            "read",
            readParams,
            { content: [{ type: "text", text: `read ${i}` }], details: { ok: true } },
            i,
          );
        } else {
          recordSuccessfulCall(
            state,
            "list",
            listParams,
            { content: [{ type: "text", text: `list ${i}` }], details: { ok: true } },
            i,
          );
        }
      }

      const loopResult = detectToolCallLoop(state, "list", listParams);
      expect(loopResult.stuck).toBe(true);
      if (loopResult.stuck) {
        expect(loopResult.level).toBe("warning");
        expect(loopResult.detector).toBe("ping_pong");
        expect(loopResult.count).toBe(CRITICAL_THRESHOLD);
      }
    });

    it("does not flag ping-pong when alternation is broken", () => {
      const state = createState();
      recordToolCall(state, "read", { path: "/a.txt" }, "a1");
      recordToolCall(state, "list", { dir: "/workspace" }, "b1");
      recordToolCall(state, "read", { path: "/a.txt" }, "a2");
      // This unrelated call breaks the read/list alternation.
      recordToolCall(state, "write", { path: "/tmp/out.txt" }, "c1");

      const loopResult = detectToolCallLoop(state, "list", { dir: "/workspace" });
      expect(loopResult.stuck).toBe(false);
    });

    it("records fixed-size result hashes for large tool outputs", () => {
      const state = createState();
      const params = { action: "log", sessionId: "sess-big" };
      const toolCallId = "log-big";

      recordToolCall(state, "process", params, toolCallId);
      recordToolCallOutcome(state, {
        toolName: "process",
        toolParams: params,
        toolCallId,
        result: {
          content: [{ type: "text", text: "y".repeat(40_000) }],
          details: { status: "running", totalLines: 1, totalChars: 40_000 },
        },
      });

      const entry = state.toolCallHistory?.find((call) => call.toolCallId === toolCallId);
      expect(typeof entry?.resultHash).toBe("string");
      expect(entry?.resultHash?.length).toBe(64);
    });

    it("handles empty history", () => {
      const state = createState();
      const result = detectToolCallLoop(state, "tool", { arg: 1 });
      expect(result.stuck).toBe(false);
    });
  });

  describe("getToolCallStats", () => {
    it("returns zero stats for empty history", () => {
      const state = createState();
      const stats = getToolCallStats(state);

      expect(stats.totalCalls).toBe(0);
      expect(stats.uniquePatterns).toBe(0);
      expect(stats.mostFrequent).toBeNull();
    });

    it("counts total calls and unique patterns", () => {
      const state = createState();
      for (let i = 0; i < 5; i += 1) {
        recordToolCall(state, "read", { path: "/file.txt" }, `same-${i}`);
      }
      recordToolCall(state, "write", { path: "/output.txt" }, "write-1");
      recordToolCall(state, "list", { dir: "/home" }, "list-1");
      recordToolCall(state, "read", { path: "/other.txt" }, "read-other");

      const stats = getToolCallStats(state);
      // 5 identical reads + 3 distinct calls = 8 calls across 4 patterns.
      expect(stats.totalCalls).toBe(8);
      expect(stats.uniquePatterns).toBe(4);
    });

    it("identifies most frequent pattern", () => {
      const state = createState();
      for (let i = 0; i < 3; i += 1) {
        recordToolCall(state, "read", { path: "/file1.txt" }, `p1-${i}`);
      }
      for (let i = 0; i < 7; i += 1) {
        recordToolCall(state, "read", { path: "/file2.txt" }, `p2-${i}`);
      }
      for (let i = 0; i < 2; i += 1) {
        recordToolCall(state, "write", { path: "/output.txt" }, `p3-${i}`);
      }

      const stats = getToolCallStats(state);
      expect(stats.mostFrequent?.toolName).toBe("read");
      expect(stats.mostFrequent?.count).toBe(7);
    });
  });
});