feat: add stuck loop detection and exponential backoff infrastructure for agent polling (#17118)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: eebabf679b983e5a660fb3cef371e1303f11f615
Co-authored-by: akramcodez <179671552+akramcodez@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
2026-02-17 01:46:35 +05:30
parent 1f99d82712
commit e5eb5b3e43
11 changed files with 1769 additions and 2 deletions
--- a/src/agents/tool-loop-detection.test.ts
+++ b/src/agents/tool-loop-detection.test.ts
@@ -0,0 +1,422 @@
+import { describe, expect, it } from "vitest";
+import type { SessionState } from "../logging/diagnostic-session-state.js";
+import {
+  CRITICAL_THRESHOLD,
+  GLOBAL_CIRCUIT_BREAKER_THRESHOLD,
+  TOOL_CALL_HISTORY_SIZE,
+  WARNING_THRESHOLD,
+  detectToolCallLoop,
+  getToolCallStats,
+  hashToolCall,
+  recordToolCall,
+  recordToolCallOutcome,
+} from "./tool-loop-detection.js";
+
+/**
+ * Builds a `SessionState` fixture with only three fields set: active now, "processing", depth 0.
+ */
+function createState(): SessionState {
+  const fixture: SessionState = { lastActivity: Date.now(), state: "processing", queueDepth: 0 };
+  return fixture;
+}
+
+/**
+ * Test helper: records one tool call and then reports its outcome.
+ * Both records share the call id `<toolName>-<index>`, so callers can vary
+ * `index` to produce distinct ids for otherwise identical calls.
+ */
+function recordSuccessfulCall(
+  state: SessionState,
+  toolName: string,
+  params: unknown,
+  result: unknown,
+  index: number,
+): void {
+  const callId = [toolName, index].join("-");
+  recordToolCall(state, toolName, params, callId);
+  recordToolCallOutcome(state, { toolName, toolParams: params, toolCallId: callId, result });
+}
+
+describe("tool-loop-detection", () => { // Suite for the hashing, recording, detection and stats exports of ./tool-loop-detection.js
+  describe("hashToolCall", () => { // Determinism, sensitivity to tool name / params, and fixed output size
+    it("creates consistent hash for same tool and params", () => {
+      const hash1 = hashToolCall("read", { path: "/file.txt" });
+      const hash2 = hashToolCall("read", { path: "/file.txt" });
+      expect(hash1).toBe(hash2);
+    });
+
+    it("creates different hashes for different params", () => {
+      const hash1 = hashToolCall("read", { path: "/file1.txt" });
+      const hash2 = hashToolCall("read", { path: "/file2.txt" });
+      expect(hash1).not.toBe(hash2);
+    });
+
+    it("creates different hashes for different tools", () => {
+      const hash1 = hashToolCall("read", { path: "/file.txt" });
+      const hash2 = hashToolCall("write", { path: "/file.txt" });
+      expect(hash1).not.toBe(hash2);
+    });
+
+    it("handles non-object params", () => { // Only checks that string, number and null params do not throw
+      expect(() => hashToolCall("tool", "string-param")).not.toThrow();
+      expect(() => hashToolCall("tool", 123)).not.toThrow();
+      expect(() => hashToolCall("tool", null)).not.toThrow();
+    });
+
+    it("produces deterministic hashes regardless of key order", () => {
+      const hash1 = hashToolCall("tool", { a: 1, b: 2 });
+      const hash2 = hashToolCall("tool", { b: 2, a: 1 }); // same entries, reversed insertion order
+      expect(hash1).toBe(hash2);
+    });
+
+    it("keeps hashes fixed-size even for large params", () => {
+      const payload = { data: "x".repeat(20_000) }; // 20,000-character string param
+      const hash = hashToolCall("read", payload);
+      expect(hash.startsWith("read:")).toBe(true);
+      expect(hash.length).toBe("read:".length + 64); // "<toolName>:" prefix followed by exactly 64 characters
+    });
+  });
+
+  describe("recordToolCall", () => { // History growth, sliding-window eviction and timestamps
+    it("adds tool call to empty history", () => {
+      const state = createState();
+
+      recordToolCall(state, "read", { path: "/file.txt" }, "call-1");
+
+      expect(state.toolCallHistory).toHaveLength(1);
+      expect(state.toolCallHistory?.[0]?.toolName).toBe("read");
+      expect(state.toolCallHistory?.[0]?.toolCallId).toBe("call-1");
+    });
+
+    it("maintains sliding window of last N calls", () => {
+      const state = createState();
+
+      for (let i = 0; i < TOOL_CALL_HISTORY_SIZE + 10; i += 1) { // overfill the window by 10 entries
+        recordToolCall(state, "tool", { iteration: i }, `call-${i}`);
+      }
+
+      expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE);
+
+      const oldestCall = state.toolCallHistory?.[0];
+      expect(oldestCall?.argsHash).toBe(hashToolCall("tool", { iteration: 10 })); // iterations 0-9 must have been evicted
+    });
+
+    it("records timestamp for each call", () => {
+      const state = createState();
+      const before = Date.now();
+      recordToolCall(state, "tool", { arg: 1 }, "call-ts");
+      const after = Date.now();
+
+      const timestamp = state.toolCallHistory?.[0]?.timestamp ?? 0; // 0 fallback makes a missing timestamp fail the lower bound
+      expect(timestamp).toBeGreaterThanOrEqual(before);
+      expect(timestamp).toBeLessThanOrEqual(after);
+    });
+  });
+
+  describe("detectToolCallLoop", () => { // Each test fills the history, then checks the verdict for one pending call
+    it("does not flag unique tool calls", () => {
+      const state = createState();
+
+      for (let i = 0; i < 15; i += 1) { // 15 reads, each with a distinct path
+        recordToolCall(state, "read", { path: `/file${i}.txt` }, `call-${i}`);
+      }
+
+      const result = detectToolCallLoop(state, "read", { path: "/new-file.txt" });
+      expect(result.stuck).toBe(false);
+    });
+
+    it("warns on generic repeated tool+args calls", () => {
+      const state = createState();
+      for (let i = 0; i < WARNING_THRESHOLD; i += 1) { // identical calls, recorded without any outcome
+        recordToolCall(state, "read", { path: "/same.txt" }, `warn-${i}`);
+      }
+
+      const result = detectToolCallLoop(state, "read", { path: "/same.txt" });
+
+      expect(result.stuck).toBe(true);
+      if (result.stuck) { // presumably narrows a union discriminated on `stuck` — confirm against the result type
+        expect(result.level).toBe("warning");
+        expect(result.detector).toBe("generic_repeat");
+        expect(result.count).toBe(WARNING_THRESHOLD);
+        expect(result.message).toContain("WARNING");
+        expect(result.message).toContain(`${WARNING_THRESHOLD} times`);
+      }
+    });
+
+    it("keeps generic loops warn-only below global breaker threshold", () => {
+      const state = createState();
+      const params = { path: "/same.txt" };
+      const result = {
+        content: [{ type: "text", text: "same output" }],
+        details: { ok: true },
+      };
+
+      for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) { // same call and same result, CRITICAL_THRESHOLD times
+        recordSuccessfulCall(state, "read", params, result, i);
+      }
+
+      const loopResult = detectToolCallLoop(state, "read", params);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("warning"); // a generic tool is not escalated at CRITICAL_THRESHOLD
+      }
+    });
+
+    it("warns for known polling no-progress loops", () => {
+      const state = createState();
+      const params = { action: "poll", sessionId: "sess-1" };
+      const result = {
+        content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
+        details: { status: "running", aggregated: "steady" },
+      };
+
+      for (let i = 0; i < WARNING_THRESHOLD; i += 1) { // identical poll with an identical result every time
+        recordSuccessfulCall(state, "process", params, result, i);
+      }
+
+      const loopResult = detectToolCallLoop(state, "process", params);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("warning");
+        expect(loopResult.detector).toBe("known_poll_no_progress");
+        expect(loopResult.message).toContain("no progress");
+      }
+    });
+
+    it("blocks known polling no-progress loops at critical threshold", () => {
+      const state = createState();
+      const params = { action: "poll", sessionId: "sess-1" };
+      const result = {
+        content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
+        details: { status: "running", aggregated: "steady" },
+      };
+
+      for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) { // same setup as the warning case, repeated up to CRITICAL_THRESHOLD
+        recordSuccessfulCall(state, "process", params, result, i);
+      }
+
+      const loopResult = detectToolCallLoop(state, "process", params);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("critical");
+        expect(loopResult.detector).toBe("known_poll_no_progress");
+        expect(loopResult.message).toContain("CRITICAL");
+      }
+    });
+
+    it("does not block known polling when output progresses", () => {
+      const state = createState();
+      const params = { action: "poll", sessionId: "sess-1" };
+
+      for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) { // same poll params, but every result differs
+        const result = {
+          content: [{ type: "text", text: `line ${i}` }],
+          details: { status: "running", aggregated: `line ${i}` },
+        };
+        recordSuccessfulCall(state, "process", params, result, i);
+      }
+
+      const loopResult = detectToolCallLoop(state, "process", params);
+      expect(loopResult.stuck).toBe(false);
+    });
+
+    it("blocks any tool with global no-progress breaker at 30", () => { // NOTE(review): title hard-codes 30; loop uses GLOBAL_CIRCUIT_BREAKER_THRESHOLD — confirm they match
+      const state = createState();
+      const params = { path: "/same.txt" };
+      const result = {
+        content: [{ type: "text", text: "same output" }],
+        details: { ok: true },
+      };
+
+      for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
+        recordSuccessfulCall(state, "read", params, result, i);
+      }
+
+      const loopResult = detectToolCallLoop(state, "read", params);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("critical");
+        expect(loopResult.detector).toBe("global_circuit_breaker");
+        expect(loopResult.message).toContain("global circuit breaker");
+      }
+    });
+
+    it("warns on ping-pong alternating patterns", () => {
+      const state = createState();
+      const readParams = { path: "/a.txt" };
+      const listParams = { dir: "/workspace" };
+
+      for (let i = 0; i < WARNING_THRESHOLD - 1; i += 1) { // NOTE(review): last recorded call is "read" only if WARNING_THRESHOLD is even — confirm
+        if (i % 2 === 0) {
+          recordToolCall(state, "read", readParams, `read-${i}`);
+        } else {
+          recordToolCall(state, "list", listParams, `list-${i}`);
+        }
+      }
+
+      const loopResult = detectToolCallLoop(state, "list", listParams);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("warning");
+        expect(loopResult.detector).toBe("ping_pong");
+        expect(loopResult.count).toBe(WARNING_THRESHOLD); // WARNING_THRESHOLD - 1 recorded calls plus the pending one
+        expect(loopResult.message).toContain("ping-pong loop");
+      }
+    });
+
+    it("blocks ping-pong alternating patterns at critical threshold", () => {
+      const state = createState();
+      const readParams = { path: "/a.txt" };
+      const listParams = { dir: "/workspace" };
+
+      for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) { // NOTE(review): same parity assumption, here on CRITICAL_THRESHOLD — confirm
+        if (i % 2 === 0) {
+          recordSuccessfulCall(
+            state,
+            "read",
+            readParams,
+            { content: [{ type: "text", text: "read stable" }], details: { ok: true } }, // identical result on every read
+            i,
+          );
+        } else {
+          recordSuccessfulCall(
+            state,
+            "list",
+            listParams,
+            { content: [{ type: "text", text: "list stable" }], details: { ok: true } }, // identical result on every list
+            i,
+          );
+        }
+      }
+
+      const loopResult = detectToolCallLoop(state, "list", listParams);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("critical");
+        expect(loopResult.detector).toBe("ping_pong");
+        expect(loopResult.count).toBe(CRITICAL_THRESHOLD);
+        expect(loopResult.message).toContain("CRITICAL");
+        expect(loopResult.message).toContain("ping-pong loop");
+      }
+    });
+
+    it("does not block ping-pong at critical threshold when outcomes are progressing", () => {
+      const state = createState();
+      const readParams = { path: "/a.txt" };
+      const listParams = { dir: "/workspace" };
+
+      for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) { // NOTE(review): same parity assumption on CRITICAL_THRESHOLD — confirm
+        if (i % 2 === 0) {
+          recordSuccessfulCall(
+            state,
+            "read",
+            readParams,
+            { content: [{ type: "text", text: `read ${i}` }], details: { ok: true } }, // result text changes with i
+            i,
+          );
+        } else {
+          recordSuccessfulCall(
+            state,
+            "list",
+            listParams,
+            { content: [{ type: "text", text: `list ${i}` }], details: { ok: true } }, // result text changes with i
+            i,
+          );
+        }
+      }
+
+      const loopResult = detectToolCallLoop(state, "list", listParams);
+      expect(loopResult.stuck).toBe(true); // still reported as stuck...
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("warning"); // ...but only at warning level, not critical
+        expect(loopResult.detector).toBe("ping_pong");
+        expect(loopResult.count).toBe(CRITICAL_THRESHOLD);
+      }
+    });
+
+    it("does not flag ping-pong when alternation is broken", () => {
+      const state = createState();
+      recordToolCall(state, "read", { path: "/a.txt" }, "a1");
+      recordToolCall(state, "list", { dir: "/workspace" }, "b1");
+      recordToolCall(state, "read", { path: "/a.txt" }, "a2");
+      recordToolCall(state, "write", { path: "/tmp/out.txt" }, "c1"); // breaks alternation
+
+      const loopResult = detectToolCallLoop(state, "list", { dir: "/workspace" });
+      expect(loopResult.stuck).toBe(false);
+    });
+
+    it("records fixed-size result hashes for large tool outputs", () => { // Exercises recordToolCallOutcome rather than detectToolCallLoop
+      const state = createState();
+      const params = { action: "log", sessionId: "sess-big" };
+      const toolCallId = "log-big";
+      recordToolCall(state, "process", params, toolCallId);
+      recordToolCallOutcome(state, {
+        toolName: "process",
+        toolParams: params,
+        toolCallId,
+        result: {
+          content: [{ type: "text", text: "y".repeat(40_000) }], // 40,000-character output
+          details: { status: "running", totalLines: 1, totalChars: 40_000 },
+        },
+      });
+
+      const entry = state.toolCallHistory?.find((call) => call.toolCallId === toolCallId);
+      expect(typeof entry?.resultHash).toBe("string");
+      expect(entry?.resultHash?.length).toBe(64); // stored hash is 64 characters despite the large output
+    });
+
+    it("handles empty history", () => { // State here has no toolCallHistory set at all
+      const state = createState();
+
+      const result = detectToolCallLoop(state, "tool", { arg: 1 });
+      expect(result.stuck).toBe(false);
+    });
+  });
+
+  describe("getToolCallStats", () => { // Aggregate counts over the recorded history
+    it("returns zero stats for empty history", () => {
+      const state = createState();
+
+      const stats = getToolCallStats(state);
+      expect(stats.totalCalls).toBe(0);
+      expect(stats.uniquePatterns).toBe(0);
+      expect(stats.mostFrequent).toBeNull();
+    });
+
+    it("counts total calls and unique patterns", () => {
+      const state = createState();
+
+      for (let i = 0; i < 5; i += 1) {
+        recordToolCall(state, "read", { path: "/file.txt" }, `same-${i}`);
+      }
+
+      recordToolCall(state, "write", { path: "/output.txt" }, "write-1");
+      recordToolCall(state, "list", { dir: "/home" }, "list-1");
+      recordToolCall(state, "read", { path: "/other.txt" }, "read-other");
+
+      const stats = getToolCallStats(state);
+      expect(stats.totalCalls).toBe(8); // 5 + 1 + 1 + 1 recorded calls
+      expect(stats.uniquePatterns).toBe(4); // read /file.txt, write /output.txt, list /home, read /other.txt
+    });
+
+    it("identifies most frequent pattern", () => {
+      const state = createState();
+
+      for (let i = 0; i < 3; i += 1) {
+        recordToolCall(state, "read", { path: "/file1.txt" }, `p1-${i}`);
+      }
+
+      for (let i = 0; i < 7; i += 1) {
+        recordToolCall(state, "read", { path: "/file2.txt" }, `p2-${i}`);
+      }
+
+      for (let i = 0; i < 2; i += 1) {
+        recordToolCall(state, "write", { path: "/output.txt" }, `p3-${i}`);
+      }
+
+      const stats = getToolCallStats(state);
+      expect(stats.mostFrequent?.toolName).toBe("read");
+      expect(stats.mostFrequent?.count).toBe(7); // the seven read calls on /file2.txt
+    });
+  });
+});