Files
openclaw/src/auto-reply/reply.directive.directive-behavior.defaults-think-low-reasoning-capable-models-no.test.ts
2026-02-24 04:39:53 +00:00

432 lines
13 KiB
TypeScript

import "./reply.directive.directive-behavior.e2e-mocks.js";
import { describe, expect, it, vi } from "vitest";
import { loadSessionStore } from "../config/sessions.js";
import {
assertModelSelection,
installDirectiveBehaviorE2EHooks,
loadModelCatalog,
makeEmbeddedTextResult,
makeWhatsAppDirectiveConfig,
mockEmbeddedTextResult,
replyText,
replyTexts,
runEmbeddedPiAgent,
sessionStorePath,
withTempHome,
} from "./reply.directive.directive-behavior.e2e-harness.js";
import { runModelDirectiveText } from "./reply.directive.directive-behavior.model-directive-test-utils.js";
import { getReplyFromConfig } from "./reply.js";
/**
 * Builds the directive test config used by the inline-directive test.
 *
 * Delegates to `makeWhatsAppDirectiveConfig` with `anthropic/claude-opus-4-5`
 * as the primary model and a `models` map that lists both
 * `anthropic/claude-opus-4-5` and `openai/gpt-4.1-mini`.
 *
 * @param home - Temporary home directory handed out by `withTempHome`.
 * @returns Whatever `makeWhatsAppDirectiveConfig` produces for these defaults.
 */
function makeDefaultModelConfig(home: string) {
  const config = makeWhatsAppDirectiveConfig(home, {
    model: { primary: "anthropic/claude-opus-4-5" },
    // NOTE(review): presumably this map acts as the model allowlist — confirm in the harness.
    models: {
      "anthropic/claude-opus-4-5": {},
      "openai/gpt-4.1-mini": {},
    },
  });
  return config;
}
/**
 * Makes the mocked embedded agent answer with `text`, sends a fixed inbound
 * "ping" message (MessageSid `msg-123`) through `getReplyFromConfig`, and
 * returns the first reply payload.
 *
 * @param home - Temporary home directory used to build the config.
 * @param text - Text the mocked agent should resolve with.
 * @returns The first element when the reply is an array, otherwise the reply itself.
 */
async function runReplyToCurrentCase(home: string, text: string) {
  const agentMock = vi.mocked(runEmbeddedPiAgent);
  agentMock.mockResolvedValue(makeEmbeddedTextResult(text));

  const reply = await getReplyFromConfig(
    { Body: "ping", From: "+1004", To: "+2000", MessageSid: "msg-123" },
    {},
    makeWhatsAppDirectiveConfig(home, { model: "anthropic/claude-opus-4-5" }),
  );

  // The reply may be a single payload or a list of payloads; normalise to one.
  if (Array.isArray(reply)) {
    return reply[0];
  }
  return reply;
}
/**
 * Sends a bare `/think` command and asserts the reported thinking level.
 *
 * The next `loadModelCatalog` call is primed with a single
 * `anthropic/claude-opus-4-5` entry whose `reasoning` flag is taken from
 * `params.reasoning`; the reply text must then mention `params.expectedLevel`
 * as the current level together with the fixed list of options.
 *
 * @param params.home - Temporary home directory used to build the config.
 * @param params.reasoning - Value of the catalog entry's `reasoning` flag.
 * @param params.expectedLevel - Thinking level the status reply must report.
 */
async function expectThinkStatusForReasoningModel(params: {
  home: string;
  reasoning: boolean;
  expectedLevel: "low" | "off";
}): Promise<void> {
  const { home, reasoning, expectedLevel } = params;

  // Queued once: only the catalog lookup made by the call below sees this entry.
  vi.mocked(loadModelCatalog).mockResolvedValueOnce([
    { id: "claude-opus-4-5", name: "Opus 4.5", provider: "anthropic", reasoning },
  ]);

  const statusText = replyText(
    await getReplyFromConfig(
      { Body: "/think", From: "+1222", To: "+1222", CommandAuthorized: true },
      {},
      makeWhatsAppDirectiveConfig(home, { model: "anthropic/claude-opus-4-5" }),
    ),
  );

  expect(statusText).toContain(`Current thinking level: ${expectedLevel}`);
  expect(statusText).toContain("Options: off, minimal, low, medium, high.");
}
/**
 * Primes the next `loadModelCatalog` call (once) with a single
 * `anthropic/claude-opus-4-5` entry flagged `reasoning: true`.
 */
function mockReasoningCapableCatalog() {
  const catalogMock = vi.mocked(loadModelCatalog);
  catalogMock.mockResolvedValueOnce([
    { id: "claude-opus-4-5", name: "Opus 4.5", provider: "anthropic", reasoning: true },
  ]);
}
/**
 * Sends a plain "hello" message against a reasoning-capable catalog and checks
 * which `thinkLevel` / `reasoningLevel` were handed to the embedded agent.
 *
 * The agent mock's call history is cleared first, so the "called once"
 * assertion only counts the run triggered here.
 *
 * @param params.home - Temporary home directory used to build the config.
 * @param params.expectedThinkLevel - `thinkLevel` expected on the agent call.
 * @param params.expectedReasoningLevel - `reasoningLevel` expected on the agent call.
 * @param params.thinkingDefault - Optional `thinkingDefault` config override;
 *   left out of the config entirely when not provided.
 */
async function runReasoningDefaultCase(params: {
  home: string;
  expectedThinkLevel: "low" | "off";
  expectedReasoningLevel: "off" | "on";
  thinkingDefault?: "off" | "low" | "medium" | "high";
}) {
  const { home, expectedThinkLevel, expectedReasoningLevel, thinkingDefault } = params;
  const agentMock = vi.mocked(runEmbeddedPiAgent);

  agentMock.mockClear();
  mockEmbeddedTextResult("done");
  mockReasoningCapableCatalog();

  await getReplyFromConfig(
    { Body: "hello", From: "+1004", To: "+2000" },
    {},
    makeWhatsAppDirectiveConfig(home, {
      model: { primary: "anthropic/claude-opus-4-5" },
      // Only set the key when a default was requested, so the "unset" case is exercised too.
      ...(thinkingDefault ? { thinkingDefault } : {}),
    }),
  );

  expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
  const [firstCall] = agentMock.mock.calls;
  const agentParams = firstCall?.[0];
  expect(agentParams?.thinkLevel).toBe(expectedThinkLevel);
  expect(agentParams?.reasoningLevel).toBe(expectedReasoningLevel);
}
/**
 * End-to-end style checks for directive handling (`/think`, `/model`,
 * `/models`, `/reasoning`, reply tags) driven through `getReplyFromConfig`
 * with the embedded agent and the model catalog mocked.
 */
describe("directive behavior", () => {
  installDirectiveBehaviorE2EHooks();

  /** Asserts, in the given order, that `text` contains every listed fragment. */
  const expectContainsAll = (text: unknown, fragments: readonly string[]): void => {
    for (const fragment of fragments) {
      expect(text).toContain(fragment);
    }
  };

  it("covers /think status and reasoning defaults for reasoning and non-reasoning models", async () => {
    await withTempHome(async (home) => {
      // `/think` status: a reasoning-capable catalog entry reports "low", otherwise "off".
      await expectThinkStatusForReasoningModel({ home, reasoning: true, expectedLevel: "low" });
      await expectThinkStatusForReasoningModel({ home, reasoning: false, expectedLevel: "off" });

      // Neither status query may have reached the embedded agent.
      expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
      vi.mocked(runEmbeddedPiAgent).mockClear();

      // No thinkingDefault configured: thinkLevel "low", reasoningLevel "off".
      await runReasoningDefaultCase({
        home,
        expectedThinkLevel: "low",
        expectedReasoningLevel: "off",
      });

      // thinkingDefault "off": thinkLevel "off", reasoningLevel "on".
      await runReasoningDefaultCase({
        home,
        expectedThinkLevel: "off",
        expectedReasoningLevel: "on",
        thinkingDefault: "off",
      });
    });
  });

  it("renders model list and status variants across catalog/config combinations", async () => {
    await withTempHome(async (home) => {
      const catalogMock = vi.mocked(loadModelCatalog);

      // `/model list` with the helper's default setup prints a provider summary.
      const providerSummary = await runModelDirectiveText(home, "/model list");
      expectContainsAll(providerSummary, [
        "Providers:",
        "- anthropic",
        "- openai",
        "Use: /models <provider>",
        "Switch: /model <provider/model>",
      ]);

      // `/model` while the catalog resolves to an empty list.
      catalogMock.mockResolvedValueOnce([]);
      const emptyCatalogText = await runModelDirectiveText(home, "/model");
      expectContainsAll(emptyCatalogText, [
        "Current: anthropic/claude-opus-4-5",
        "Switch: /model <provider/model>",
        "Browse: /models (providers) or /models <provider> (models)",
        "More: /model status",
      ]);

      // `/model status` without a session store.
      const statusText = await runModelDirectiveText(home, "/model status", {
        includeSessionStore: false,
      });
      expectContainsAll(statusText, ["anthropic/claude-opus-4-5", "openai/gpt-4.1-mini"]);
      expect(statusText).not.toContain("claude-sonnet-4-1");
      expect(statusText).toContain("auth:");

      // From here on the catalog resolves to three providers by default
      // (not a one-shot mock, so it stays in effect for later lookups).
      catalogMock.mockResolvedValue([
        { id: "claude-opus-4-5", name: "Opus 4.5", provider: "anthropic" },
        { id: "gpt-4.1-mini", name: "GPT-4.1 Mini", provider: "openai" },
        { id: "grok-4", name: "Grok 4", provider: "xai" },
      ]);
      const noAllowlistText = await runModelDirectiveText(home, "/model list", {
        defaults: {
          model: {
            primary: "anthropic/claude-opus-4-5",
            fallbacks: ["openai/gpt-4.1-mini"],
          },
          imageModel: { primary: "minimax/MiniMax-M2.1" },
          models: undefined,
        },
      });
      expectContainsAll(noAllowlistText, [
        "Providers:",
        "- anthropic",
        "- openai",
        "- xai",
        "Use: /models <provider>",
      ]);

      // `/models minimax`: the provider is absent from the (one-shot) catalog and
      // only present in the config's `models.providers` section.
      catalogMock.mockResolvedValueOnce([
        {
          provider: "anthropic",
          id: "claude-opus-4-5",
          name: "Claude Opus 4.5",
        },
        { provider: "openai", id: "gpt-4.1-mini", name: "GPT-4.1 mini" },
      ]);
      const configOnlyProviderText = await runModelDirectiveText(home, "/models minimax", {
        defaults: {
          models: {
            "anthropic/claude-opus-4-5": {},
            "openai/gpt-4.1-mini": {},
            "minimax/MiniMax-M2.1": { alias: "minimax" },
          },
        },
        extra: {
          models: {
            mode: "merge",
            providers: {
              minimax: {
                baseUrl: "https://api.minimax.io/anthropic",
                api: "anthropic-messages",
                models: [{ id: "MiniMax-M2.1", name: "MiniMax M2.1" }],
              },
            },
          },
        },
      });
      expectContainsAll(configOnlyProviderText, ["Models (minimax", "minimax/MiniMax-M2.1"]);

      // `/model list` with a single configured model must not print a "missing (missing)" marker.
      const missingAuthText = await runModelDirectiveText(home, "/model list", {
        defaults: {
          models: {
            "anthropic/claude-opus-4-5": {},
          },
        },
      });
      expect(missingAuthText).toContain("Providers:");
      expect(missingAuthText).not.toContain("missing (missing)");

      // None of the directive-only messages above may have run the agent.
      expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
    });
  });

  it("sets model override on /model directive", async () => {
    await withTempHome(async (home) => {
      const storePath = sessionStorePath(home);
      const config = makeWhatsAppDirectiveConfig(
        home,
        {
          model: { primary: "anthropic/claude-opus-4-5" },
          models: {
            "anthropic/claude-opus-4-5": {},
            "openai/gpt-4.1-mini": {},
          },
        },
        { session: { store: storePath } },
      );

      await getReplyFromConfig(
        { Body: "/model openai/gpt-4.1-mini", From: "+1222", To: "+1222", CommandAuthorized: true },
        {},
        config,
      );

      // The selection is checked against the session store at `storePath`.
      assertModelSelection(storePath, {
        model: "gpt-4.1-mini",
        provider: "openai",
      });
      expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
    });
  });

  it("ignores inline /model and /think directives while still running agent content", async () => {
    await withTempHome(async (home) => {
      const agentMock = vi.mocked(runEmbeddedPiAgent);

      // Inline `/model …` inside ordinary text: the agent still runs on the primary model.
      mockEmbeddedTextResult("done");
      const inlineModelReply = await getReplyFromConfig(
        {
          Body: "please sync /model openai/gpt-4.1-mini now",
          From: "+1004",
          To: "+2000",
        },
        {},
        makeDefaultModelConfig(home),
      );
      expect(replyTexts(inlineModelReply)).toContain("done");
      expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
      const [modelRunCall] = agentMock.mock.calls;
      const modelRunParams = modelRunCall?.[0];
      expect(modelRunParams?.provider).toBe("anthropic");
      expect(modelRunParams?.model).toBe("claude-opus-4-5");

      // Inline `/think:high` inside ordinary text: the agent runs exactly once more.
      agentMock.mockClear();
      mockEmbeddedTextResult("done");
      const inlineThinkReply = await getReplyFromConfig(
        {
          Body: "please sync /think:high now",
          From: "+1004",
          To: "+2000",
        },
        {},
        makeWhatsAppDirectiveConfig(home, { model: { primary: "anthropic/claude-opus-4-5" } }),
      );
      expect(replyTexts(inlineThinkReply)).toContain("done");
      expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
    });
  });

  it("passes elevated defaults when sender is approved", async () => {
    await withTempHome(async (home) => {
      mockEmbeddedTextResult("done");

      // The sender's E.164 number is listed under tools.elevated.allowFrom.whatsapp.
      const config = makeWhatsAppDirectiveConfig(
        home,
        { model: { primary: "anthropic/claude-opus-4-5" } },
        {
          tools: {
            elevated: {
              allowFrom: { whatsapp: ["+1004"] },
            },
          },
        },
      );
      await getReplyFromConfig(
        {
          Body: "hello",
          From: "+1004",
          To: "+2000",
          Provider: "whatsapp",
          SenderE164: "+1004",
        },
        {},
        config,
      );

      expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
      const [agentCall] = vi.mocked(runEmbeddedPiAgent).mock.calls;
      expect(agentCall?.[0]?.bashElevated).toEqual({
        enabled: true,
        allowed: true,
        defaultLevel: "on",
      });
    });
  });

  it("persists /reasoning off on discord even when model defaults reasoning on", async () => {
    await withTempHome(async (home) => {
      const storePath = sessionStorePath(home);
      mockEmbeddedTextResult("done");

      // The catalog entry for the configured model is flagged `reasoning: true`.
      vi.mocked(loadModelCatalog).mockResolvedValue([
        {
          id: "x-ai/grok-4.1-fast",
          name: "Grok 4.1 Fast",
          provider: "openrouter",
          reasoning: true,
        },
      ]);
      const config = makeWhatsAppDirectiveConfig(
        home,
        {
          model: "openrouter/x-ai/grok-4.1-fast",
        },
        {
          channels: {
            discord: { allowFrom: ["*"] },
          },
          session: { store: storePath },
        },
      );

      // Both messages come from the same Discord sender/channel; only the body differs.
      const sendDiscordMessage = (body: string) =>
        getReplyFromConfig(
          {
            Body: body,
            From: "discord:user:1004",
            To: "channel:general",
            Provider: "discord",
            Surface: "discord",
            CommandSource: "text",
            CommandAuthorized: true,
          },
          {},
          config,
        );

      // Step 1: the directive is acknowledged and written to the session store.
      const offReply = await sendDiscordMessage("/reasoning off");
      expect(replyText(offReply)).toContain("Reasoning visibility disabled.");
      const [storedEntry] = Object.values(loadSessionStore(storePath));
      expect(storedEntry?.reasoningLevel).toBe("off");

      // Step 2: a follow-up message runs the agent with the persisted level.
      await sendDiscordMessage("hello");
      expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
      const [agentCall] = vi.mocked(runEmbeddedPiAgent).mock.calls;
      expect(agentCall?.[0]?.reasoningLevel).toBe("off");
    });
  });

  it("handles reply_to_current tags and explicit reply_to precedence", async () => {
    await withTempHome(async (home) => {
      // The tag (with or without inner padding) is stripped from the text and
      // resolves to the inbound MessageSid.
      const tagVariants = ["[[reply_to_current]]", "[[ reply_to_current ]]"];
      for (const tag of tagVariants) {
        const tagged = await runReplyToCurrentCase(home, `hello ${tag}`);
        expect(tagged?.text).toBe("hello");
        expect(tagged?.replyToId).toBe("msg-123");
      }

      // When both tags are present, the explicit reply_to id is the one reported.
      vi.mocked(runEmbeddedPiAgent).mockResolvedValue(
        makeEmbeddedTextResult("hi [[reply_to_current]] [[reply_to:abc-456]]"),
      );
      const explicitReply = await getReplyFromConfig(
        {
          Body: "ping",
          From: "+1004",
          To: "+2000",
          MessageSid: "msg-123",
        },
        {},
        makeWhatsAppDirectiveConfig(home, { model: { primary: "anthropic/claude-opus-4-5" } }),
      );
      const firstPayload = Array.isArray(explicitReply) ? explicitReply[0] : explicitReply;
      expect(firstPayload?.text).toBe("hi");
      expect(firstPayload?.replyToId).toBe("abc-456");
    });
  });
});