fix(skills): guard against skills prompt bloat
This commit is contained in:
committed by
Peter Steinberger
parent
4f5b9da503
commit
5b3873add4
@@ -67,4 +67,170 @@ describe("buildWorkspaceSkillSnapshot", () => {
|
||||
"visible-skill",
|
||||
]);
|
||||
});
|
||||
|
||||
it("truncates the skills prompt when it exceeds the configured char budget", async () => {
|
||||
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-"));
|
||||
|
||||
// Make a bunch of skills with very long descriptions.
|
||||
for (let i = 0; i < 25; i += 1) {
|
||||
const name = `skill-${String(i).padStart(2, "0")}`;
|
||||
await _writeSkill({
|
||||
dir: path.join(workspaceDir, "skills", name),
|
||||
name,
|
||||
description: "x".repeat(5000),
|
||||
});
|
||||
}
|
||||
|
||||
const snapshot = buildWorkspaceSkillSnapshot(workspaceDir, {
|
||||
config: {
|
||||
skills: {
|
||||
limits: {
|
||||
maxSkillsInPrompt: 100,
|
||||
maxSkillsPromptChars: 1500,
|
||||
},
|
||||
},
|
||||
},
|
||||
managedSkillsDir: path.join(workspaceDir, ".managed"),
|
||||
bundledSkillsDir: path.join(workspaceDir, ".bundled"),
|
||||
});
|
||||
|
||||
expect(snapshot.prompt).toContain("⚠️ Skills truncated");
|
||||
expect(snapshot.prompt.length).toBeLessThan(5000);
|
||||
});
|
||||
|
||||
it("limits discovery for nested repo-style skills roots (dir/skills/*)", async () => {
|
||||
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-"));
|
||||
const repoDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-skills-repo-"));
|
||||
|
||||
for (let i = 0; i < 20; i += 1) {
|
||||
const name = `repo-skill-${String(i).padStart(2, "0")}`;
|
||||
await _writeSkill({
|
||||
dir: path.join(repoDir, "skills", name),
|
||||
name,
|
||||
description: `Desc ${i}`,
|
||||
});
|
||||
}
|
||||
|
||||
const snapshot = buildWorkspaceSkillSnapshot(workspaceDir, {
|
||||
config: {
|
||||
skills: {
|
||||
load: {
|
||||
extraDirs: [repoDir],
|
||||
},
|
||||
limits: {
|
||||
maxCandidatesPerRoot: 5,
|
||||
maxSkillsLoadedPerSource: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
managedSkillsDir: path.join(workspaceDir, ".managed"),
|
||||
bundledSkillsDir: path.join(workspaceDir, ".bundled"),
|
||||
});
|
||||
|
||||
// We should only have loaded a small subset.
|
||||
expect(snapshot.skills.length).toBeLessThanOrEqual(5);
|
||||
expect(snapshot.prompt).toContain("repo-skill-00");
|
||||
expect(snapshot.prompt).not.toContain("repo-skill-19");
|
||||
});
|
||||
|
||||
it("skips skills whose SKILL.md exceeds maxSkillFileBytes", async () => {
|
||||
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-"));
|
||||
|
||||
await _writeSkill({
|
||||
dir: path.join(workspaceDir, "skills", "small-skill"),
|
||||
name: "small-skill",
|
||||
description: "Small",
|
||||
});
|
||||
|
||||
await _writeSkill({
|
||||
dir: path.join(workspaceDir, "skills", "big-skill"),
|
||||
name: "big-skill",
|
||||
description: "Big",
|
||||
body: "x".repeat(50_000),
|
||||
});
|
||||
|
||||
const snapshot = buildWorkspaceSkillSnapshot(workspaceDir, {
|
||||
config: {
|
||||
skills: {
|
||||
limits: {
|
||||
maxSkillFileBytes: 1000,
|
||||
},
|
||||
},
|
||||
},
|
||||
managedSkillsDir: path.join(workspaceDir, ".managed"),
|
||||
bundledSkillsDir: path.join(workspaceDir, ".bundled"),
|
||||
});
|
||||
|
||||
expect(snapshot.skills.map((s) => s.name)).toContain("small-skill");
|
||||
expect(snapshot.skills.map((s) => s.name)).not.toContain("big-skill");
|
||||
expect(snapshot.prompt).toContain("small-skill");
|
||||
expect(snapshot.prompt).not.toContain("big-skill");
|
||||
});
|
||||
|
||||
it("detects nested skills roots beyond the first 25 entries", async () => {
|
||||
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-"));
|
||||
const repoDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-skills-repo-"));
|
||||
|
||||
// Create 30 nested dirs, but only the last one is an actual skill.
|
||||
for (let i = 0; i < 30; i += 1) {
|
||||
await fs.mkdir(path.join(repoDir, "skills", `entry-${String(i).padStart(2, "0")}`), {
|
||||
recursive: true,
|
||||
});
|
||||
}
|
||||
|
||||
await _writeSkill({
|
||||
dir: path.join(repoDir, "skills", "entry-29"),
|
||||
name: "late-skill",
|
||||
description: "Nested skill discovered late",
|
||||
});
|
||||
|
||||
const snapshot = buildWorkspaceSkillSnapshot(workspaceDir, {
|
||||
config: {
|
||||
skills: {
|
||||
load: {
|
||||
extraDirs: [repoDir],
|
||||
},
|
||||
limits: {
|
||||
maxCandidatesPerRoot: 30,
|
||||
maxSkillsLoadedPerSource: 30,
|
||||
},
|
||||
},
|
||||
},
|
||||
managedSkillsDir: path.join(workspaceDir, ".managed"),
|
||||
bundledSkillsDir: path.join(workspaceDir, ".bundled"),
|
||||
});
|
||||
|
||||
expect(snapshot.skills.map((s) => s.name)).toContain("late-skill");
|
||||
expect(snapshot.prompt).toContain("late-skill");
|
||||
});
|
||||
|
||||
it("enforces maxSkillFileBytes for root-level SKILL.md", async () => {
|
||||
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-"));
|
||||
const rootSkillDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-root-skill-"));
|
||||
|
||||
await _writeSkill({
|
||||
dir: rootSkillDir,
|
||||
name: "root-big-skill",
|
||||
description: "Big",
|
||||
body: "x".repeat(50_000),
|
||||
});
|
||||
|
||||
const snapshot = buildWorkspaceSkillSnapshot(workspaceDir, {
|
||||
config: {
|
||||
skills: {
|
||||
load: {
|
||||
extraDirs: [rootSkillDir],
|
||||
},
|
||||
limits: {
|
||||
maxSkillFileBytes: 1000,
|
||||
},
|
||||
},
|
||||
},
|
||||
managedSkillsDir: path.join(workspaceDir, ".managed"),
|
||||
bundledSkillsDir: path.join(workspaceDir, ".bundled"),
|
||||
});
|
||||
|
||||
expect(snapshot.skills.map((s) => s.name)).not.toContain("root-big-skill");
|
||||
expect(snapshot.prompt).not.toContain("root-big-skill");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -72,6 +72,12 @@ const SKILL_COMMAND_FALLBACK = "skill";
|
||||
// Discord command descriptions must be ≤100 characters
|
||||
const SKILL_COMMAND_DESCRIPTION_MAX_LENGTH = 100;
|
||||
|
||||
const DEFAULT_MAX_CANDIDATES_PER_ROOT = 300;
|
||||
const DEFAULT_MAX_SKILLS_LOADED_PER_SOURCE = 200;
|
||||
const DEFAULT_MAX_SKILLS_IN_PROMPT = 150;
|
||||
const DEFAULT_MAX_SKILLS_PROMPT_CHARS = 30_000;
|
||||
const DEFAULT_MAX_SKILL_FILE_BYTES = 256_000;
|
||||
|
||||
function sanitizeSkillCommandName(raw: string): string {
|
||||
const normalized = raw
|
||||
.toLowerCase()
|
||||
@@ -101,6 +107,97 @@ function resolveUniqueSkillCommandName(base: string, used: Set<string>): string
|
||||
return fallback;
|
||||
}
|
||||
|
||||
type ResolvedSkillsLimits = {
|
||||
maxCandidatesPerRoot: number;
|
||||
maxSkillsLoadedPerSource: number;
|
||||
maxSkillsInPrompt: number;
|
||||
maxSkillsPromptChars: number;
|
||||
maxSkillFileBytes: number;
|
||||
};
|
||||
|
||||
function resolveSkillsLimits(config?: OpenClawConfig): ResolvedSkillsLimits {
|
||||
const limits = config?.skills?.limits;
|
||||
return {
|
||||
maxCandidatesPerRoot: limits?.maxCandidatesPerRoot ?? DEFAULT_MAX_CANDIDATES_PER_ROOT,
|
||||
maxSkillsLoadedPerSource:
|
||||
limits?.maxSkillsLoadedPerSource ?? DEFAULT_MAX_SKILLS_LOADED_PER_SOURCE,
|
||||
maxSkillsInPrompt: limits?.maxSkillsInPrompt ?? DEFAULT_MAX_SKILLS_IN_PROMPT,
|
||||
maxSkillsPromptChars: limits?.maxSkillsPromptChars ?? DEFAULT_MAX_SKILLS_PROMPT_CHARS,
|
||||
maxSkillFileBytes: limits?.maxSkillFileBytes ?? DEFAULT_MAX_SKILL_FILE_BYTES,
|
||||
};
|
||||
}
|
||||
|
||||
function listChildDirectories(dir: string): string[] {
|
||||
try {
|
||||
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
||||
const dirs: string[] = [];
|
||||
for (const entry of entries) {
|
||||
if (entry.name.startsWith(".")) continue;
|
||||
if (entry.name === "node_modules") continue;
|
||||
const fullPath = path.join(dir, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
dirs.push(entry.name);
|
||||
continue;
|
||||
}
|
||||
if (entry.isSymbolicLink()) {
|
||||
try {
|
||||
if (fs.statSync(fullPath).isDirectory()) {
|
||||
dirs.push(entry.name);
|
||||
}
|
||||
} catch {
|
||||
// ignore broken symlinks
|
||||
}
|
||||
}
|
||||
}
|
||||
return dirs;
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
function resolveNestedSkillsRoot(
|
||||
dir: string,
|
||||
opts?: {
|
||||
maxEntriesToScan?: number;
|
||||
},
|
||||
): { baseDir: string; note?: string } {
|
||||
const nested = path.join(dir, "skills");
|
||||
try {
|
||||
if (!fs.existsSync(nested) || !fs.statSync(nested).isDirectory()) {
|
||||
return { baseDir: dir };
|
||||
}
|
||||
} catch {
|
||||
return { baseDir: dir };
|
||||
}
|
||||
|
||||
// Heuristic: if `dir/skills/*/SKILL.md` exists for any entry, treat `dir/skills` as the real root.
|
||||
// Note: don't stop at 25, but keep a cap to avoid pathological scans.
|
||||
const nestedDirs = listChildDirectories(nested);
|
||||
const scanLimit = Math.max(0, opts?.maxEntriesToScan ?? 100);
|
||||
const toScan = scanLimit === 0 ? [] : nestedDirs.slice(0, Math.min(nestedDirs.length, scanLimit));
|
||||
|
||||
for (const name of toScan) {
|
||||
const skillMd = path.join(nested, name, "SKILL.md");
|
||||
if (fs.existsSync(skillMd)) {
|
||||
return { baseDir: nested, note: `Detected nested skills root at ${nested}` };
|
||||
}
|
||||
}
|
||||
return { baseDir: dir };
|
||||
}
|
||||
|
||||
function unwrapLoadedSkills(loaded: unknown): Skill[] {
|
||||
if (Array.isArray(loaded)) {
|
||||
return loaded as Skill[];
|
||||
}
|
||||
if (loaded && typeof loaded === "object" && "skills" in loaded) {
|
||||
const skills = (loaded as { skills?: unknown }).skills;
|
||||
if (Array.isArray(skills)) {
|
||||
return skills as Skill[];
|
||||
}
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
function loadSkillEntries(
|
||||
workspaceDir: string,
|
||||
opts?: {
|
||||
@@ -109,20 +206,99 @@ function loadSkillEntries(
|
||||
bundledSkillsDir?: string;
|
||||
},
|
||||
): SkillEntry[] {
|
||||
const limits = resolveSkillsLimits(opts?.config);
|
||||
|
||||
const loadSkills = (params: { dir: string; source: string }): Skill[] => {
|
||||
const loaded = loadSkillsFromDir(params);
|
||||
if (Array.isArray(loaded)) {
|
||||
return loaded;
|
||||
const resolved = resolveNestedSkillsRoot(params.dir, {
|
||||
maxEntriesToScan: limits.maxCandidatesPerRoot,
|
||||
});
|
||||
const baseDir = resolved.baseDir;
|
||||
|
||||
// If the root itself is a skill directory, just load it directly (but enforce size cap).
|
||||
const rootSkillMd = path.join(baseDir, "SKILL.md");
|
||||
if (fs.existsSync(rootSkillMd)) {
|
||||
try {
|
||||
const size = fs.statSync(rootSkillMd).size;
|
||||
if (size > limits.maxSkillFileBytes) {
|
||||
skillsLogger.warn("Skipping skills root due to oversized SKILL.md.", {
|
||||
dir: baseDir,
|
||||
filePath: rootSkillMd,
|
||||
size,
|
||||
maxSkillFileBytes: limits.maxSkillFileBytes,
|
||||
});
|
||||
return [];
|
||||
}
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
|
||||
const loaded = loadSkillsFromDir({ dir: baseDir, source: params.source });
|
||||
return unwrapLoadedSkills(loaded);
|
||||
}
|
||||
if (
|
||||
loaded &&
|
||||
typeof loaded === "object" &&
|
||||
"skills" in loaded &&
|
||||
Array.isArray((loaded as { skills?: unknown }).skills)
|
||||
) {
|
||||
return (loaded as { skills: Skill[] }).skills;
|
||||
|
||||
const childDirs = listChildDirectories(baseDir);
|
||||
const suspicious = childDirs.length > limits.maxCandidatesPerRoot;
|
||||
|
||||
const maxCandidates = Math.max(0, limits.maxSkillsLoadedPerSource);
|
||||
const limitedChildren = childDirs.slice().sort().slice(0, maxCandidates);
|
||||
|
||||
if (suspicious) {
|
||||
skillsLogger.warn("Skills root looks suspiciously large, truncating discovery.", {
|
||||
dir: params.dir,
|
||||
baseDir,
|
||||
childDirCount: childDirs.length,
|
||||
maxCandidatesPerRoot: limits.maxCandidatesPerRoot,
|
||||
maxSkillsLoadedPerSource: limits.maxSkillsLoadedPerSource,
|
||||
});
|
||||
} else if (childDirs.length > maxCandidates) {
|
||||
skillsLogger.warn("Skills root has many entries, truncating discovery.", {
|
||||
dir: params.dir,
|
||||
baseDir,
|
||||
childDirCount: childDirs.length,
|
||||
maxSkillsLoadedPerSource: limits.maxSkillsLoadedPerSource,
|
||||
});
|
||||
}
|
||||
return [];
|
||||
|
||||
const loadedSkills: Skill[] = [];
|
||||
|
||||
// Only consider immediate subfolders that look like skills (have SKILL.md) and are under size cap.
|
||||
for (const name of limitedChildren) {
|
||||
const skillDir = path.join(baseDir, name);
|
||||
const skillMd = path.join(skillDir, "SKILL.md");
|
||||
if (!fs.existsSync(skillMd)) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
const size = fs.statSync(skillMd).size;
|
||||
if (size > limits.maxSkillFileBytes) {
|
||||
skillsLogger.warn("Skipping skill due to oversized SKILL.md.", {
|
||||
skill: name,
|
||||
filePath: skillMd,
|
||||
size,
|
||||
maxSkillFileBytes: limits.maxSkillFileBytes,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
|
||||
const loaded = loadSkillsFromDir({ dir: skillDir, source: params.source });
|
||||
loadedSkills.push(...unwrapLoadedSkills(loaded));
|
||||
|
||||
if (loadedSkills.length >= limits.maxSkillsLoadedPerSource) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (loadedSkills.length > limits.maxSkillsLoadedPerSource) {
|
||||
return loadedSkills
|
||||
.slice()
|
||||
.sort((a, b) => a.name.localeCompare(b.name))
|
||||
.slice(0, limits.maxSkillsLoadedPerSource);
|
||||
}
|
||||
|
||||
return loadedSkills;
|
||||
};
|
||||
|
||||
const managedSkillsDir = opts?.managedSkillsDir ?? path.join(CONFIG_DIR, "skills");
|
||||
@@ -209,6 +385,44 @@ function loadSkillEntries(
|
||||
return skillEntries;
|
||||
}
|
||||
|
||||
function applySkillsPromptLimits(params: { skills: Skill[]; config?: OpenClawConfig }): {
|
||||
skillsForPrompt: Skill[];
|
||||
truncated: boolean;
|
||||
truncatedReason: "count" | "chars" | null;
|
||||
} {
|
||||
const limits = resolveSkillsLimits(params.config);
|
||||
const total = params.skills.length;
|
||||
const byCount = params.skills.slice(0, Math.max(0, limits.maxSkillsInPrompt));
|
||||
|
||||
let skillsForPrompt = byCount;
|
||||
let truncated = total > byCount.length;
|
||||
let truncatedReason: "count" | "chars" | null = truncated ? "count" : null;
|
||||
|
||||
const fits = (skills: Skill[]): boolean => {
|
||||
const block = formatSkillsForPrompt(skills);
|
||||
return block.length <= limits.maxSkillsPromptChars;
|
||||
};
|
||||
|
||||
if (!fits(skillsForPrompt)) {
|
||||
// Binary search the largest prefix that fits in the char budget.
|
||||
let lo = 0;
|
||||
let hi = skillsForPrompt.length;
|
||||
while (lo < hi) {
|
||||
const mid = Math.ceil((lo + hi) / 2);
|
||||
if (fits(skillsForPrompt.slice(0, mid))) {
|
||||
lo = mid;
|
||||
} else {
|
||||
hi = mid - 1;
|
||||
}
|
||||
}
|
||||
skillsForPrompt = skillsForPrompt.slice(0, lo);
|
||||
truncated = true;
|
||||
truncatedReason = "chars";
|
||||
}
|
||||
|
||||
return { skillsForPrompt, truncated, truncatedReason };
|
||||
}
|
||||
|
||||
export function buildWorkspaceSkillSnapshot(
|
||||
workspaceDir: string,
|
||||
opts?: {
|
||||
@@ -234,7 +448,19 @@ export function buildWorkspaceSkillSnapshot(
|
||||
);
|
||||
const resolvedSkills = promptEntries.map((entry) => entry.skill);
|
||||
const remoteNote = opts?.eligibility?.remote?.note?.trim();
|
||||
const prompt = [remoteNote, formatSkillsForPrompt(resolvedSkills)].filter(Boolean).join("\n");
|
||||
|
||||
const { skillsForPrompt, truncated } = applySkillsPromptLimits({
|
||||
skills: resolvedSkills,
|
||||
config: opts?.config,
|
||||
});
|
||||
|
||||
const truncationNote = truncated
|
||||
? `⚠️ Skills truncated: included ${skillsForPrompt.length} of ${resolvedSkills.length}. Run \`openclaw skills check\` to audit.`
|
||||
: "";
|
||||
|
||||
const prompt = [remoteNote, truncationNote, formatSkillsForPrompt(skillsForPrompt)]
|
||||
.filter(Boolean)
|
||||
.join("\n");
|
||||
const skillFilter = normalizeSkillFilter(opts?.skillFilter);
|
||||
return {
|
||||
prompt,
|
||||
@@ -271,7 +497,15 @@ export function buildWorkspaceSkillsPrompt(
|
||||
(entry) => entry.invocation?.disableModelInvocation !== true,
|
||||
);
|
||||
const remoteNote = opts?.eligibility?.remote?.note?.trim();
|
||||
return [remoteNote, formatSkillsForPrompt(promptEntries.map((entry) => entry.skill))]
|
||||
const resolvedSkills = promptEntries.map((entry) => entry.skill);
|
||||
const { skillsForPrompt, truncated } = applySkillsPromptLimits({
|
||||
skills: resolvedSkills,
|
||||
config: opts?.config,
|
||||
});
|
||||
const truncationNote = truncated
|
||||
? `⚠️ Skills truncated: included ${skillsForPrompt.length} of ${resolvedSkills.length}. Run \`openclaw skills check\` to audit.`
|
||||
: "";
|
||||
return [remoteNote, truncationNote, formatSkillsForPrompt(skillsForPrompt)]
|
||||
.filter(Boolean)
|
||||
.join("\n");
|
||||
}
|
||||
|
||||
@@ -22,10 +22,24 @@ export type SkillsInstallConfig = {
|
||||
nodeManager?: "npm" | "pnpm" | "yarn" | "bun";
|
||||
};
|
||||
|
||||
export type SkillsLimitsConfig = {
|
||||
/** Max number of immediate child directories to consider under a skills root before treating it as suspicious. */
|
||||
maxCandidatesPerRoot?: number;
|
||||
/** Max number of skills to load per skills source (bundled/managed/workspace/extra). */
|
||||
maxSkillsLoadedPerSource?: number;
|
||||
/** Max number of skills to include in the model-facing skills prompt. */
|
||||
maxSkillsInPrompt?: number;
|
||||
/** Max characters for the model-facing skills prompt block (approx). */
|
||||
maxSkillsPromptChars?: number;
|
||||
/** Max size (bytes) allowed for a SKILL.md file to be considered. */
|
||||
maxSkillFileBytes?: number;
|
||||
};
|
||||
|
||||
export type SkillsConfig = {
|
||||
/** Optional bundled-skill allowlist (only affects bundled skills). */
|
||||
allowBundled?: string[];
|
||||
load?: SkillsLoadConfig;
|
||||
install?: SkillsInstallConfig;
|
||||
limits?: SkillsLimitsConfig;
|
||||
entries?: Record<string, SkillConfig>;
|
||||
};
|
||||
|
||||
@@ -579,6 +579,16 @@ export const OpenClawSchema = z
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
limits: z
|
||||
.object({
|
||||
maxCandidatesPerRoot: z.number().int().min(1).optional(),
|
||||
maxSkillsLoadedPerSource: z.number().int().min(1).optional(),
|
||||
maxSkillsInPrompt: z.number().int().min(0).optional(),
|
||||
maxSkillsPromptChars: z.number().int().min(0).optional(),
|
||||
maxSkillFileBytes: z.number().int().min(0).optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
entries: z
|
||||
.record(
|
||||
z.string(),
|
||||
|
||||
Reference in New Issue
Block a user