openclaw/src/agents/tools/web-fetch-utils.ts

import { sanitizeHtml, stripInvisibleUnicode } from "./web-fetch-visibility.js";

/**
 * Output format produced by `extractReadableContent`: `"markdown"` returns the
 * `htmlToMarkdown` rendering, `"text"` returns plain text.
 */
export type ExtractMode = "markdown" | "text";

/** Sanitized HTML longer than this (in UTF-16 code units) skips Readability and uses the regex fallback. */
const READABILITY_MAX_HTML_CHARS = 1_000_000;
/** Depth limit passed to `exceedsEstimatedHtmlNestingDepth`; deeper documents also skip Readability. */
const READABILITY_MAX_ESTIMATED_NESTING_DEPTH = 3_000;

/** The two entry points pulled from the lazily imported Readability and linkedom modules. */
type ReadabilityDeps = {
  Readability: typeof import("@mozilla/readability").Readability;
  parseHTML: typeof import("linkedom").parseHTML;
};

/** Memoized import promise; cleared again when loading fails so a later call can retry. */
let readabilityDepsPromise: Promise<ReadabilityDeps> | undefined;

/**
 * Lazily imports `@mozilla/readability` and `linkedom`, sharing one promise across callers.
 *
 * @returns The `Readability` class and linkedom's `parseHTML` function.
 * @throws Re-throws the import error after dropping the memoized promise.
 */
async function loadReadabilityDeps(): Promise<ReadabilityDeps> {
  if (readabilityDepsPromise === undefined) {
    const pendingModules = Promise.all([import("@mozilla/readability"), import("linkedom")]);
    readabilityDepsPromise = pendingModules.then(([readabilityModule, linkedomModule]) => {
      return {
        Readability: readabilityModule.Readability,
        parseHTML: linkedomModule.parseHTML,
      };
    });
  }
  try {
    const deps = await readabilityDepsPromise;
    return deps;
  } catch (error) {
    // Forget the rejected promise so the next call starts a fresh import.
    readabilityDepsPromise = undefined;
    throw error;
  }
}

/** Replacement text for the named entities this module decodes (keys are lowercase). */
const NAMED_ENTITY_REPLACEMENTS: Readonly<Record<string, string>> = {
  nbsp: " ",
  amp: "&",
  quot: '"',
  lt: "<",
  gt: ">",
};

/**
 * Decodes a small set of named HTML entities plus decimal/hex numeric character references.
 *
 * Decoding happens in a single pass, so text produced by one entity is never
 * decoded again (`&amp;lt;` yields the literal text `&lt;`, not `<`). Numeric
 * references are resolved as full Unicode code points, so astral characters
 * such as `&#x1F600;` decode correctly.
 *
 * @param value Text that may contain HTML entities.
 * @returns The text with recognized entities replaced; unrecognized or
 *   out-of-range references are left untouched.
 */
function decodeEntities(value: string): string {
  return value.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|(nbsp|amp|quot|lt|gt));/gi,
    (match: string, hex?: string, dec?: string, name?: string): string => {
      if (name !== undefined) {
        return NAMED_ENTITY_REPLACEMENTS[name.toLowerCase()] ?? match;
      }
      const codePoint =
        hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec ?? "", 10);
      // String.fromCodePoint throws a RangeError outside the Unicode range; keep the raw text instead.
      if (!Number.isInteger(codePoint) || codePoint > 0x10ffff) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    },
  );
}

/**
 * Removes every `<...>` span (at least one character between the brackets)
 * and then decodes HTML entities in the remaining text.
 */
function stripTags(value: string): string {
  const withoutMarkup = value.replace(/<[^>]+>/g, "");
  return decodeEntities(withoutMarkup);
}

/**
 * Tidies whitespace: drops carriage returns, removes trailing spaces/tabs
 * before a newline, caps blank-line runs at one empty line, collapses runs of
 * two or more spaces/tabs into a single space, and trims both ends.
 */
function normalizeWhitespace(value: string): string {
  const withoutCarriageReturns = value.replace(/\r/g, "");
  const withoutTrailingBlanks = withoutCarriageReturns.replace(/[ \t]+\n/g, "\n");
  const withCappedBlankLines = withoutTrailingBlanks.replace(/\n{3,}/g, "\n\n");
  const withSingleSpaces = withCappedBlankLines.replace(/[ \t]{2,}/g, " ");
  return withSingleSpaces.trim();
}

/**
 * Regex-based conversion of an HTML string into lightweight Markdown.
 *
 * Links become `[label](href)`, `<h1>`–`<h6>` become `#` headings, `<li>`
 * items become `- ` bullets, and `<br>`/`<hr>` plus a set of closing block
 * tags become newlines. Script, style and noscript elements are dropped.
 *
 * @param html Raw HTML markup.
 * @returns The rendered text and, when a `<title>` element is found, its cleaned-up contents.
 */
export function htmlToMarkdown(html: string): { text: string; title?: string } {
  // Shared clean-up for any fragment that ends up as inline text.
  const toPlainLabel = (fragment: string): string => normalizeWhitespace(stripTags(fragment));

  const titleHit = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  const title = titleHit === null ? undefined : toPlainLabel(titleHit[1]);

  // Remove elements whose content should never reach the output.
  const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, "");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, "");
  const visibleMarkup = withoutStyles.replace(/<noscript[\s\S]*?<\/noscript>/gi, "");

  const withLinks = visibleMarkup.replace(
    /<a\s+[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi,
    (_match: string, href: string, inner: string) => {
      const label = toPlainLabel(inner);
      // A link without visible text is represented by its bare URL.
      return label ? `[${label}](${href})` : href;
    },
  );

  const withHeadings = withLinks.replace(
    /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi,
    (_match: string, level: string, inner: string) => {
      const depth = Math.max(1, Math.min(6, Number.parseInt(level, 10)));
      const hashes = "#".repeat(depth);
      return `\n${hashes} ${toPlainLabel(inner)}\n`;
    },
  );

  const withBullets = withHeadings.replace(
    /<li[^>]*>([\s\S]*?)<\/li>/gi,
    (_match: string, inner: string) => {
      const label = toPlainLabel(inner);
      if (!label) {
        return "";
      }
      return `\n- ${label}`;
    },
  );

  const withLineBreaks = withBullets.replace(/<(br|hr)\s*\/?>/gi, "\n");
  const withBlockBreaks = withLineBreaks.replace(
    /<\/(p|div|section|article|header|footer|table|tr|ul|ol)>/gi,
    "\n",
  );

  return { text: toPlainLabel(withBlockBreaks), title };
}

/**
 * Flattens Markdown into plain text: images are removed, links keep only
 * their label, code fences and inline backticks are unwrapped, and heading,
 * bullet and numbered-list markers are stripped from line starts.
 *
 * @param markdown Markdown source.
 * @returns Whitespace-normalized plain text.
 */
export function markdownToText(markdown: string): string {
  const withoutImages = markdown.replace(/!\[[^\]]*]\([^)]+\)/g, "");
  const withLinkLabels = withoutImages.replace(/\[([^\]]+)]\([^)]+\)/g, "$1");

  // Keep the body of each fenced block but drop the fence lines themselves.
  const withoutFences = withLinkLabels.replace(/```[\s\S]*?```/g, (fenced) => {
    const withoutFenceLines = fenced.replace(/```[^\n]*\n?/g, "");
    return withoutFenceLines.replace(/```/g, "");
  });

  const withoutInlineCode = withoutFences.replace(/`([^`]+)`/g, "$1");
  const withoutHeadingMarks = withoutInlineCode.replace(/^#{1,6}\s+/gm, "");
  const withoutBulletMarks = withoutHeadingMarks.replace(/^\s*[-*+]\s+/gm, "");
  const withoutNumberMarks = withoutBulletMarks.replace(/^\s*\d+\.\s+/gm, "");

  return normalizeWhitespace(withoutNumberMarks);
}

/**
 * Truncates `value` to at most `maxChars` UTF-16 code units.
 *
 * A negative or NaN limit is treated as 0 (previously a negative limit was
 * passed straight to `slice`, which keeps all but the last characters). The
 * cut never lands between the two halves of a surrogate pair; in that case
 * the result is one code unit shorter than the limit so that no lone
 * surrogate is emitted.
 *
 * @param value Text to truncate.
 * @param maxChars Maximum length of the returned text, in UTF-16 code units.
 * @returns The (possibly shortened) text and whether anything was cut off.
 */
export function truncateText(
  value: string,
  maxChars: number,
): { text: string; truncated: boolean } {
  if (value.length <= maxChars) {
    return { text: value, truncated: false };
  }
  const limit = Number.isNaN(maxChars) ? 0 : Math.max(0, Math.floor(maxChars));
  let end = limit;
  if (end > 0) {
    const last = value.charCodeAt(end - 1);
    const next = value.charCodeAt(end);
    const lastIsHighSurrogate = last >= 0xd800 && last <= 0xdbff;
    const nextIsLowSurrogate = next >= 0xdc00 && next <= 0xdfff;
    if (lastIsHighSurrogate && nextIsLowSurrogate) {
      // Back off one unit rather than splitting an astral character in half.
      end -= 1;
    }
  }
  return { text: value.slice(0, end), truncated: true };
}

/**
 * Cheap estimate of whether `html` nests elements deeper than `maxDepth`.
 *
 * This is a heuristic, not an HTML parser: it walks every `<`, reads the tag
 * name that follows, and keeps a running count of opening minus closing tags.
 * It exists so pathological "<div><div>..." input can be rejected before it
 * is handed to Readability/DOM parsing.
 *
 * @param html Markup to inspect.
 * @param maxDepth Largest estimated depth that is still acceptable.
 * @returns `true` as soon as the running depth goes above `maxDepth`.
 */
function exceedsEstimatedHtmlNestingDepth(html: string, maxDepth: number): boolean {
  const CODE_BANG = 33; // '!'
  const CODE_HYPHEN = 45; // '-'
  const CODE_SLASH = 47; // '/'
  const CODE_COLON = 58; // ':'
  const CODE_GT = 62; // '>'
  const CODE_QUESTION = 63; // '?'
  const CODE_LAST_WHITESPACE = 32; // space; everything at or below is treated as whitespace
  const SELF_CLOSING_SCAN_WINDOW = 200;

  // Elements that never have children, so they never add depth.
  const childlessTags = new Set([
    "area",
    "base",
    "br",
    "col",
    "embed",
    "hr",
    "img",
    "input",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
  ]);

  const isTagNameCode = (code: number): boolean => {
    const isUpper = code >= 65 && code <= 90;
    const isLower = code >= 97 && code <= 122;
    const isDigit = code >= 48 && code <= 57;
    return isUpper || isLower || isDigit || code === CODE_COLON || code === CODE_HYPHEN;
  };

  const total = html.length;
  let openCount = 0;

  for (let lt = html.indexOf("<"); lt !== -1; lt = html.indexOf("<", lt + 1)) {
    const marker = html.charCodeAt(lt + 1);
    if (marker === CODE_BANG || marker === CODE_QUESTION) {
      continue; // <! ...> or <? ...>
    }

    const isClosingTag = marker === CODE_SLASH;
    let cursor = isClosingTag ? lt + 2 : lt + 1;

    // Tolerate whitespace between the bracket (or slash) and the name.
    while (cursor < total && html.charCodeAt(cursor) <= CODE_LAST_WHITESPACE) {
      cursor += 1;
    }

    const nameBegin = cursor;
    while (cursor < total && isTagNameCode(html.charCodeAt(cursor))) {
      cursor += 1;
    }
    if (cursor === nameBegin) {
      continue; // no tag name follows this '<'
    }

    if (isClosingTag) {
      if (openCount > 0) {
        openCount -= 1;
      }
      continue;
    }

    const tagName = html.slice(nameBegin, cursor).toLowerCase();
    if (childlessTags.has(tagName)) {
      continue;
    }

    // Best-effort self-closing detection: look for the first '>' within a
    // short window and check whether it is immediately preceded by '/'.
    const scanLimit = Math.min(total, cursor + SELF_CLOSING_SCAN_WINDOW);
    let scan = cursor;
    while (scan < scanLimit && html.charCodeAt(scan) !== CODE_GT) {
      scan += 1;
    }
    const foundTagEnd = scan < scanLimit;
    if (foundTagEnd && html.charCodeAt(scan - 1) === CODE_SLASH) {
      continue;
    }

    openCount += 1;
    if (openCount > maxDepth) {
      return true;
    }
  }

  return false;
}

/**
 * Extracts the main content of an HTML page as Markdown or plain text.
 *
 * The HTML is first passed through `sanitizeHtml`. Readability (on a linkedom
 * document) is then tried; the regex-based `htmlToMarkdown` path is used
 * instead when the document is too large or too deeply nested, when
 * Readability yields no content (or, in text mode, no text), or when anything
 * in the Readability path throws.
 *
 * @param params.html Raw page HTML.
 * @param params.url Page URL, assigned to the document's `baseURI` on a best-effort basis.
 * @param params.extractMode `"markdown"` or `"text"`.
 * @returns The extracted text and, when available, a title.
 */
export async function extractReadableContent(params: {
  html: string;
  url: string;
  extractMode: ExtractMode;
}): Promise<{ text: string; title?: string } | null> {
  type Extracted = { text: string; title?: string };

  const cleanHtml = await sanitizeHtml(params.html);

  // Regex-only extraction used whenever the Readability path is skipped or fails.
  const regexFallback = (): Extracted => {
    const { text: markdown, title } = htmlToMarkdown(cleanHtml);
    if (params.extractMode !== "text") {
      return { text: stripInvisibleUnicode(markdown), title };
    }
    const fromMarkdown = stripInvisibleUnicode(markdownToText(markdown));
    if (fromMarkdown) {
      return { text: fromMarkdown, title };
    }
    // Markdown rendering produced nothing; strip tags from the whole document instead.
    const fromRawMarkup = stripInvisibleUnicode(normalizeWhitespace(stripTags(cleanHtml)));
    return { text: fromRawMarkup, title };
  };

  const isOversized = cleanHtml.length > READABILITY_MAX_HTML_CHARS;
  if (
    isOversized ||
    exceedsEstimatedHtmlNestingDepth(cleanHtml, READABILITY_MAX_ESTIMATED_NESTING_DEPTH)
  ) {
    return regexFallback();
  }

  try {
    const { Readability, parseHTML } = await loadReadabilityDeps();
    const { document } = parseHTML(cleanHtml);
    try {
      (document as { baseURI?: string }).baseURI = params.url;
    } catch {
      // Best-effort base URI for relative links.
    }

    const article = new Readability(document, { charThreshold: 0 }).parse();
    if (!article?.content) {
      return regexFallback();
    }
    const articleTitle = article.title || undefined;

    if (params.extractMode === "text") {
      const plainText = stripInvisibleUnicode(normalizeWhitespace(article.textContent ?? ""));
      if (!plainText) {
        return regexFallback();
      }
      return { text: plainText, title: articleTitle };
    }

    const markdown = htmlToMarkdown(article.content);
    return {
      text: stripInvisibleUnicode(markdown.text),
      title: articleTitle ?? markdown.title,
    };
  } catch {
    return regexFallback();
  }
}
security(web_fetch): strip hidden content to prevent indirect prompt injection (#21074) * security(web_fetch): strip hidden content to prevent indirect prompt injection Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * security(web_fetch): address review feedback and credit author * chore(changelog): credit reporter for web_fetch security fix --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 00:10:26 +01:00			`import { sanitizeHtml, stripInvisibleUnicode } from "./web-fetch-visibility.js";`

refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`export type ExtractMode = "markdown" \| "text";`

fix(web_fetch): cap response body before parsing 2026-02-16 01:19:04 +01:00			`const READABILITY_MAX_HTML_CHARS = 1_000_000;`
			`const READABILITY_MAX_ESTIMATED_NESTING_DEPTH = 3_000;`

perf(web-fetch): memoize readability dependency loading 2026-02-14 01:29:40 +00:00			`let readabilityDepsPromise:`
			`\| Promise<{`
			`Readability: typeof import("@mozilla/readability").Readability;`
			`parseHTML: typeof import("linkedom").parseHTML;`
			`}>`
			`\| undefined;`

			`async function loadReadabilityDeps(): Promise<{`
			`Readability: typeof import("@mozilla/readability").Readability;`
			`parseHTML: typeof import("linkedom").parseHTML;`
			`}> {`
			`if (!readabilityDepsPromise) {`
			`readabilityDepsPromise = Promise.all([import("@mozilla/readability"), import("linkedom")]).then(`
			`([readability, linkedom]) => ({`
			`Readability: readability.Readability,`
			`parseHTML: linkedom.parseHTML,`
			`}),`
			`);`
			`}`
			`try {`
			`return await readabilityDepsPromise;`
			`} catch (error) {`
			`readabilityDepsPromise = undefined;`
			`throw error;`
			`}`
			`}`

refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`function decodeEntities(value: string): string {`
			`return value`
			`.replace(/ /gi, " ")`
			`.replace(/&/gi, "&")`
			`.replace(/"/gi, '"')`
			`.replace(/'/gi, "'")`
			`.replace(/</gi, "<")`
			`.replace(/>/gi, ">")`
			`.replace(/&#x([0-9a-f]+);/gi, (_, hex) => String.fromCharCode(Number.parseInt(hex, 16)))`
			`.replace(/&#(\d+);/gi, (_, dec) => String.fromCharCode(Number.parseInt(dec, 10)));`
			`}`

			`function stripTags(value: string): string {`
			`return decodeEntities(value.replace(/<[^>]+>/g, ""));`
			`}`

			`function normalizeWhitespace(value: string): string {`
			`return value`
			`.replace(/\r/g, "")`
			`.replace(/[ \t]+\n/g, "\n")`
			`.replace(/\n{3,}/g, "\n\n")`
			`.replace(/[ \t]{2,}/g, " ")`
			`.trim();`
			`}`

Web: trim HTML error bodies in web_fetch (#1193) * Web: trim HTML error bodies in web_fetch * fix: trim web_fetch HTML error bodies (#1193) (thanks @sebslight) --------- Co-authored-by: Sebastian Slight <sbarrios93@gmail.com> Co-authored-by: Peter Steinberger <steipete@gmail.com> 2026-01-18 19:24:16 -05:00			`export function htmlToMarkdown(html: string): { text: string; title?: string } {`
refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`const titleMatch = html.match(/<title[^>]>([\s\S]?)<\/title>/i);`
			`const title = titleMatch ? normalizeWhitespace(stripTags(titleMatch[1])) : undefined;`
			`let text = html`
			`.replace(/<script[\s\S]*?<\/script>/gi, "")`
			`.replace(/<style[\s\S]*?<\/style>/gi, "")`
			`.replace(/<noscript[\s\S]*?<\/noscript>/gi, "");`
			`text = text.replace(/<a\s+[^>]href=["']([^"']+)["'][^>]>([\s\S]*?)<\/a>/gi, (_, href, body) => {`
			`const label = normalizeWhitespace(stripTags(body));`
chore: Enable "curly" rule to avoid single-statement if confusion/errors. 2026-01-31 16:19:20 +09:00			`if (!label) {`
			`return href;`
			`}`
refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			return `[${label}](${href})`;
			`});`
			`text = text.replace(/<h([1-6])[^>]>([\s\S]?)<\/h\1>/gi, (_, level, body) => {`
			`const prefix = "#".repeat(Math.max(1, Math.min(6, Number.parseInt(level, 10))));`
			`const label = normalizeWhitespace(stripTags(body));`
			return `\n${prefix} ${label}\n`;
			`});`
			`text = text.replace(/<li[^>]>([\s\S]?)<\/li>/gi, (_, body) => {`
			`const label = normalizeWhitespace(stripTags(body));`
			return label ? `\n- ${label}` : "";
			`});`
			`text = text`
			`.replace(/<(br\|hr)\s*\/?>/gi, "\n")`
			`.replace(/<\/(p\|div\|section\|article\|header\|footer\|table\|tr\|ul\|ol)>/gi, "\n");`
			`text = stripTags(text);`
			`text = normalizeWhitespace(text);`
			`return { text, title };`
			`}`

			`export function markdownToText(markdown: string): string {`
			`let text = markdown;`
			`text = text.replace(/!\[[^\]]*]\([^)]+\)/g, "");`
			`text = text.replace(/\[([^\]]+)]\([^)]+\)/g, "$1");`
			text = text.replace(/```[\s\S]*?```/g, (block) =>
			block.replace(/```[^\n]*\n?/g, "").replace(/```/g, ""),
			`);`
			text = text.replace(/`([^`]+)`/g, "$1");
			`text = text.replace(/^#{1,6}\s+/gm, "");`
			`text = text.replace(/^\s[-+]\s+/gm, "");`
			`text = text.replace(/^\s*\d+\.\s+/gm, "");`
			`return normalizeWhitespace(text);`
			`}`

style: apply oxfmt 2026-01-18 02:19:35 +00:00			`export function truncateText(`
			`value: string,`
			`maxChars: number,`
			`): { text: string; truncated: boolean } {`
chore: Enable "curly" rule to avoid single-statement if confusion/errors. 2026-01-31 16:19:20 +09:00			`if (value.length <= maxChars) {`
			`return { text: value, truncated: false };`
			`}`
refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`return { text: value.slice(0, maxChars), truncated: true };`
			`}`

fix(web_fetch): cap response body before parsing 2026-02-16 01:19:04 +01:00			`function exceedsEstimatedHtmlNestingDepth(html: string, maxDepth: number): boolean {`
			`// Cheap heuristic to skip Readability+DOM parsing on pathological HTML (deep nesting => stack/memory blowups).`
			`// Not an HTML parser; tuned to catch attacker-controlled "<div><div>..." cases.`
			`const voidTags = new Set([`
			`"area",`
			`"base",`
			`"br",`
			`"col",`
			`"embed",`
			`"hr",`
			`"img",`
			`"input",`
			`"link",`
			`"meta",`
			`"param",`
			`"source",`
			`"track",`
			`"wbr",`
			`]);`

			`let depth = 0;`
			`const len = html.length;`
			`for (let i = 0; i < len; i++) {`
			`if (html.charCodeAt(i) !== 60) {`
			`continue; // '<'`
			`}`
			`const next = html.charCodeAt(i + 1);`
			`if (next === 33 \|\| next === 63) {`
			`continue; // <! ...> or <? ...>`
			`}`

			`let j = i + 1;`
			`let closing = false;`
			`if (html.charCodeAt(j) === 47) {`
			`closing = true;`
			`j += 1;`
			`}`

			`while (j < len && html.charCodeAt(j) <= 32) {`
			`j += 1;`
			`}`

			`const nameStart = j;`
			`while (j < len) {`
			`const c = html.charCodeAt(j);`
			`const isNameChar =`
			`(c >= 65 && c <= 90) \|\| // A-Z`
			`(c >= 97 && c <= 122) \|\| // a-z`
			`(c >= 48 && c <= 57) \|\| // 0-9`
			`c === 58 \|\| // :`
			`c === 45; // -`
			`if (!isNameChar) {`
			`break;`
			`}`
			`j += 1;`
			`}`

			`const tagName = html.slice(nameStart, j).toLowerCase();`
			`if (!tagName) {`
			`continue;`
			`}`

			`if (closing) {`
			`depth = Math.max(0, depth - 1);`
			`continue;`
			`}`

			`if (voidTags.has(tagName)) {`
			`continue;`
			`}`

			`// Best-effort self-closing detection: scan a short window for "/>".`
			`let selfClosing = false;`
			`for (let k = j; k < len && k < j + 200; k++) {`
			`const c = html.charCodeAt(k);`
			`if (c === 62) {`
			`if (html.charCodeAt(k - 1) === 47) {`
			`selfClosing = true;`
			`}`
			`break;`
			`}`
			`}`
			`if (selfClosing) {`
			`continue;`
			`}`

			`depth += 1;`
			`if (depth > maxDepth) {`
			`return true;`
			`}`
			`}`
			`return false;`
			`}`

refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`export async function extractReadableContent(params: {`
			`html: string;`
			`url: string;`
			`extractMode: ExtractMode;`
			`}): Promise<{ text: string; title?: string } \| null> {`
security(web_fetch): strip hidden content to prevent indirect prompt injection (#21074) * security(web_fetch): strip hidden content to prevent indirect prompt injection Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * security(web_fetch): address review feedback and credit author * chore(changelog): credit reporter for web_fetch security fix --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 00:10:26 +01:00			`const cleanHtml = await sanitizeHtml(params.html);`
fix: add readability fallback extraction 2026-01-24 02:14:59 +00:00			`const fallback = (): { text: string; title?: string } => {`
security(web_fetch): strip hidden content to prevent indirect prompt injection (#21074) * security(web_fetch): strip hidden content to prevent indirect prompt injection Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * security(web_fetch): address review feedback and credit author * chore(changelog): credit reporter for web_fetch security fix --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 00:10:26 +01:00			`const rendered = htmlToMarkdown(cleanHtml);`
fix: add readability fallback extraction 2026-01-24 02:14:59 +00:00			`if (params.extractMode === "text") {`
security(web_fetch): strip hidden content to prevent indirect prompt injection (#21074) * security(web_fetch): strip hidden content to prevent indirect prompt injection Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * security(web_fetch): address review feedback and credit author * chore(changelog): credit reporter for web_fetch security fix --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 00:10:26 +01:00			`const text =`
			`stripInvisibleUnicode(markdownToText(rendered.text)) \|\|`
			`stripInvisibleUnicode(normalizeWhitespace(stripTags(cleanHtml)));`
fix: add readability fallback extraction 2026-01-24 02:14:59 +00:00			`return { text, title: rendered.title };`
			`}`
security(web_fetch): strip hidden content to prevent indirect prompt injection (#21074) * security(web_fetch): strip hidden content to prevent indirect prompt injection Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * security(web_fetch): address review feedback and credit author * chore(changelog): credit reporter for web_fetch security fix --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 00:10:26 +01:00			`return { text: stripInvisibleUnicode(rendered.text), title: rendered.title };`
fix: add readability fallback extraction 2026-01-24 02:14:59 +00:00			`};`
fix(web_fetch): cap response body before parsing 2026-02-16 01:19:04 +01:00			`if (`
security(web_fetch): strip hidden content to prevent indirect prompt injection (#21074) * security(web_fetch): strip hidden content to prevent indirect prompt injection Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * security(web_fetch): address review feedback and credit author * chore(changelog): credit reporter for web_fetch security fix --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 00:10:26 +01:00			`cleanHtml.length > READABILITY_MAX_HTML_CHARS \|\|`
			`exceedsEstimatedHtmlNestingDepth(cleanHtml, READABILITY_MAX_ESTIMATED_NESTING_DEPTH)`
fix(web_fetch): cap response body before parsing 2026-02-16 01:19:04 +01:00			`) {`
			`return fallback();`
			`}`
refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`try {`
perf(web-fetch): memoize readability dependency loading 2026-02-14 01:29:40 +00:00			`const { Readability, parseHTML } = await loadReadabilityDeps();`
security(web_fetch): strip hidden content to prevent indirect prompt injection (#21074) * security(web_fetch): strip hidden content to prevent indirect prompt injection Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * security(web_fetch): address review feedback and credit author * chore(changelog): credit reporter for web_fetch security fix --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 00:10:26 +01:00			`const { document } = parseHTML(cleanHtml);`
refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`try {`
			`(document as { baseURI?: string }).baseURI = params.url;`
			`} catch {`
			`// Best-effort base URI for relative links.`
			`}`
			`const reader = new Readability(document, { charThreshold: 0 });`
			`const parsed = reader.parse();`
chore: Enable "curly" rule to avoid single-statement if confusion/errors. 2026-01-31 16:19:20 +09:00			`if (!parsed?.content) {`
			`return fallback();`
			`}`
refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`const title = parsed.title \|\| undefined;`
			`if (params.extractMode === "text") {`
security(web_fetch): strip hidden content to prevent indirect prompt injection (#21074) * security(web_fetch): strip hidden content to prevent indirect prompt injection Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * security(web_fetch): address review feedback and credit author * chore(changelog): credit reporter for web_fetch security fix --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 00:10:26 +01:00			`const text = stripInvisibleUnicode(normalizeWhitespace(parsed.textContent ?? ""));`
fix: add readability fallback extraction 2026-01-24 02:14:59 +00:00			`return text ? { text, title } : fallback();`
refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`}`
			`const rendered = htmlToMarkdown(parsed.content);`
security(web_fetch): strip hidden content to prevent indirect prompt injection (#21074) * security(web_fetch): strip hidden content to prevent indirect prompt injection Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * security(web_fetch): address review feedback and credit author * chore(changelog): credit reporter for web_fetch security fix --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 00:10:26 +01:00			`return { text: stripInvisibleUnicode(rendered.text), title: title ?? rendered.title };`
refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`} catch {`
fix: add readability fallback extraction 2026-01-24 02:14:59 +00:00			`return fallback();`
refactor: split web tools and docs 2026-01-18 01:42:40 +00:00			`}`
			`}`