fix(telegram): chunk long html outbound messages (#42240)

Merged via squash.

Prepared head SHA: 4d79c41ddf33f44749355641936f8c425224ec6f
Co-authored-by: obviyus <22031114+obviyus@users.noreply.github.com>
Co-authored-by: obviyus <22031114+obviyus@users.noreply.github.com>
Reviewed-by: @obviyus
This commit is contained in:
Ayaan Zaidi
2026-03-10 22:53:04 +05:30
committed by GitHub
parent 8bf64f219a
commit 3b582f1d54
5 changed files with 519 additions and 48 deletions

View File

@@ -66,6 +66,7 @@ Docs: https://docs.openclaw.ai
- Telegram/direct delivery: bridge direct delivery sends to internal `message:sent` hooks so internal hook listeners observe successful Telegram deliveries. (#40185) Thanks @vincentkoc.
- Plugins/global hook runner: harden singleton state handling so shared global hook runner reuse does not leak or corrupt runner state across executions. (#40184) Thanks @vincentkoc.
- Agents/fallback: recognize Poe `402 You've used up your points!` billing errors so configured model fallbacks trigger instead of surfacing the raw provider error. (#42278) Thanks @CryUshio.
- Telegram/outbound HTML sends: chunk long HTML-mode messages, preserve plain-text fallback and silent-delivery params across retries, and cut over to plain text when HTML chunk planning cannot safely preserve the full message. (#42240) Thanks @obviyus.
## 2026.3.8

View File

@@ -1,5 +1,5 @@
import { describe, expect, it } from "vitest";
import { markdownToTelegramHtml } from "./format.js";
import { markdownToTelegramHtml, splitTelegramHtmlChunks } from "./format.js";
describe("markdownToTelegramHtml", () => {
it("handles core markdown-to-telegram conversions", () => {
@@ -112,4 +112,26 @@ describe("markdownToTelegramHtml", () => {
expect(res).toContain("<tg-spoiler>secret</tg-spoiler>");
expect(res).toContain("trailing ||");
});
it("splits long multiline html text without breaking balanced tags", () => {
const chunks = splitTelegramHtmlChunks(`<b>${"A\n".repeat(2500)}</b>`, 4000);
expect(chunks.length).toBeGreaterThan(1);
expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true);
expect(chunks[0]).toMatch(/^<b>[\s\S]*<\/b>$/);
expect(chunks[1]).toMatch(/^<b>[\s\S]*<\/b>$/);
});
it("fails loudly when a leading entity cannot fit inside a chunk", () => {
expect(() => splitTelegramHtmlChunks(`A&amp;${"B".repeat(20)}`, 4)).toThrow(/leading entity/i);
});
it("treats malformed leading ampersands as plain text when chunking html", () => {
const chunks = splitTelegramHtmlChunks(`&${"A".repeat(5000)}`, 4000);
expect(chunks.length).toBeGreaterThan(1);
expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true);
});
it("fails loudly when tag overhead leaves no room for text", () => {
expect(() => splitTelegramHtmlChunks("<b><i><u>x</u></i></b>", 10)).toThrow(/tag overhead/i);
});
});

View File

@@ -241,6 +241,217 @@ export function renderTelegramHtmlText(
return markdownToTelegramHtml(text, { tableMode: options.tableMode });
}
/** One HTML element that is currently open while Telegram HTML is being split into chunks. */
type TelegramHtmlTag = {
// Tag name; the chunker stores it lower-cased and uses it to match closing tags.
name: string;
// Opening tag text exactly as it appeared in the input; replayed at the start of each following chunk.
openTag: string;
// Closing tag text appended when a chunk ends while this element is still open.
closeTag: string;
};
// Tag names treated as self-closing even when written without a trailing "/>";
// the chunker never pushes them onto its open-tag stack.
const TELEGRAM_SELF_CLOSING_HTML_TAGS = new Set(["br"]);
/**
 * Builds the text that re-opens every tracked tag at the start of a chunk.
 *
 * @param tags Open tags, outermost first.
 * @returns The opening tags concatenated in the given order ("" when none are open).
 */
function buildTelegramHtmlOpenPrefix(tags: TelegramHtmlTag[]): string {
  let prefix = "";
  for (const { openTag } of tags) {
    prefix += openTag;
  }
  return prefix;
}
/**
 * Builds the text that closes every tracked tag at the end of a chunk.
 *
 * @param tags Open tags, outermost first; the array is not modified.
 * @returns The closing tags concatenated innermost-first, so nesting stays balanced.
 */
function buildTelegramHtmlCloseSuffix(tags: TelegramHtmlTag[]): string {
  // Walk from the innermost (last) tag outwards without copying or mutating the stack.
  return tags.reduceRight((suffix, tag) => suffix + tag.closeTag, "");
}
/**
 * Computes how many characters the closing tags for `tags` will occupy,
 * without building the suffix string itself.
 *
 * @param tags Open tags in any order.
 * @returns The summed length of every `closeTag` (0 when none are open).
 */
function buildTelegramHtmlCloseSuffixLength(tags: TelegramHtmlTag[]): number {
  let length = 0;
  for (const { closeTag } of tags) {
    length += closeTag.length;
  }
  return length;
}
/**
 * Locates the end of a well-formed HTML entity that begins at `start`.
 *
 * Recognised forms are `&name;` (ASCII letters/digits), `&#123;` (decimal) and
 * `&#x1F;` / `&#X1F;` (hexadecimal); each needs at least one character before
 * the terminating semicolon.
 *
 * @param text Text to inspect.
 * @param start Index expected to hold the entity's leading `&`.
 * @returns The index of the terminating `;`, or -1 when `text[start]` is not
 *   the start of a complete entity.
 */
function findTelegramHtmlEntityEnd(text: string, start: number): number {
  // Also rejects out-of-range or non-integer positions before they reach lastIndex.
  if (text[start] !== "&") {
    return -1;
  }
  // Sticky matching anchors the pattern at `start` instead of searching ahead.
  const entityPattern = /&(?:#[xX][0-9A-Fa-f]+|#[0-9]+|[A-Za-z0-9]+);/y;
  entityPattern.lastIndex = start;
  // After a sticky match lastIndex sits just past the ";".
  return entityPattern.test(text) ? entityPattern.lastIndex - 1 : -1;
}
/**
 * Picks the index at which `text` can be cut so the first part is at most
 * `maxLength` characters without breaking an HTML entity or a UTF-16
 * surrogate pair.
 *
 * @param text Tag-free text segment (it may contain entities such as `&amp;`).
 * @param maxLength Maximum length of the first part; coerced to an integer >= 1.
 * @returns `text.length` when the whole text fits; otherwise the cut index.
 *   The result is the index of a `&` (possibly 0) when a complete entity
 *   straddles the limit, so callers must handle a 0 result.
 */
function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number {
  if (text.length <= maxLength) {
    return text.length;
  }
  const normalizedMaxLength = Math.max(1, Math.floor(maxLength));

  // Only the last "&" before the limit can start an entity that crosses it,
  // and only if no ";" has already terminated it before the limit.
  const lastAmpersand = text.lastIndexOf("&", normalizedMaxLength - 1);
  if (lastAmpersand !== -1) {
    const lastSemicolon = text.lastIndexOf(";", normalizedMaxLength - 1);
    if (lastAmpersand > lastSemicolon) {
      const entityEnd = findTelegramHtmlEntityEnd(text, lastAmpersand);
      // A malformed entity (-1) is treated as plain text and may be cut anywhere.
      if (entityEnd >= normalizedMaxLength) {
        return lastAmpersand;
      }
    }
  }

  // Do not cut between the two halves of a surrogate pair (e.g. an emoji):
  // that would leave a lone surrogate at the end of one chunk and the start
  // of the next. When the limit is 1 the pair cannot be kept together, so the
  // original cut is preserved rather than returning 0.
  if (normalizedMaxLength > 1) {
    const before = text.charCodeAt(normalizedMaxLength - 1);
    const after = text.charCodeAt(normalizedMaxLength);
    const cutsSurrogatePair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (cutsSurrogatePair) {
      return normalizedMaxLength - 1;
    }
  }
  return normalizedMaxLength;
}
/**
 * Removes the most recently opened tag called `name` from the open-tag stack.
 *
 * @param tags Open-tag stack, outermost first; modified in place.
 * @param name Tag name to close.
 * @returns Nothing; the stack is left unchanged when no tag has that name.
 */
function popTelegramHtmlTag(tags: TelegramHtmlTag[], name: string): void {
  // lastIndexOf picks the innermost match, i.e. the one closest to the top of the stack.
  const position = tags.map((tag) => tag.name).lastIndexOf(name);
  if (position !== -1) {
    tags.splice(position, 1);
  }
}
/**
 * Splits a Telegram HTML string into several chunks, aiming to keep each one
 * within `limit` characters while keeping tags balanced: a chunk that ends
 * inside open elements gets their closing tags appended, and the next chunk
 * starts by re-opening the same elements.
 *
 * @param html HTML text to split.
 * @param limit Target maximum chunk length; coerced to an integer of at least 1.
 * @returns `[]` for empty input, `[html]` when the input already fits (or when
 *   no chunk was produced), otherwise the chunks in order.
 * @throws Error when the open/close tags alone leave no room for text in an
 *   otherwise empty chunk ("tag overhead"), or when the safe split index is 0
 *   in an otherwise empty chunk ("leading entity").
 */
export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
if (!html) {
return [];
}
const normalizedLimit = Math.max(1, Math.floor(limit));
if (html.length <= normalizedLimit) {
return [html];
}
const chunks: string[] = [];
// Stack of elements that are open at the current position, outermost first.
const openTags: TelegramHtmlTag[] = [];
// Chunk under construction, without the closing suffix for `openTags`.
let current = "";
// True once `current` holds text or a self-closing tag, not just re-opened tags.
let chunkHasPayload = false;
// Starts a fresh chunk that re-opens every element still on the stack.
const resetCurrent = () => {
current = buildTelegramHtmlOpenPrefix(openTags);
chunkHasPayload = false;
};
// Emits the current chunk with its closing suffix; chunks without payload are not emitted.
const flushCurrent = () => {
if (!chunkHasPayload) {
return;
}
chunks.push(`${current}${buildTelegramHtmlCloseSuffix(openTags)}`);
resetCurrent();
};
// Appends a tag-free text segment, flushing and continuing in new chunks as needed.
const appendText = (segment: string) => {
let remaining = segment;
while (remaining.length > 0) {
// Room left for text once the closing tags for the open elements are reserved.
const available =
normalizedLimit - current.length - buildTelegramHtmlCloseSuffixLength(openTags);
if (available <= 0) {
// A chunk without payload cannot be flushed to make room, so flushing would never progress.
if (!chunkHasPayload) {
throw new Error(
`Telegram HTML chunk limit exceeded by tag overhead (limit=${normalizedLimit})`,
);
}
flushCurrent();
continue;
}
if (remaining.length <= available) {
current += remaining;
chunkHasPayload = true;
break;
}
const splitAt = findTelegramHtmlSafeSplitIndex(remaining, available);
if (splitAt <= 0) {
// Nothing can be taken from `remaining`; retry in a fresh chunk if this one had payload.
if (!chunkHasPayload) {
throw new Error(
`Telegram HTML chunk limit exceeded by leading entity (limit=${normalizedLimit})`,
);
}
flushCurrent();
continue;
}
current += remaining.slice(0, splitAt);
chunkHasPayload = true;
remaining = remaining.slice(splitAt);
flushCurrent();
}
};
resetCurrent();
// NOTE(review): HTML_TAG_PATTERN is defined outside this block. The code below assumes it
// carries the global flag (lastIndex is reset and read), that group 1 is the "<" or "</"
// opener and group 2 is the tag name; confirm against its definition.
HTML_TAG_PATTERN.lastIndex = 0;
let lastIndex = 0;
let match: RegExpExecArray | null;
while ((match = HTML_TAG_PATTERN.exec(html)) !== null) {
const tagStart = match.index;
const tagEnd = HTML_TAG_PATTERN.lastIndex;
// Text between the previous tag and this one.
appendText(html.slice(lastIndex, tagStart));
const rawTag = match[0];
const isClosing = match[1] === "</";
const tagName = match[2].toLowerCase();
const isSelfClosing =
!isClosing &&
(TELEGRAM_SELF_CLOSING_HTML_TAGS.has(tagName) || rawTag.trimEnd().endsWith("/>"));
if (!isClosing) {
// Before adding an opening tag, make sure the tag, the closers already owed and its own
// closer still fit; otherwise start a new chunk first (only possible once there is payload).
const nextCloseLength = isSelfClosing ? 0 : `</${tagName}>`.length;
if (
chunkHasPayload &&
current.length +
rawTag.length +
buildTelegramHtmlCloseSuffixLength(openTags) +
nextCloseLength >
normalizedLimit
) {
flushCurrent();
}
}
current += rawTag;
// A self-closing tag counts as payload on its own.
if (isSelfClosing) {
chunkHasPayload = true;
}
if (isClosing) {
popTelegramHtmlTag(openTags, tagName);
} else if (!isSelfClosing) {
openTags.push({
name: tagName,
openTag: rawTag,
closeTag: `</${tagName}>`,
});
}
lastIndex = tagEnd;
}
// Trailing text after the last tag, then emit whatever is pending.
appendText(html.slice(lastIndex));
flushCurrent();
// If no chunk was emitted, hand the input back unchanged.
return chunks.length > 0 ? chunks : [html];
}
function splitTelegramChunkByHtmlLimit(
chunk: MarkdownIR,
htmlLimit: number,

View File

@@ -1135,6 +1135,31 @@ describe("sendMessageTelegram", () => {
});
});
it("keeps disable_notification on plain-text fallback when silent is true", async () => {
const chatId = "123";
const parseErr = new Error(
"400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9",
);
const sendMessage = vi
.fn()
.mockRejectedValueOnce(parseErr)
.mockResolvedValueOnce({ message_id: 2, chat: { id: chatId } });
const api = { sendMessage } as unknown as {
sendMessage: typeof sendMessage;
};
await sendMessageTelegram(chatId, "_oops_", {
token: "tok",
api,
silent: true,
});
expect(sendMessage.mock.calls).toEqual([
[chatId, "<i>oops</i>", { parse_mode: "HTML", disable_notification: true }],
[chatId, "_oops_", { disable_notification: true }],
]);
});
it("parses message_thread_id from recipient string (telegram:group:...:topic:...)", async () => {
const chatId = "-1001234567890";
const sendMessage = vi.fn().mockResolvedValue({
@@ -1257,6 +1282,120 @@ describe("sendMessageTelegram", () => {
expect.objectContaining({ maxBytes: 42 * 1024 * 1024 }),
);
});
it("chunks long html-mode text and keeps buttons on the last chunk only", async () => {
const chatId = "123";
const htmlText = `<b>${"A".repeat(5000)}</b>`;
const sendMessage = vi
.fn()
.mockResolvedValueOnce({ message_id: 90, chat: { id: chatId } })
.mockResolvedValueOnce({ message_id: 91, chat: { id: chatId } });
const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage };
const res = await sendMessageTelegram(chatId, htmlText, {
token: "tok",
api,
textMode: "html",
buttons: [[{ text: "OK", callback_data: "ok" }]],
});
expect(sendMessage).toHaveBeenCalledTimes(2);
const firstCall = sendMessage.mock.calls[0];
const secondCall = sendMessage.mock.calls[1];
expect(firstCall).toBeDefined();
expect(secondCall).toBeDefined();
expect((firstCall[1] as string).length).toBeLessThanOrEqual(4000);
expect((secondCall[1] as string).length).toBeLessThanOrEqual(4000);
expect(firstCall[2]?.reply_markup).toBeUndefined();
expect(secondCall[2]?.reply_markup).toEqual({
inline_keyboard: [[{ text: "OK", callback_data: "ok" }]],
});
expect(res.messageId).toBe("91");
});
it("preserves caller plain-text fallback across chunked html parse retries", async () => {
const chatId = "123";
const htmlText = `<b>${"A".repeat(5000)}</b>`;
const plainText = `${"P".repeat(2500)}${"Q".repeat(2500)}`;
const parseErr = new Error(
"400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9",
);
const sendMessage = vi
.fn()
.mockRejectedValueOnce(parseErr)
.mockResolvedValueOnce({ message_id: 90, chat: { id: chatId } })
.mockRejectedValueOnce(parseErr)
.mockResolvedValueOnce({ message_id: 91, chat: { id: chatId } });
const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage };
const res = await sendMessageTelegram(chatId, htmlText, {
token: "tok",
api,
textMode: "html",
plainText,
});
expect(sendMessage).toHaveBeenCalledTimes(4);
const plainFallbackCalls = [sendMessage.mock.calls[1], sendMessage.mock.calls[3]];
expect(plainFallbackCalls.map((call) => String(call?.[1] ?? "")).join("")).toBe(plainText);
expect(plainFallbackCalls.every((call) => !String(call?.[1] ?? "").includes("<"))).toBe(true);
expect(res.messageId).toBe("91");
});
it("keeps malformed leading ampersands on the chunked plain-text fallback path", async () => {
const chatId = "123";
const htmlText = `&${"A".repeat(5000)}`;
const plainText = "fallback!!";
const parseErr = new Error(
"400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 0",
);
const sendMessage = vi
.fn()
.mockRejectedValueOnce(parseErr)
.mockResolvedValueOnce({ message_id: 92, chat: { id: chatId } })
.mockRejectedValueOnce(parseErr)
.mockResolvedValueOnce({ message_id: 93, chat: { id: chatId } });
const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage };
const res = await sendMessageTelegram(chatId, htmlText, {
token: "tok",
api,
textMode: "html",
plainText,
});
expect(sendMessage).toHaveBeenCalledTimes(4);
expect(String(sendMessage.mock.calls[0]?.[1] ?? "")).toMatch(/^&/);
const plainFallbackCalls = [sendMessage.mock.calls[1], sendMessage.mock.calls[3]];
expect(plainFallbackCalls.map((call) => String(call?.[1] ?? "")).join("")).toBe(plainText);
expect(plainFallbackCalls.every((call) => String(call?.[1] ?? "").length > 0)).toBe(true);
expect(res.messageId).toBe("93");
});
it("cuts over to plain text when fallback text needs more chunks than html", async () => {
const chatId = "123";
const htmlText = `<b>${"A".repeat(5000)}</b>`;
const plainText = "P".repeat(9000);
const sendMessage = vi
.fn()
.mockResolvedValueOnce({ message_id: 94, chat: { id: chatId } })
.mockResolvedValueOnce({ message_id: 95, chat: { id: chatId } })
.mockResolvedValueOnce({ message_id: 96, chat: { id: chatId } });
const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage };
const res = await sendMessageTelegram(chatId, htmlText, {
token: "tok",
api,
textMode: "html",
plainText,
});
expect(sendMessage).toHaveBeenCalledTimes(3);
expect(sendMessage.mock.calls.every((call) => call[2]?.parse_mode === undefined)).toBe(true);
expect(sendMessage.mock.calls.map((call) => String(call[1] ?? "")).join("")).toBe(plainText);
expect(res.messageId).toBe("96");
});
});
describe("reactMessageTelegram", () => {

View File

@@ -26,7 +26,7 @@ import { buildTelegramThreadParams, buildTypingThreadParams } from "./bot/helper
import type { TelegramInlineButtons } from "./button-types.js";
import { splitTelegramCaption } from "./caption.js";
import { resolveTelegramFetch } from "./fetch.js";
import { renderTelegramHtmlText } from "./format.js";
import { renderTelegramHtmlText, splitTelegramHtmlChunks } from "./format.js";
import { isRecoverableTelegramNetworkError, isSafeToRetrySendError } from "./network-errors.js";
import { makeProxyFetch } from "./proxy.js";
import { recordSentMessage } from "./sent-message-cache.js";
@@ -108,6 +108,42 @@ function resolveTelegramMessageIdOrThrow(
throw new Error(`Telegram ${context} returned no message_id`);
}
/**
 * Splits plain text into consecutive chunks of at most `limit` UTF-16 code
 * units. Joining the chunks always reproduces `text`.
 *
 * A chunk boundary is moved back by one code unit when it would otherwise fall
 * between the two halves of a surrogate pair (e.g. an emoji), so no chunk ends
 * or starts with a lone surrogate. The only exception is a limit of 1, where a
 * pair cannot be kept together.
 *
 * @param text Text to split.
 * @param limit Maximum chunk length; coerced to an integer of at least 1.
 * @returns The chunks in order, or `[]` for empty text.
 */
function splitTelegramPlainTextChunks(text: string, limit: number): string[] {
  if (!text) {
    return [];
  }
  const normalizedLimit = Math.max(1, Math.floor(limit));
  const chunks: string[] = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(text.length, start + normalizedLimit);
    // Only back off when the chunk keeps at least one code unit, so the loop always advances.
    if (end < text.length && end - start > 1) {
      const before = text.charCodeAt(end - 1);
      const after = text.charCodeAt(end);
      const cutsSurrogatePair =
        before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (cutsSurrogatePair) {
        end -= 1;
      }
    }
    chunks.push(text.slice(start, end));
    start = end;
  }
  return chunks;
}
/**
 * Splits the plain-text fallback so it lines up with a given number of HTML
 * chunks, letting chunk N of the fallback stand in for chunk N of the HTML.
 *
 * When a fixed-size split already needs at least `chunkCount` chunks (or only
 * one chunk is wanted), the fixed-size split is returned. Otherwise the text
 * is spread as evenly as possible over the requested number of chunks.
 *
 * Empty chunks are never produced: if the text has fewer characters than
 * `chunkCount`, fewer chunks are returned and the caller is expected to
 * substitute its own fallback for the missing indices. An empty string cannot
 * be sent as a Telegram message.
 *
 * @param text Plain text to distribute.
 * @param chunkCount Number of HTML chunks the fallback should line up with.
 * @param limit Maximum chunk length; coerced to an integer of at least 1.
 * @returns Non-empty chunks whose concatenation equals `text`; `[]` for empty text.
 */
function splitTelegramPlainTextFallback(text: string, chunkCount: number, limit: number): string[] {
  if (!text) {
    return [];
  }
  const normalizedLimit = Math.max(1, Math.floor(limit));
  // Never aim for more chunks than there are characters; the surplus would be "".
  const targetCount = Math.min(chunkCount, text.length);
  // Smallest number of limit-sized chunks that can hold the text.
  const fixedChunkCount = Math.ceil(text.length / normalizedLimit);
  if (targetCount <= 1 || fixedChunkCount >= targetCount) {
    return splitTelegramPlainTextChunks(text, normalizedLimit);
  }
  const chunks: string[] = [];
  let offset = 0;
  for (let index = 0; index < targetCount; index += 1) {
    const remainingChars = text.length - offset;
    const remainingChunks = targetCount - index;
    // The last chunk takes whatever is left; earlier ones take an even share.
    const nextChunkLength =
      remainingChunks === 1
        ? remainingChars
        : Math.min(normalizedLimit, Math.ceil(remainingChars / remainingChunks));
    chunks.push(text.slice(offset, offset + nextChunkLength));
    offset += nextChunkLength;
  }
  return chunks;
}
const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i;
const THREAD_NOT_FOUND_RE = /400:\s*Bad Request:\s*message thread not found/i;
const MESSAGE_NOT_MODIFIED_RE =
@@ -596,27 +632,49 @@ export async function sendMessageTelegram(
const linkPreviewEnabled = account.config.linkPreview ?? true;
const linkPreviewOptions = linkPreviewEnabled ? undefined : { is_disabled: true };
const sendTelegramText = async (
rawText: string,
type TelegramTextChunk = {
plainText: string;
htmlText?: string;
};
const sendTelegramTextChunk = async (
chunk: TelegramTextChunk,
params?: Record<string, unknown>,
fallbackText?: string,
) => {
return await withTelegramThreadFallback(
params,
"message",
opts.verbose,
async (effectiveParams, label) => {
const htmlText = renderHtmlText(rawText);
const baseParams = effectiveParams ? { ...effectiveParams } : {};
if (linkPreviewOptions) {
baseParams.link_preview_options = linkPreviewOptions;
}
const hasBaseParams = Object.keys(baseParams).length > 0;
const sendParams = {
parse_mode: "HTML" as const,
const plainParams = {
...baseParams,
...(opts.silent === true ? { disable_notification: true } : {}),
};
const hasPlainParams = Object.keys(plainParams).length > 0;
const requestPlain = (retryLabel: string) =>
requestWithChatNotFound(
() =>
hasPlainParams
? api.sendMessage(
chatId,
chunk.plainText,
plainParams as Parameters<typeof api.sendMessage>[2],
)
: api.sendMessage(chatId, chunk.plainText),
retryLabel,
);
if (!chunk.htmlText) {
return await requestPlain(label);
}
const htmlText = chunk.htmlText;
const htmlParams = {
parse_mode: "HTML" as const,
...plainParams,
};
return await withTelegramHtmlParseFallback({
label,
verbose: opts.verbose,
@@ -626,27 +684,74 @@ export async function sendMessageTelegram(
api.sendMessage(
chatId,
htmlText,
sendParams as Parameters<typeof api.sendMessage>[2],
htmlParams as Parameters<typeof api.sendMessage>[2],
),
retryLabel,
),
requestPlain: (retryLabel) => {
const plainParams = hasBaseParams
? (baseParams as Parameters<typeof api.sendMessage>[2])
: undefined;
return requestWithChatNotFound(
() =>
plainParams
? api.sendMessage(chatId, fallbackText ?? rawText, plainParams)
: api.sendMessage(chatId, fallbackText ?? rawText),
retryLabel,
);
},
requestPlain,
});
},
);
};
const buildTextParams = (isLastChunk: boolean) =>
hasThreadParams || (isLastChunk && replyMarkup)
? {
...threadParams,
...(isLastChunk && replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
const sendTelegramTextChunks = async (
chunks: TelegramTextChunk[],
context: string,
): Promise<{ messageId: string; chatId: string }> => {
let lastMessageId = "";
let lastChatId = chatId;
for (let index = 0; index < chunks.length; index += 1) {
const chunk = chunks[index];
if (!chunk) {
continue;
}
const res = await sendTelegramTextChunk(chunk, buildTextParams(index === chunks.length - 1));
const messageId = resolveTelegramMessageIdOrThrow(res, context);
recordSentMessage(chatId, messageId);
lastMessageId = String(messageId);
lastChatId = String(res?.chat?.id ?? chatId);
}
return { messageId: lastMessageId, chatId: lastChatId };
};
const buildChunkedTextPlan = (rawText: string, context: string): TelegramTextChunk[] => {
const fallbackText = opts.plainText ?? rawText;
let htmlChunks: string[];
try {
htmlChunks = splitTelegramHtmlChunks(rawText, 4000);
} catch (error) {
logVerbose(
`telegram ${context} failed HTML chunk planning, retrying as plain text: ${formatErrorMessage(
error,
)}`,
);
return splitTelegramPlainTextChunks(fallbackText, 4000).map((plainText) => ({ plainText }));
}
const fixedPlainTextChunks = splitTelegramPlainTextChunks(fallbackText, 4000);
if (fixedPlainTextChunks.length > htmlChunks.length) {
logVerbose(
`telegram ${context} plain-text fallback needs more chunks than HTML; sending plain text`,
);
return fixedPlainTextChunks.map((plainText) => ({ plainText }));
}
const plainTextChunks = splitTelegramPlainTextFallback(fallbackText, htmlChunks.length, 4000);
return htmlChunks.map((htmlText, index) => ({
htmlText,
plainText: plainTextChunks[index] ?? htmlText,
}));
};
const sendChunkedText = async (rawText: string, context: string) =>
await sendTelegramTextChunks(buildChunkedTextPlan(rawText, context), context);
if (mediaUrl) {
const media = await loadWebMedia(
mediaUrl,
@@ -801,21 +906,15 @@ export async function sendMessageTelegram(
// If text was too long for a caption, send it as a separate follow-up message.
// Use HTML conversion so markdown renders like captions.
if (needsSeparateText && followUpText) {
const textParams =
hasThreadParams || replyMarkup
? {
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
const textRes = await sendTelegramText(followUpText, textParams);
// Return the text message ID as the "main" message (it's the actual content).
const textMessageId = resolveTelegramMessageIdOrThrow(textRes, "text follow-up send");
recordSentMessage(chatId, textMessageId);
return {
messageId: String(textMessageId),
chatId: resolvedChatId,
};
if (textMode === "html") {
const textResult = await sendChunkedText(followUpText, "text follow-up send");
return { messageId: textResult.messageId, chatId: resolvedChatId };
}
const textResult = await sendTelegramTextChunks(
[{ plainText: followUpText, htmlText: renderHtmlText(followUpText) }],
"text follow-up send",
);
return { messageId: textResult.messageId, chatId: resolvedChatId };
}
return { messageId: String(mediaMessageId), chatId: resolvedChatId };
@@ -824,22 +923,21 @@ export async function sendMessageTelegram(
if (!text || !text.trim()) {
throw new Error("Message must be non-empty for Telegram sends");
}
const textParams =
hasThreadParams || replyMarkup
? {
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
const res = await sendTelegramText(text, textParams, opts.plainText);
const messageId = resolveTelegramMessageIdOrThrow(res, "text send");
recordSentMessage(chatId, messageId);
let textResult: { messageId: string; chatId: string };
if (textMode === "html") {
textResult = await sendChunkedText(text, "text send");
} else {
textResult = await sendTelegramTextChunks(
[{ plainText: opts.plainText ?? text, htmlText: renderHtmlText(text) }],
"text send",
);
}
recordChannelActivity({
channel: "telegram",
accountId: account.accountId,
direction: "outbound",
});
return { messageId: String(messageId), chatId: String(res?.chat?.id ?? chatId) };
return textResult;
}
export async function sendTypingTelegram(