diff --git a/CHANGELOG.md b/CHANGELOG.md index 1712feda5..b510bd370 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ Docs: https://docs.openclaw.ai - Telegram/direct delivery: bridge direct delivery sends to internal `message:sent` hooks so internal hook listeners observe successful Telegram deliveries. (#40185) Thanks @vincentkoc. - Plugins/global hook runner: harden singleton state handling so shared global hook runner reuse does not leak or corrupt runner state across executions. (#40184) Thanks @vincentkoc. - Agents/fallback: recognize Poe `402 You've used up your points!` billing errors so configured model fallbacks trigger instead of surfacing the raw provider error. (#42278) Thanks @CryUshio. +- Telegram/outbound HTML sends: chunk long HTML-mode messages, preserve plain-text fallback and silent-delivery params across retries, and cut over to plain text when HTML chunk planning cannot safely preserve the full message. (#42240) Thanks @obviyus. ## 2026.3.8 diff --git a/src/telegram/format.test.ts b/src/telegram/format.test.ts index ac4163b96..2fcd06663 100644 --- a/src/telegram/format.test.ts +++ b/src/telegram/format.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { markdownToTelegramHtml } from "./format.js"; +import { markdownToTelegramHtml, splitTelegramHtmlChunks } from "./format.js"; describe("markdownToTelegramHtml", () => { it("handles core markdown-to-telegram conversions", () => { @@ -112,4 +112,26 @@ describe("markdownToTelegramHtml", () => { expect(res).toContain("secret"); expect(res).toContain("trailing ||"); }); + + it("splits long multiline html text without breaking balanced tags", () => { + const chunks = splitTelegramHtmlChunks(`${"A\n".repeat(2500)}`, 4000); + expect(chunks.length).toBeGreaterThan(1); + expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true); + expect(chunks[0]).toMatch(/^[\s\S]*<\/b>$/); + expect(chunks[1]).toMatch(/^[\s\S]*<\/b>$/); + }); + + it("fails loudly when a 
leading entity cannot fit inside a chunk", () => { + expect(() => splitTelegramHtmlChunks(`A&${"B".repeat(20)}`, 4)).toThrow(/leading entity/i); + }); + + it("treats malformed leading ampersands as plain text when chunking html", () => { + const chunks = splitTelegramHtmlChunks(`&${"A".repeat(5000)}`, 4000); + expect(chunks.length).toBeGreaterThan(1); + expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true); + }); + + it("fails loudly when tag overhead leaves no room for text", () => { + expect(() => splitTelegramHtmlChunks("x", 10)).toThrow(/tag overhead/i); + }); }); diff --git a/src/telegram/format.ts b/src/telegram/format.ts index f74b508b4..ed1f6c822 100644 --- a/src/telegram/format.ts +++ b/src/telegram/format.ts @@ -241,6 +241,217 @@ export function renderTelegramHtmlText( return markdownToTelegramHtml(text, { tableMode: options.tableMode }); } +type TelegramHtmlTag = { + name: string; + openTag: string; + closeTag: string; +}; + +const TELEGRAM_SELF_CLOSING_HTML_TAGS = new Set(["br"]); + +function buildTelegramHtmlOpenPrefix(tags: TelegramHtmlTag[]): string { + return tags.map((tag) => tag.openTag).join(""); +} + +function buildTelegramHtmlCloseSuffix(tags: TelegramHtmlTag[]): string { + return tags + .slice() + .toReversed() + .map((tag) => tag.closeTag) + .join(""); +} + +function buildTelegramHtmlCloseSuffixLength(tags: TelegramHtmlTag[]): number { + return tags.reduce((total, tag) => total + tag.closeTag.length, 0); +} + +function findTelegramHtmlEntityEnd(text: string, start: number): number { + if (text[start] !== "&") { + return -1; + } + let index = start + 1; + if (index >= text.length) { + return -1; + } + if (text[index] === "#") { + index += 1; + if (index >= text.length) { + return -1; + } + const isHex = text[index] === "x" || text[index] === "X"; + if (isHex) { + index += 1; + const hexStart = index; + while (/[0-9A-Fa-f]/.test(text[index] ?? 
"")) { + index += 1; + } + if (index === hexStart) { + return -1; + } + } else { + const digitStart = index; + while (/[0-9]/.test(text[index] ?? "")) { + index += 1; + } + if (index === digitStart) { + return -1; + } + } + } else { + const nameStart = index; + while (/[A-Za-z0-9]/.test(text[index] ?? "")) { + index += 1; + } + if (index === nameStart) { + return -1; + } + } + return text[index] === ";" ? index : -1; +} + +function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number { + if (text.length <= maxLength) { + return text.length; + } + const normalizedMaxLength = Math.max(1, Math.floor(maxLength)); + const lastAmpersand = text.lastIndexOf("&", normalizedMaxLength - 1); + if (lastAmpersand === -1) { + return normalizedMaxLength; + } + const lastSemicolon = text.lastIndexOf(";", normalizedMaxLength - 1); + if (lastAmpersand < lastSemicolon) { + return normalizedMaxLength; + } + const entityEnd = findTelegramHtmlEntityEnd(text, lastAmpersand); + if (entityEnd === -1 || entityEnd < normalizedMaxLength) { + return normalizedMaxLength; + } + return lastAmpersand; +} + +function popTelegramHtmlTag(tags: TelegramHtmlTag[], name: string): void { + for (let index = tags.length - 1; index >= 0; index -= 1) { + if (tags[index]?.name === name) { + tags.splice(index, 1); + return; + } + } +} + +export function splitTelegramHtmlChunks(html: string, limit: number): string[] { + if (!html) { + return []; + } + const normalizedLimit = Math.max(1, Math.floor(limit)); + if (html.length <= normalizedLimit) { + return [html]; + } + + const chunks: string[] = []; + const openTags: TelegramHtmlTag[] = []; + let current = ""; + let chunkHasPayload = false; + + const resetCurrent = () => { + current = buildTelegramHtmlOpenPrefix(openTags); + chunkHasPayload = false; + }; + + const flushCurrent = () => { + if (!chunkHasPayload) { + return; + } + chunks.push(`${current}${buildTelegramHtmlCloseSuffix(openTags)}`); + resetCurrent(); + }; + + const appendText = 
(segment: string) => { + let remaining = segment; + while (remaining.length > 0) { + const available = + normalizedLimit - current.length - buildTelegramHtmlCloseSuffixLength(openTags); + if (available <= 0) { + if (!chunkHasPayload) { + throw new Error( + `Telegram HTML chunk limit exceeded by tag overhead (limit=${normalizedLimit})`, + ); + } + flushCurrent(); + continue; + } + if (remaining.length <= available) { + current += remaining; + chunkHasPayload = true; + break; + } + const splitAt = findTelegramHtmlSafeSplitIndex(remaining, available); + if (splitAt <= 0) { + if (!chunkHasPayload) { + throw new Error( + `Telegram HTML chunk limit exceeded by leading entity (limit=${normalizedLimit})`, + ); + } + flushCurrent(); + continue; + } + current += remaining.slice(0, splitAt); + chunkHasPayload = true; + remaining = remaining.slice(splitAt); + flushCurrent(); + } + }; + + resetCurrent(); + HTML_TAG_PATTERN.lastIndex = 0; + let lastIndex = 0; + let match: RegExpExecArray | null; + while ((match = HTML_TAG_PATTERN.exec(html)) !== null) { + const tagStart = match.index; + const tagEnd = HTML_TAG_PATTERN.lastIndex; + appendText(html.slice(lastIndex, tagStart)); + + const rawTag = match[0]; + const isClosing = match[1] === "")); + + if (!isClosing) { + const nextCloseLength = isSelfClosing ? 0 : ``.length; + if ( + chunkHasPayload && + current.length + + rawTag.length + + buildTelegramHtmlCloseSuffixLength(openTags) + + nextCloseLength > + normalizedLimit + ) { + flushCurrent(); + } + } + + current += rawTag; + if (isSelfClosing) { + chunkHasPayload = true; + } + if (isClosing) { + popTelegramHtmlTag(openTags, tagName); + } else if (!isSelfClosing) { + openTags.push({ + name: tagName, + openTag: rawTag, + closeTag: ``, + }); + } + lastIndex = tagEnd; + } + + appendText(html.slice(lastIndex)); + flushCurrent(); + return chunks.length > 0 ? 
chunks : [html]; +} + function splitTelegramChunkByHtmlLimit( chunk: MarkdownIR, htmlLimit: number, diff --git a/src/telegram/send.test.ts b/src/telegram/send.test.ts index a34f27d19..a00d1b2e8 100644 --- a/src/telegram/send.test.ts +++ b/src/telegram/send.test.ts @@ -1135,6 +1135,31 @@ describe("sendMessageTelegram", () => { }); }); + it("keeps disable_notification on plain-text fallback when silent is true", async () => { + const chatId = "123"; + const parseErr = new Error( + "400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9", + ); + const sendMessage = vi + .fn() + .mockRejectedValueOnce(parseErr) + .mockResolvedValueOnce({ message_id: 2, chat: { id: chatId } }); + const api = { sendMessage } as unknown as { + sendMessage: typeof sendMessage; + }; + + await sendMessageTelegram(chatId, "_oops_", { + token: "tok", + api, + silent: true, + }); + + expect(sendMessage.mock.calls).toEqual([ + [chatId, "oops", { parse_mode: "HTML", disable_notification: true }], + [chatId, "_oops_", { disable_notification: true }], + ]); + }); + it("parses message_thread_id from recipient string (telegram:group:...:topic:...)", async () => { const chatId = "-1001234567890"; const sendMessage = vi.fn().mockResolvedValue({ @@ -1257,6 +1282,120 @@ describe("sendMessageTelegram", () => { expect.objectContaining({ maxBytes: 42 * 1024 * 1024 }), ); }); + + it("chunks long html-mode text and keeps buttons on the last chunk only", async () => { + const chatId = "123"; + const htmlText = `${"A".repeat(5000)}`; + + const sendMessage = vi + .fn() + .mockResolvedValueOnce({ message_id: 90, chat: { id: chatId } }) + .mockResolvedValueOnce({ message_id: 91, chat: { id: chatId } }); + const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage }; + + const res = await sendMessageTelegram(chatId, htmlText, { + token: "tok", + api, + textMode: "html", + buttons: [[{ text: "OK", callback_data: "ok" }]], + }); + + 
expect(sendMessage).toHaveBeenCalledTimes(2); + const firstCall = sendMessage.mock.calls[0]; + const secondCall = sendMessage.mock.calls[1]; + expect(firstCall).toBeDefined(); + expect(secondCall).toBeDefined(); + expect((firstCall[1] as string).length).toBeLessThanOrEqual(4000); + expect((secondCall[1] as string).length).toBeLessThanOrEqual(4000); + expect(firstCall[2]?.reply_markup).toBeUndefined(); + expect(secondCall[2]?.reply_markup).toEqual({ + inline_keyboard: [[{ text: "OK", callback_data: "ok" }]], + }); + expect(res.messageId).toBe("91"); + }); + + it("preserves caller plain-text fallback across chunked html parse retries", async () => { + const chatId = "123"; + const htmlText = `${"A".repeat(5000)}`; + const plainText = `${"P".repeat(2500)}${"Q".repeat(2500)}`; + const parseErr = new Error( + "400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9", + ); + const sendMessage = vi + .fn() + .mockRejectedValueOnce(parseErr) + .mockResolvedValueOnce({ message_id: 90, chat: { id: chatId } }) + .mockRejectedValueOnce(parseErr) + .mockResolvedValueOnce({ message_id: 91, chat: { id: chatId } }); + const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage }; + + const res = await sendMessageTelegram(chatId, htmlText, { + token: "tok", + api, + textMode: "html", + plainText, + }); + + expect(sendMessage).toHaveBeenCalledTimes(4); + const plainFallbackCalls = [sendMessage.mock.calls[1], sendMessage.mock.calls[3]]; + expect(plainFallbackCalls.map((call) => String(call?.[1] ?? "")).join("")).toBe(plainText); + expect(plainFallbackCalls.every((call) => !String(call?.[1] ?? 
"").includes("<"))).toBe(true); + expect(res.messageId).toBe("91"); + }); + + it("keeps malformed leading ampersands on the chunked plain-text fallback path", async () => { + const chatId = "123"; + const htmlText = `&${"A".repeat(5000)}`; + const plainText = "fallback!!"; + const parseErr = new Error( + "400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 0", + ); + const sendMessage = vi + .fn() + .mockRejectedValueOnce(parseErr) + .mockResolvedValueOnce({ message_id: 92, chat: { id: chatId } }) + .mockRejectedValueOnce(parseErr) + .mockResolvedValueOnce({ message_id: 93, chat: { id: chatId } }); + const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage }; + + const res = await sendMessageTelegram(chatId, htmlText, { + token: "tok", + api, + textMode: "html", + plainText, + }); + + expect(sendMessage).toHaveBeenCalledTimes(4); + expect(String(sendMessage.mock.calls[0]?.[1] ?? "")).toMatch(/^&/); + const plainFallbackCalls = [sendMessage.mock.calls[1], sendMessage.mock.calls[3]]; + expect(plainFallbackCalls.map((call) => String(call?.[1] ?? "")).join("")).toBe(plainText); + expect(plainFallbackCalls.every((call) => String(call?.[1] ?? 
"").length > 0)).toBe(true); + expect(res.messageId).toBe("93"); + }); + + it("cuts over to plain text when fallback text needs more chunks than html", async () => { + const chatId = "123"; + const htmlText = `${"A".repeat(5000)}`; + const plainText = "P".repeat(9000); + const sendMessage = vi + .fn() + .mockResolvedValueOnce({ message_id: 94, chat: { id: chatId } }) + .mockResolvedValueOnce({ message_id: 95, chat: { id: chatId } }) + .mockResolvedValueOnce({ message_id: 96, chat: { id: chatId } }); + const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage }; + + const res = await sendMessageTelegram(chatId, htmlText, { + token: "tok", + api, + textMode: "html", + plainText, + }); + + expect(sendMessage).toHaveBeenCalledTimes(3); + expect(sendMessage.mock.calls.every((call) => call[2]?.parse_mode === undefined)).toBe(true); + expect(sendMessage.mock.calls.map((call) => String(call[1] ?? "")).join("")).toBe(plainText); + expect(res.messageId).toBe("96"); + }); }); describe("reactMessageTelegram", () => { diff --git a/src/telegram/send.ts b/src/telegram/send.ts index 313abf361..fa26df020 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -26,7 +26,7 @@ import { buildTelegramThreadParams, buildTypingThreadParams } from "./bot/helper import type { TelegramInlineButtons } from "./button-types.js"; import { splitTelegramCaption } from "./caption.js"; import { resolveTelegramFetch } from "./fetch.js"; -import { renderTelegramHtmlText } from "./format.js"; +import { renderTelegramHtmlText, splitTelegramHtmlChunks } from "./format.js"; import { isRecoverableTelegramNetworkError, isSafeToRetrySendError } from "./network-errors.js"; import { makeProxyFetch } from "./proxy.js"; import { recordSentMessage } from "./sent-message-cache.js"; @@ -108,6 +108,42 @@ function resolveTelegramMessageIdOrThrow( throw new Error(`Telegram ${context} returned no message_id`); } +function splitTelegramPlainTextChunks(text: string, limit: number): string[] { + 
if (!text) { + return []; + } + const normalizedLimit = Math.max(1, Math.floor(limit)); + const chunks: string[] = []; + for (let start = 0; start < text.length; start += normalizedLimit) { + chunks.push(text.slice(start, start + normalizedLimit)); + } + return chunks; +} + +function splitTelegramPlainTextFallback(text: string, chunkCount: number, limit: number): string[] { + if (!text) { + return []; + } + const normalizedLimit = Math.max(1, Math.floor(limit)); + const fixedChunks = splitTelegramPlainTextChunks(text, normalizedLimit); + if (chunkCount <= 1 || fixedChunks.length >= chunkCount) { + return fixedChunks; + } + const chunks: string[] = []; + let offset = 0; + for (let index = 0; index < chunkCount; index += 1) { + const remainingChars = text.length - offset; + const remainingChunks = chunkCount - index; + const nextChunkLength = + remainingChunks === 1 + ? remainingChars + : Math.min(normalizedLimit, Math.ceil(remainingChars / remainingChunks)); + chunks.push(text.slice(offset, offset + nextChunkLength)); + offset += nextChunkLength; + } + return chunks; +} + const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i; const THREAD_NOT_FOUND_RE = /400:\s*Bad Request:\s*message thread not found/i; const MESSAGE_NOT_MODIFIED_RE = @@ -596,27 +632,49 @@ export async function sendMessageTelegram( const linkPreviewEnabled = account.config.linkPreview ?? true; const linkPreviewOptions = linkPreviewEnabled ? undefined : { is_disabled: true }; - const sendTelegramText = async ( - rawText: string, + type TelegramTextChunk = { + plainText: string; + htmlText?: string; + }; + + const sendTelegramTextChunk = async ( + chunk: TelegramTextChunk, params?: Record, - fallbackText?: string, ) => { return await withTelegramThreadFallback( params, "message", opts.verbose, async (effectiveParams, label) => { - const htmlText = renderHtmlText(rawText); const baseParams = effectiveParams ? 
{ ...effectiveParams } : {}; if (linkPreviewOptions) { baseParams.link_preview_options = linkPreviewOptions; } - const hasBaseParams = Object.keys(baseParams).length > 0; - const sendParams = { - parse_mode: "HTML" as const, + const plainParams = { ...baseParams, ...(opts.silent === true ? { disable_notification: true } : {}), }; + const hasPlainParams = Object.keys(plainParams).length > 0; + const requestPlain = (retryLabel: string) => + requestWithChatNotFound( + () => + hasPlainParams + ? api.sendMessage( + chatId, + chunk.plainText, + plainParams as Parameters[2], + ) + : api.sendMessage(chatId, chunk.plainText), + retryLabel, + ); + if (!chunk.htmlText) { + return await requestPlain(label); + } + const htmlText = chunk.htmlText; + const htmlParams = { + parse_mode: "HTML" as const, + ...plainParams, + }; return await withTelegramHtmlParseFallback({ label, verbose: opts.verbose, @@ -626,27 +684,74 @@ export async function sendMessageTelegram( api.sendMessage( chatId, htmlText, - sendParams as Parameters[2], + htmlParams as Parameters[2], ), retryLabel, ), - requestPlain: (retryLabel) => { - const plainParams = hasBaseParams - ? (baseParams as Parameters[2]) - : undefined; - return requestWithChatNotFound( - () => - plainParams - ? api.sendMessage(chatId, fallbackText ?? rawText, plainParams) - : api.sendMessage(chatId, fallbackText ?? rawText), - retryLabel, - ); - }, + requestPlain, }); }, ); }; + const buildTextParams = (isLastChunk: boolean) => + hasThreadParams || (isLastChunk && replyMarkup) + ? { + ...threadParams, + ...(isLastChunk && replyMarkup ? 
{ reply_markup: replyMarkup } : {}), + } + : undefined; + + const sendTelegramTextChunks = async ( + chunks: TelegramTextChunk[], + context: string, + ): Promise<{ messageId: string; chatId: string }> => { + let lastMessageId = ""; + let lastChatId = chatId; + for (let index = 0; index < chunks.length; index += 1) { + const chunk = chunks[index]; + if (!chunk) { + continue; + } + const res = await sendTelegramTextChunk(chunk, buildTextParams(index === chunks.length - 1)); + const messageId = resolveTelegramMessageIdOrThrow(res, context); + recordSentMessage(chatId, messageId); + lastMessageId = String(messageId); + lastChatId = String(res?.chat?.id ?? chatId); + } + return { messageId: lastMessageId, chatId: lastChatId }; + }; + + const buildChunkedTextPlan = (rawText: string, context: string): TelegramTextChunk[] => { + const fallbackText = opts.plainText ?? rawText; + let htmlChunks: string[]; + try { + htmlChunks = splitTelegramHtmlChunks(rawText, 4000); + } catch (error) { + logVerbose( + `telegram ${context} failed HTML chunk planning, retrying as plain text: ${formatErrorMessage( + error, + )}`, + ); + return splitTelegramPlainTextChunks(fallbackText, 4000).map((plainText) => ({ plainText })); + } + const fixedPlainTextChunks = splitTelegramPlainTextChunks(fallbackText, 4000); + if (fixedPlainTextChunks.length > htmlChunks.length) { + logVerbose( + `telegram ${context} plain-text fallback needs more chunks than HTML; sending plain text`, + ); + return fixedPlainTextChunks.map((plainText) => ({ plainText })); + } + const plainTextChunks = splitTelegramPlainTextFallback(fallbackText, htmlChunks.length, 4000); + return htmlChunks.map((htmlText, index) => ({ + htmlText, + plainText: plainTextChunks[index] ?? 
htmlText, + })); + }; + + const sendChunkedText = async (rawText: string, context: string) => + await sendTelegramTextChunks(buildChunkedTextPlan(rawText, context), context); + if (mediaUrl) { const media = await loadWebMedia( mediaUrl, @@ -801,21 +906,15 @@ export async function sendMessageTelegram( // If text was too long for a caption, send it as a separate follow-up message. // Use HTML conversion so markdown renders like captions. if (needsSeparateText && followUpText) { - const textParams = - hasThreadParams || replyMarkup - ? { - ...threadParams, - ...(replyMarkup ? { reply_markup: replyMarkup } : {}), - } - : undefined; - const textRes = await sendTelegramText(followUpText, textParams); - // Return the text message ID as the "main" message (it's the actual content). - const textMessageId = resolveTelegramMessageIdOrThrow(textRes, "text follow-up send"); - recordSentMessage(chatId, textMessageId); - return { - messageId: String(textMessageId), - chatId: resolvedChatId, - }; + if (textMode === "html") { + const textResult = await sendChunkedText(followUpText, "text follow-up send"); + return { messageId: textResult.messageId, chatId: resolvedChatId }; + } + const textResult = await sendTelegramTextChunks( + [{ plainText: followUpText, htmlText: renderHtmlText(followUpText) }], + "text follow-up send", + ); + return { messageId: textResult.messageId, chatId: resolvedChatId }; } return { messageId: String(mediaMessageId), chatId: resolvedChatId }; @@ -824,22 +923,21 @@ export async function sendMessageTelegram( if (!text || !text.trim()) { throw new Error("Message must be non-empty for Telegram sends"); } - const textParams = - hasThreadParams || replyMarkup - ? { - ...threadParams, - ...(replyMarkup ? 
{ reply_markup: replyMarkup } : {}), - } - : undefined; - const res = await sendTelegramText(text, textParams, opts.plainText); - const messageId = resolveTelegramMessageIdOrThrow(res, "text send"); - recordSentMessage(chatId, messageId); + let textResult: { messageId: string; chatId: string }; + if (textMode === "html") { + textResult = await sendChunkedText(text, "text send"); + } else { + textResult = await sendTelegramTextChunks( + [{ plainText: opts.plainText ?? text, htmlText: renderHtmlText(text) }], + "text send", + ); + } recordChannelActivity({ channel: "telegram", accountId: account.accountId, direction: "outbound", }); - return { messageId: String(messageId), chatId: String(res?.chat?.id ?? chatId) }; + return textResult; } export async function sendTypingTelegram(