From 4ca179ad3316489628651e96f74ac535bb499900 Mon Sep 17 00:00:00 2001 From: lpf Date: Fri, 13 Mar 2026 12:05:21 +0800 Subject: [PATCH] fix: harden telegram stream rendering --- pkg/channels/telegram.go | 114 +++++++++++++++++++++++++++++++--- pkg/channels/telegram_test.go | 113 +++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+), 8 deletions(-) create mode 100644 pkg/channels/telegram_test.go diff --git a/pkg/channels/telegram.go b/pkg/channels/telegram.go index 8568384..e6729ba 100644 --- a/pkg/channels/telegram.go +++ b/pkg/channels/telegram.go @@ -784,9 +784,9 @@ func clampTelegramHTML(markdown string, maxRunes int) string { return sanitizeTelegramHTML(markdownToTelegramHTML(chunks[0])) } -func (c *TelegramChannel) handleStreamAction(ctx context.Context, chatID int64, msg bus.OutboundMessage) error { +func (c *TelegramChannel) handleStreamAction(ctx context.Context, chatID int64, msg bus.OutboundMessage, finalizeRich bool) error { streamKey := telegramStreamKey(chatID, msg.ReplyToID) - chunks := renderTelegramStreamChunks(msg.Content) + chunks := renderTelegramStreamChunksWithFinalize(msg.Content, finalizeRich) if len(chunks) == 0 { return nil } @@ -855,6 +855,10 @@ func (c *TelegramChannel) handleStreamAction(ctx context.Context, chatID int64, } func renderTelegramStreamChunks(content string) []telegramRenderedChunk { + return renderTelegramStreamChunksWithFinalize(content, false) +} + +func renderTelegramStreamChunksWithFinalize(content string, finalizeRich bool) []telegramRenderedChunk { raw := strings.TrimSpace(content) if raw == "" { return nil @@ -863,6 +867,9 @@ func renderTelegramStreamChunks(content string) []telegramRenderedChunk { if strings.TrimSpace(body) == "" { return nil } + if mode == "auto_markdown" && !shouldFlushTelegramStreamSnapshot(body) { + return nil + } var parts []string switch mode { @@ -909,6 +916,16 @@ func renderTelegramStreamChunks(content string) []telegramRenderedChunk { out = append(out, telegramRenderedChunk{payload: payload, parseMode: ""}) } default: + if !finalizeRich { + payload := trimmed + if len([]rune(payload)) > telegramStreamSplitMaxRunes { + payload = splitTelegramText(payload, telegramStreamSplitMaxRunes)[0] + } + if strings.TrimSpace(payload) != "" { + out = append(out, telegramRenderedChunk{payload: payload, parseMode: ""}) + } + continue + } payload := sanitizeTelegramHTML(markdownToTelegramHTML(trimmed)) if len([]rune(payload)) > telegramSafeHTMLMaxRunes { payload = clampTelegramHTML(trimmed, telegramSafeHTMLMaxRunes) @@ -921,6 +938,87 @@ func renderTelegramStreamChunks(content string) []telegramRenderedChunk { return out } +func shouldFlushTelegramStreamSnapshot(content string) bool { + trimmed := strings.TrimSpace(content) + if trimmed == "" { + return false + } + if strings.Count(trimmed, "```")%2 == 1 { + return false + } + + inlineBackticks := 0 + for i := 0; i < len(trimmed); i++ { + if trimmed[i] != '`' { + continue + } + if strings.HasPrefix(trimmed[i:], "```") { + i += 2 + continue + } + inlineBackticks++ + } + if inlineBackticks%2 == 1 { + return false + } + + if hasOddUnescapedMarkdownMarker(trimmed, "**") { + return false + } + if hasOddUnescapedMarkdownMarker(trimmed, "__") { + return false + } + if hasOddSingleMarkdownMarker(trimmed, '*') { + return false + } + if hasOddSingleMarkdownMarker(trimmed, '_') { + return false + } + if strings.Count(trimmed, "[") != strings.Count(trimmed, "]") { + return false + } + if strings.Count(trimmed, "(") < strings.Count(trimmed, "]") { + return false + } + if strings.Count(trimmed, "](") > 0 && strings.Count(trimmed, "(") != strings.Count(trimmed, ")") { + return false + } + return true +} + +func hasOddUnescapedMarkdownMarker(s, marker string) bool { + count := 0 + for i := 0; i+len(marker) <= len(s); i++ { + if s[i:i+len(marker)] != marker { + continue + } + if i > 0 && s[i-1] == '\\' { + continue + } + count++ + i += len(marker) - 1 + } + return count%2 == 1 +} + +func hasOddSingleMarkdownMarker(s string, marker byte) bool { + count := 0 + for i := 0; i < len(s); i++ { + if s[i] != marker { + continue + } + if i > 0 && s[i-1] == '\\' { + continue + } + if i+1 < len(s) && s[i+1] == marker { + i++ + continue + } + count++ + } + return count%2 == 1 +} + func detectTelegramStreamMode(content string) (mode string, body string) { trimmed := strings.TrimSpace(content) switch { @@ -1083,16 +1181,16 @@ func (c *TelegramChannel) handleAction(ctx context.Context, chatID int64, action _, err := c.bot.EditMessageText(editCtx, &telego.EditMessageTextParams{ChatID: telegoutil.ID(chatID), MessageID: messageID, Text: htmlContent, ParseMode: telego.ModeHTML}) return err case "stream": - return c.handleStreamAction(ctx, chatID, msg) + return c.handleStreamAction(ctx, chatID, msg, false) case "finalize": if strings.TrimSpace(msg.Content) != "" { - // Final pass in auto-markdown mode to recover rich formatting after plain streaming. + // Final pass to recover rich formatting after conservative plain streaming. if err := c.handleStreamAction(ctx, chatID, bus.OutboundMessage{ ChatID: msg.ChatID, ReplyToID: msg.ReplyToID, Content: msg.Content, Action: "stream", - }); err != nil { + }, true); err != nil { return err } } @@ -1148,15 +1246,15 @@ func markdownToTelegramHTML(text string) string { text = escapeHTML(text) text = regexp.MustCompile("(?m)^#{1,6}\\s+(.+)$").ReplaceAllString(text, "$1") - text = regexp.MustCompile("(?m)^>\\s*(.*)$").ReplaceAllString(text, "鈹?$1") + text = regexp.MustCompile("(?m)^\\s*>\\s*(.*)$").ReplaceAllString(text, "> $1") text = regexp.MustCompile("\\[([^\\]]+)\\]\\(([^)]+)\\)").ReplaceAllString(text, `$1`) text = regexp.MustCompile("\\*\\*(.+?)\\*\\*").ReplaceAllString(text, "$1") text = regexp.MustCompile("__(.+?)__").ReplaceAllString(text, "$1") text = regexp.MustCompile("\\*([^*\\n]+)\\*").ReplaceAllString(text, "$1") text = regexp.MustCompile("_([^_\\n]+)_").ReplaceAllString(text, "$1") text = regexp.MustCompile("~~(.+?)~~").ReplaceAllString(text, "$1") - text = regexp.MustCompile("(?m)^[-*]\\s+").ReplaceAllString(text, "鈥?") - text = regexp.MustCompile("(?m)^\\d+\\.\\s+").ReplaceAllString(text, "鈥?") + text = regexp.MustCompile("(?m)^\\s*[-*]\\s+").ReplaceAllString(text, "• ") + text = regexp.MustCompile("(?m)^\\s*(\\d+\\.\\s+)").ReplaceAllString(text, "$1") for i, code := range inlineCodes.codes { escaped := escapeHTML(code) diff --git a/pkg/channels/telegram_test.go b/pkg/channels/telegram_test.go new file mode 100644 index 0000000..b5af88a --- /dev/null +++ b/pkg/channels/telegram_test.go @@ -0,0 +1,113 @@ +//go:build !omit_telegram + +package channels + +import ( + "strings" + "testing" +) + +func TestMarkdownToTelegramHTMLFormatsChineseAndInlineMarkup(t *testing.T) { + got := markdownToTelegramHTML("中文 **加粗** *斜体* `代码`") + if strings.Contains(got, "鈹") || strings.Contains(got, "鈥") { + t.Fatalf("unexpected mojibake in output: %q", got) + } + if !strings.Contains(got, "中文 加粗 斜体 代码") { + t.Fatalf("unexpected formatted output: %q", got) + } +} + +func TestMarkdownToTelegramHTMLFormatsQuoteAndListsWithoutMojibake(t *testing.T) { + input := "> 引用\n- 列表\n* 另一项\n1. 有序" + got := markdownToTelegramHTML(input) + + if strings.Contains(got, "鈹") || strings.Contains(got, "鈥") { + t.Fatalf("unexpected mojibake in output: %q", got) + } + if !strings.Contains(got, "> 引用") { + t.Fatalf("expected escaped quote marker, got %q", got) + } + if !strings.Contains(got, "• 列表") || !strings.Contains(got, "• 另一项") { + t.Fatalf("expected bullet list markers, got %q", got) + } + if !strings.Contains(got, "1. 有序") { + t.Fatalf("expected ordered list marker preserved, got %q", got) + } +} + +func TestRenderTelegramStreamChunksDoesNotInjectMojibake(t *testing.T) { + chunks := renderTelegramStreamChunks("> 引用\n- 列表\n1. 有序\n中文内容") + if len(chunks) == 0 { + t.Fatal("expected stream chunks") + } + for _, chunk := range chunks { + if strings.Contains(chunk.payload, "鈹") || strings.Contains(chunk.payload, "鈥") { + t.Fatalf("unexpected mojibake chunk payload: %q", chunk.payload) + } + } +} + +func TestShouldFlushTelegramStreamSnapshotRejectsUnclosedMarkdown(t *testing.T) { + cases := []string{ + "中文 **加粗", + "中文 *斜体", + "中文 `代码", + "```go\nfmt.Println(\"hi\")", + "[链接](https://example.com", + } + + for _, input := range cases { + if shouldFlushTelegramStreamSnapshot(input) { + t.Fatalf("expected unsafe snapshot to be rejected: %q", input) + } + if chunks := renderTelegramStreamChunks(input); len(chunks) != 0 { + t.Fatalf("expected no chunks for unsafe snapshot %q, got %+v", input, chunks) + } + } +} + +func TestShouldFlushTelegramStreamSnapshotAcceptsBalancedMarkdown(t *testing.T) { + input := "> 引用\n- 列表\n1. 有序\n中文 **加粗** *斜体* `代码` [链接](https://example.com)" + if !shouldFlushTelegramStreamSnapshot(input) { + t.Fatalf("expected balanced snapshot to flush: %q", input) + } + chunks := renderTelegramStreamChunks(input) + if len(chunks) == 0 { + t.Fatalf("expected chunks for balanced snapshot") + } + if chunks[0].parseMode != "" { + t.Fatalf("expected auto-markdown stream to degrade to plain text, got mode=%q", chunks[0].parseMode) + } +} + +func TestRenderTelegramStreamChunksFinalizeRecoversRichFormatting(t *testing.T) { + input := "> 引用\n- 列表\n中文 **加粗** *斜体* `代码` [链接](https://example.com)" + chunks := renderTelegramStreamChunksWithFinalize(input, true) + if len(chunks) == 0 { + t.Fatalf("expected finalize chunks") + } + if chunks[0].parseMode != "HTML" { + t.Fatalf("expected finalize chunk to use HTML, got %q", chunks[0].parseMode) + } + if !strings.Contains(chunks[0].payload, "加粗") { + t.Fatalf("expected rich formatting restored, got %q", chunks[0].payload) + } +} + +func TestMarkdownToTelegramHTMLHandlesEdgeFormatting(t *testing.T) { + input := "> 第一段引用\n> 第二段引用\n- 列表一\n - 子项\n1. 有序项\n\n```go\nfmt.Println(\"hi\")\nfmt.Println(\"bye\")\n```\n[链接](https://example.com/path?q=1)" + got := markdownToTelegramHTML(input) + + if !strings.Contains(got, "> 第一段引用\n> 第二段引用") { + t.Fatalf("expected consecutive quote lines to stay stable, got %q", got) + } + if !strings.Contains(got, "• 列表一") || !strings.Contains(got, "• 子项") { + t.Fatalf("expected nested list lines to normalize to bullets, got %q", got) + } + if !strings.Contains(got, "
fmt.Println(\"hi\")\nfmt.Println(\"bye\")\n
") { + t.Fatalf("expected code block newlines preserved, got %q", got) + } + if !strings.Contains(got, `链接`) { + t.Fatalf("expected link conversion, got %q", got) + } +}