From 5bc67ed3582ab47731401dcbc2ec78e29d40da1b Mon Sep 17 00:00:00 2001 From: lpf Date: Fri, 13 Feb 2026 14:51:25 +0800 Subject: [PATCH] fix bug --- pkg/channels/telegram.go | 155 ++++++++++++++++++++++++++++++++++----- 1 file changed, 138 insertions(+), 17 deletions(-) diff --git a/pkg/channels/telegram.go b/pkg/channels/telegram.go index a0768f5..a3ea408 100644 --- a/pkg/channels/telegram.go +++ b/pkg/channels/telegram.go @@ -123,7 +123,7 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err log.Printf("Telegram thinking stop skipped: no stop channel found for chat_id=%s", msg.ChatID) } - htmlContent := markdownToTelegramHTML(msg.Content) + htmlContent := sanitizeTelegramHTML(markdownToTelegramHTML(msg.Content)) // Try to edit placeholder if pID, ok := c.placeholders.Load(msg.ChatID); ok { @@ -151,7 +151,8 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err if err != nil { log.Printf("HTML parse failed, falling back to plain text: %v", err) - _, err = c.bot.SendMessage(ctx, telegoutil.Message(chatID, msg.Content)) + plain := plainTextFromTelegramHTML(htmlContent) + _, err = c.bot.SendMessage(ctx, telegoutil.Message(chatID, plain)) if err != nil { log.Printf("Telegram plain-text fallback send failed: chat_id=%s err=%v", msg.ChatID, err) } @@ -407,30 +408,25 @@ func markdownToTelegramHTML(text string) string { inlineCodes := extractInlineCodes(text) text = inlineCodes.text - text = regexp.MustCompile(`^#{1,6}\s+(.+)$`).ReplaceAllString(text, "$1") - - text = regexp.MustCompile(`^>\s*(.*)$`).ReplaceAllString(text, "$1") - text = escapeHTML(text) + text = regexp.MustCompile(`(?m)^#{1,6}\s+(.+)$`).ReplaceAllString(text, "$1") + + text = regexp.MustCompile(`(?m)^>\s*(.*)$`).ReplaceAllString(text, "│ $1") + text = regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`).ReplaceAllString(text, `$1`) text = regexp.MustCompile(`\*\*(.+?)\*\*`).ReplaceAllString(text, "$1") text = regexp.MustCompile(`__(.+?)__`).ReplaceAllString(text, "$1") - reItalic := regexp.MustCompile(`_([^_]+)_`) - text = reItalic.ReplaceAllStringFunc(text, func(s string) string { - match := reItalic.FindStringSubmatch(s) - if len(match) < 2 { - return s - } - return "" + match[1] + "" - }) + text = regexp.MustCompile(`\*([^*\n]+)\*`).ReplaceAllString(text, "$1") + text = regexp.MustCompile(`_([^_\n]+)_`).ReplaceAllString(text, "$1") text = regexp.MustCompile(`~~(.+?)~~`).ReplaceAllString(text, "$1") - text = regexp.MustCompile(`^[-*]\s+`).ReplaceAllString(text, "• ") + text = regexp.MustCompile(`(?m)^[-*]\s+`).ReplaceAllString(text, "• ") + text = regexp.MustCompile(`(?m)^\d+\.\s+`).ReplaceAllString(text, "• ") for i, code := range inlineCodes.codes { escaped := escapeHTML(code) @@ -459,8 +455,11 @@ func extractCodeBlocks(text string) codeBlockMatch { codes = append(codes, match[1]) } + index := 0 text = re.ReplaceAllStringFunc(text, func(m string) string { - return fmt.Sprintf("\x00CB%d\x00", len(codes)-1) + placeholder := fmt.Sprintf("\x00CB%d\x00", index) + index++ + return placeholder }) return codeBlockMatch{text: text, codes: codes} @@ -480,8 +479,11 @@ func extractInlineCodes(text string) inlineCodeMatch { codes = append(codes, match[1]) } + index := 0 text = re.ReplaceAllStringFunc(text, func(m string) string { - return fmt.Sprintf("\x00IC%d\x00", len(codes)-1) + placeholder := fmt.Sprintf("\x00IC%d\x00", index) + index++ + return placeholder }) return inlineCodeMatch{text: text, codes: codes} @@ -493,3 +495,122 @@ func escapeHTML(text string) string { text = strings.ReplaceAll(text, ">", ">") return text } + +var telegramAllowedTags = map[string]bool{ + "b": true, + "strong": true, + "i": true, + "em": true, + "u": true, + "s": true, + "strike": true, + "del": true, + "code": true, + "pre": true, + "a": true, +} + +func sanitizeTelegramHTML(input string) string { + if input == "" { + return "" + } + + tagRe := regexp.MustCompile(`(?is)<\s*(/?)\s*([a-z0-9]+)([^>]*)>`) + hrefRe := regexp.MustCompile(`(?is)\bhref\s*=\s*"([^"]+)"`) + + var out strings.Builder + stack := make([]string, 0, 16) + pos := 0 + + matches := tagRe.FindAllStringSubmatchIndex(input, -1) + for _, m := range matches { + start, end := m[0], m[1] + out.WriteString(input[pos:start]) + + isClose := strings.TrimSpace(input[m[2]:m[3]]) == "/" + tagName := strings.ToLower(strings.TrimSpace(input[m[4]:m[5]])) + attrRaw := "" + if m[6] >= 0 && m[7] >= 0 { + attrRaw = input[m[6]:m[7]] + } + + if !telegramAllowedTags[tagName] { + out.WriteString(escapeHTML(input[start:end])) + pos = end + continue + } + + if isClose { + // Ensure tag stack remains balanced; drop unmatched close tags. + found := -1 + for i := len(stack) - 1; i >= 0; i-- { + if stack[i] == tagName { + found = i + break + } + } + if found == -1 { + pos = end + continue + } + for i := len(stack) - 1; i >= found; i-- { + out.WriteString("") + } + stack = stack[:found] + pos = end + continue + } + + // Normalize opening tags; only can carry attributes. + if tagName == "a" { + hrefMatch := hrefRe.FindStringSubmatch(attrRaw) + if len(hrefMatch) < 2 { + // Invalid anchor tag -> degrade to escaped text to avoid parse errors. + out.WriteString("<a>") + pos = end + continue + } + href := strings.TrimSpace(hrefMatch[1]) + if !isSafeTelegramHref(href) { + out.WriteString("<a>") + pos = end + continue + } + out.WriteString(``) + } else { + out.WriteString("<" + tagName + ">") + } + stack = append(stack, tagName) + pos = end + } + + out.WriteString(input[pos:]) + for i := len(stack) - 1; i >= 0; i-- { + out.WriteString("") + } + return out.String() +} + +func isSafeTelegramHref(href string) bool { + lower := strings.ToLower(strings.TrimSpace(href)) + return strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") || strings.HasPrefix(lower, "tg://") +} + +func escapeHTMLAttr(text string) string { + text = strings.ReplaceAll(text, "&", "&") + text = strings.ReplaceAll(text, `"`, """) + text = strings.ReplaceAll(text, "<", "<") + text = strings.ReplaceAll(text, ">", ">") + return text +} + +func plainTextFromTelegramHTML(text string) string { + // Best-effort fallback for parse failures: drop tags and keep readable content. + tagRe := regexp.MustCompile(`(?is)<[^>]+>`) + plain := tagRe.ReplaceAllString(text, "") + plain = strings.ReplaceAll(plain, "<", "<") + plain = strings.ReplaceAll(plain, ">", ">") + plain = strings.ReplaceAll(plain, """, "\"") + plain = strings.ReplaceAll(plain, "&", "&") + return plain +}