From f04a6939b335b62061beef3a0c30420c5914e821 Mon Sep 17 00:00:00 2001 From: DBT Date: Wed, 25 Feb 2026 11:46:36 +0000 Subject: [PATCH] fix telegram media sending via message tool (path/url support) --- pkg/agent/loop.go | 3 +- pkg/bus/types.go | 1 + pkg/channels/telegram.go | 97 ++++++++++++++++++++++++++++++++++++++++ pkg/tools/message.go | 40 +++++++++++++++-- 4 files changed, 136 insertions(+), 5 deletions(-) diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 41812f7..10c0352 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -149,11 +149,12 @@ func NewAgentLoop(cfg *config.Config, msgBus *bus.MessageBus, provider providers // Register message tool messageTool := tools.NewMessageTool() - messageTool.SetSendCallback(func(channel, chatID, action, content, messageID, emoji string, buttons [][]bus.Button) error { + messageTool.SetSendCallback(func(channel, chatID, action, content, media, messageID, emoji string, buttons [][]bus.Button) error { msgBus.PublishOutbound(bus.OutboundMessage{ Channel: channel, ChatID: chatID, Content: content, + Media: media, Buttons: buttons, Action: action, MessageID: messageID, diff --git a/pkg/bus/types.go b/pkg/bus/types.go index 592f591..ae82ae9 100644 --- a/pkg/bus/types.go +++ b/pkg/bus/types.go @@ -19,6 +19,7 @@ type OutboundMessage struct { Channel string `json:"channel"` ChatID string `json:"chat_id"` Content string `json:"content"` + Media string `json:"media,omitempty"` ReplyToID string `json:"reply_to_id,omitempty"` Buttons [][]Button `json:"buttons,omitempty"` Action string `json:"action,omitempty"` diff --git a/pkg/channels/telegram.go b/pkg/channels/telegram.go index 8afe249..8bce5ef 100644 --- a/pkg/channels/telegram.go +++ b/pkg/channels/telegram.go @@ -1,13 +1,17 @@ package channels import ( + "bytes" "context" "fmt" "io" + "mime/multipart" "net/http" + "net/url" "os" "path/filepath" "regexp" + "strconv" "strings" "sync" "time" @@ -269,6 +273,10 @@ func (c *TelegramChannel) Send(ctx context.Context, msg 
bus.OutboundMessage) err
 
 	htmlContent := sanitizeTelegramHTML(markdownToTelegramHTML(msg.Content))
 
+	if strings.TrimSpace(msg.Media) != "" {
+		return c.sendMedia(ctx, chatIDInt, msg, htmlContent)
+	}
+
 	var markup *telego.InlineKeyboardMarkup
 	if len(msg.Buttons) > 0 {
 		var rows [][]telego.InlineKeyboardButton
@@ -334,6 +342,144 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
 	return nil
 }
 
+// sendMedia delivers msg.Media to chatID by calling the Telegram Bot API over
+// plain HTTP rather than through telego.
+//
+// msg.Media is either an http(s) URL, which is passed to Telegram as-is, or a
+// local file path, which is uploaded as multipart/form-data. Common image
+// extensions are sent with sendPhoto, everything else with sendDocument. A
+// non-blank htmlCaption is attached as an HTML caption, and msg.ReplyToID is
+// honored when parseTelegramMessageID accepts it.
+//
+// Returned errors never contain the request URL, because it embeds the bot
+// token.
+//
+// NOTE(review): msg.Buttons is not forwarded on this path, and a local path is
+// opened without any directory restriction; confirm both are intended.
+func (c *TelegramChannel) sendMedia(ctx context.Context, chatID int64, msg bus.OutboundMessage, htmlCaption string) error {
+	media := strings.TrimSpace(msg.Media)
+	if media == "" {
+		return fmt.Errorf("empty media")
+	}
+	lower := strings.ToLower(media)
+	remote := strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://")
+	method, field := telegramMediaMethod(media, remote)
+
+	// Parameters shared by the URL and upload variants, in the order sent.
+	fields := [][2]string{{"chat_id", strconv.FormatInt(chatID, 10)}}
+	if strings.TrimSpace(htmlCaption) != "" {
+		fields = append(fields, [2]string{"caption", htmlCaption}, [2]string{"parse_mode", "HTML"})
+	}
+	if replyID, ok := parseTelegramMessageID(msg.ReplyToID); ok {
+		fields = append(fields, [2]string{"reply_to_message_id", strconv.Itoa(replyID)})
+	}
+
+	var (
+		payload     io.Reader
+		contentType string
+	)
+	if remote {
+		vals := url.Values{}
+		for _, kv := range fields {
+			vals.Set(kv[0], kv[1])
+		}
+		vals.Set(field, media)
+		payload = strings.NewReader(vals.Encode())
+		contentType = "application/x-www-form-urlencoded"
+	} else {
+		form, formType, err := buildTelegramMediaForm(fields, field, media)
+		if err != nil {
+			return err
+		}
+		payload, contentType = form, formType
+	}
+
+	// http.DefaultClient has no timeout of its own, so bound the call when the
+	// caller's context carries no deadline.
+	if _, ok := ctx.Deadline(); !ok {
+		var cancel context.CancelFunc
+		ctx, cancel = context.WithTimeout(ctx, 5*time.Minute)
+		defer cancel()
+	}
+
+	apiURL := fmt.Sprintf("https://api.telegram.org/bot%s/%s", c.config.Token, method)
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, payload)
+	if err != nil {
+		return fmt.Errorf("telegram %s: %w", method, withoutRequestURL(err))
+	}
+	req.Header.Set("Content-Type", contentType)
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("telegram %s: %w", method, withoutRequestURL(err))
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode >= 300 {
+		// Best effort: the status code alone is still a usable error.
+		detail, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
+		return fmt.Errorf("telegram media send failed: status=%d body=%s", resp.StatusCode, string(detail))
+	}
+	return nil
+}
+
+// telegramMediaMethod picks the Bot API method and the name of its file field
+// from the extension of media. For URLs the path component is checked as
+// well, so a query string after the file name does not hide the extension.
+func telegramMediaMethod(media string, remote bool) (method, field string) {
+	names := []string{strings.ToLower(media)}
+	if remote {
+		if u, err := url.Parse(media); err == nil {
+			names = append(names, strings.ToLower(u.Path))
+		}
+	}
+	for _, name := range names {
+		for _, ext := range []string{".jpg", ".jpeg", ".png", ".webp", ".gif"} {
+			if strings.HasSuffix(name, ext) {
+				return "sendPhoto", "photo"
+			}
+		}
+	}
+	return "sendDocument", "document"
+}
+
+// buildTelegramMediaForm encodes fields followed by the file at localPath as a
+// multipart/form-data body, returning the body and its Content-Type header
+// value. The file is buffered in memory in full.
+func buildTelegramMediaForm(fields [][2]string, fileField, localPath string) (*bytes.Buffer, string, error) {
+	f, err := os.Open(localPath)
+	if err != nil {
+		return nil, "", err
+	}
+	defer f.Close()
+
+	var buf bytes.Buffer
+	w := multipart.NewWriter(&buf)
+	for _, kv := range fields {
+		if err := w.WriteField(kv[0], kv[1]); err != nil {
+			return nil, "", err
+		}
+	}
+	part, err := w.CreateFormFile(fileField, filepath.Base(localPath))
+	if err != nil {
+		return nil, "", err
+	}
+	if _, err := io.Copy(part, f); err != nil {
+		return nil, "", err
+	}
+	if err := w.Close(); err != nil {
+		return nil, "", err
+	}
+	return &buf, w.FormDataContentType(), nil
+}
+
+// withoutRequestURL unwraps a *url.Error so that the request URL, which
+// carries the bot token, is not copied into the caller's error text.
+func withoutRequestURL(err error) error {
+	if ue, ok := err.(*url.Error); ok {
+		return ue.Err
+	}
+	return err
+}
+
 func (c *TelegramChannel) isAllowedChat(chatID int64, chatType string) bool {
 	// Private chats are governed by allow_from (sender allowlist), not allow_chats.
if strings.TrimSpace(chatType) == telego.ChatTypePrivate { diff --git a/pkg/tools/message.go b/pkg/tools/message.go index 9b23328..bc154c2 100644 --- a/pkg/tools/message.go +++ b/pkg/tools/message.go @@ -9,7 +9,7 @@ import ( "clawgo/pkg/bus" ) -type SendCallback func(channel, chatID, action, content, messageID, emoji string, buttons [][]bus.Button) error +type SendCallback func(channel, chatID, action, content, media, messageID, emoji string, buttons [][]bus.Button) error type MessageTool struct { sendCallback SendCallback @@ -59,6 +59,22 @@ func (t *MessageTool) Parameters() map[string]interface{} { "type": "string", "description": "Optional: target chat/user ID", }, + "media": map[string]interface{}{ + "type": "string", + "description": "Optional media path or URL for action=send", + }, + "path": map[string]interface{}{ + "type": "string", + "description": "Alias of media", + }, + "file_path": map[string]interface{}{ + "type": "string", + "description": "Alias of media", + }, + "filePath": map[string]interface{}{ + "type": "string", + "description": "Alias of media", + }, "message_id": map[string]interface{}{ "type": "string", "description": "Target message id for edit/delete/react", @@ -106,13 +122,29 @@ func (t *MessageTool) Execute(ctx context.Context, args map[string]interface{}) if msg, _ := args["message"].(string); msg != "" { content = msg } + media, _ := args["media"].(string) + if media == "" { + if p, _ := args["path"].(string); p != "" { + media = p + } + } + if media == "" { + if p, _ := args["file_path"].(string); p != "" { + media = p + } + } + if media == "" { + if p, _ := args["filePath"].(string); p != "" { + media = p + } + } messageID, _ := args["message_id"].(string) emoji, _ := args["emoji"].(string) switch action { case "send": - if content == "" { - return "", fmt.Errorf("%w: message/content for action=send", ErrMissingField) + if content == "" && media == "" { + return "", fmt.Errorf("%w: message/content or media for action=send", 
ErrMissingField) } case "edit": if messageID == "" || content == "" { @@ -175,7 +207,7 @@ func (t *MessageTool) Execute(ctx context.Context, args map[string]interface{}) } } - if err := t.sendCallback(channel, chatID, action, content, messageID, emoji, buttons); err != nil { + if err := t.sendCallback(channel, chatID, action, content, media, messageID, emoji, buttons); err != nil { return fmt.Sprintf("Error sending message: %v", err), nil }