refactor(responses): remove chat_completions and wire responses features end-to-end

This commit is contained in:
lpf
2026-03-04 10:25:50 +08:00
parent d42b6a6f10
commit 6c78ef3b1c
14 changed files with 726 additions and 486 deletions

View File

@@ -777,17 +777,7 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage)
})
toolDefs := al.tools.GetDefinitions()
providerToolDefs := make([]providers.ToolDefinition, 0, len(toolDefs))
for _, td := range toolDefs {
providerToolDefs = append(providerToolDefs, providers.ToolDefinition{
Type: td["type"].(string),
Function: providers.ToolFunctionDefinition{
Name: td["function"].(map[string]interface{})["name"].(string),
Description: td["function"].(map[string]interface{})["description"].(string),
Parameters: td["function"].(map[string]interface{})["parameters"].(map[string]interface{}),
},
})
}
providerToolDefs := al.buildProviderToolDefs(toolDefs)
// Log LLM request details
logger.DebugCF("agent", logger.C0152,
@@ -809,7 +799,8 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage)
"tools_json": formatToolsForLog(providerToolDefs),
})
options := map[string]interface{}{"max_tokens": 8192, "temperature": 0.7}
messages = injectResponsesMediaParts(messages, msg.Media, msg.MediaItems)
options := buildResponsesOptions(8192, 0.7)
var response *providers.LLMResponse
var err error
if msg.Channel == "telegram" && strings.TrimSpace(os.Getenv("CLAWGO_TELEGRAM_STREAMING")) == "1" {
@@ -850,11 +841,6 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage)
}
}
if err != nil {
errText := strings.ToLower(err.Error())
if strings.Contains(errText, "no tool call found for function call output") {
removed := al.sessions.PurgeOrphanToolOutputs(msg.SessionKey)
logger.WarnCF("agent", logger.C0154, map[string]interface{}{"session_key": msg.SessionKey, "removed": removed})
}
logger.ErrorCF("agent", logger.C0155,
map[string]interface{}{
"iteration": iteration,
@@ -1150,17 +1136,7 @@ func (al *AgentLoop) processSystemMessage(ctx context.Context, msg bus.InboundMe
iteration++
toolDefs := al.tools.GetDefinitions()
providerToolDefs := make([]providers.ToolDefinition, 0, len(toolDefs))
for _, td := range toolDefs {
providerToolDefs = append(providerToolDefs, providers.ToolDefinition{
Type: td["type"].(string),
Function: providers.ToolFunctionDefinition{
Name: td["function"].(map[string]interface{})["name"].(string),
Description: td["function"].(map[string]interface{})["description"].(string),
Parameters: td["function"].(map[string]interface{})["parameters"].(map[string]interface{}),
},
})
}
providerToolDefs := al.buildProviderToolDefs(toolDefs)
// Log LLM request details
logger.DebugCF("agent", logger.C0152,
@@ -1182,10 +1158,7 @@ func (al *AgentLoop) processSystemMessage(ctx context.Context, msg bus.InboundMe
"tools_json": formatToolsForLog(providerToolDefs),
})
options := map[string]interface{}{
"max_tokens": 8192,
"temperature": 0.7,
}
options := buildResponsesOptions(8192, 0.7)
response, err := al.provider.Chat(ctx, messages, providerToolDefs, al.model, options)
if err != nil {
@@ -1200,40 +1173,6 @@ func (al *AgentLoop) processSystemMessage(ctx context.Context, msg bus.InboundMe
}
}
if err != nil {
errText := strings.ToLower(err.Error())
if strings.Contains(errText, "no tool call found for function call output") {
removed := al.sessions.PurgeOrphanToolOutputs(sessionKey)
logger.WarnCF("agent", logger.C0160, map[string]interface{}{"session_key": sessionKey, "removed": removed})
if removed > 0 {
// Rebuild context from cleaned history and retry current iteration.
history = al.sessions.GetHistory(sessionKey)
summary = al.sessions.GetSummary(sessionKey)
messages = al.contextBuilder.BuildMessages(
history,
summary,
msg.Content,
nil,
originChannel,
originChatID,
responseLang,
)
continue
}
if strings.ToLower(strings.TrimSpace(msg.Metadata["trigger"])) == "heartbeat" {
al.sessions.ResetSession(sessionKey)
messages = al.contextBuilder.BuildMessages(
[]providers.Message{},
"",
msg.Content,
nil,
originChannel,
originChatID,
responseLang,
)
logger.WarnCF("agent", logger.C0161, map[string]interface{}{"session_key": sessionKey})
continue
}
}
logger.ErrorCF("agent", logger.C0162,
map[string]interface{}{
"iteration": iteration,
@@ -1325,6 +1264,155 @@ func truncate(s string, maxLen int) string {
return s[:maxLen-3] + "..."
}
func (al *AgentLoop) buildProviderToolDefs(toolDefs []map[string]interface{}) []providers.ToolDefinition {
providerToolDefs := make([]providers.ToolDefinition, 0, len(toolDefs))
for _, td := range toolDefs {
fnRaw, ok := td["function"].(map[string]interface{})
if !ok {
continue
}
name, _ := fnRaw["name"].(string)
description, _ := fnRaw["description"].(string)
params, _ := fnRaw["parameters"].(map[string]interface{})
if strings.TrimSpace(name) == "" {
continue
}
if params == nil {
params = map[string]interface{}{}
}
providerToolDefs = append(providerToolDefs, providers.ToolDefinition{
Type: "function",
Function: providers.ToolFunctionDefinition{
Name: name,
Description: description,
Parameters: params,
},
})
}
return providerToolDefs
}
func buildResponsesOptions(maxTokens int64, temperature float64) map[string]interface{} {
options := map[string]interface{}{
"max_tokens": maxTokens,
"temperature": temperature,
}
responseTools := make([]map[string]interface{}, 0, 2)
if strings.TrimSpace(os.Getenv("CLAWGO_RESPONSES_WEB_SEARCH")) == "1" {
webTool := map[string]interface{}{"type": "web_search"}
if contextSize := strings.TrimSpace(os.Getenv("CLAWGO_RESPONSES_WEB_SEARCH_CONTEXT_SIZE")); contextSize != "" {
webTool["search_context_size"] = contextSize
}
responseTools = append(responseTools, webTool)
}
if idsRaw := strings.TrimSpace(os.Getenv("CLAWGO_RESPONSES_FILE_SEARCH_VECTOR_STORE_IDS")); idsRaw != "" {
ids := splitCommaList(idsRaw)
if len(ids) > 0 {
fileSearch := map[string]interface{}{
"type": "file_search",
"vector_store_ids": ids,
}
if maxNumRaw := strings.TrimSpace(os.Getenv("CLAWGO_RESPONSES_FILE_SEARCH_MAX_NUM_RESULTS")); maxNumRaw != "" {
if n, err := strconv.Atoi(maxNumRaw); err == nil && n > 0 {
fileSearch["max_num_results"] = n
}
}
responseTools = append(responseTools, fileSearch)
}
}
if len(responseTools) > 0 {
options["responses_tools"] = responseTools
}
if include := splitCommaList(strings.TrimSpace(os.Getenv("CLAWGO_RESPONSES_INCLUDE"))); len(include) > 0 {
options["responses_include"] = include
}
if strings.TrimSpace(os.Getenv("CLAWGO_RESPONSES_STREAM_INCLUDE_USAGE")) == "1" {
options["responses_stream_options"] = map[string]interface{}{"include_usage": true}
}
return options
}
func injectResponsesMediaParts(messages []providers.Message, media []string, mediaItems []bus.MediaItem) []providers.Message {
if len(messages) == 0 || (len(media) == 0 && len(mediaItems) == 0) {
return messages
}
last := len(messages) - 1
if strings.ToLower(strings.TrimSpace(messages[last].Role)) != "user" {
return messages
}
parts := make([]providers.MessageContentPart, 0, 1+len(media)+len(mediaItems))
if strings.TrimSpace(messages[last].Content) != "" {
parts = append(parts, providers.MessageContentPart{
Type: "input_text",
Text: messages[last].Content,
})
}
for _, ref := range media {
ref = strings.TrimSpace(ref)
if ref == "" {
continue
}
parts = append(parts, providers.MessageContentPart{
Type: "input_image",
ImageURL: ref,
})
}
for _, item := range mediaItems {
typ := strings.ToLower(strings.TrimSpace(item.Type))
ref := strings.TrimSpace(item.Ref)
path := strings.TrimSpace(item.Path)
src := ref
if src == "" {
src = path
}
switch {
case strings.Contains(typ, "image"):
part := providers.MessageContentPart{Type: "input_image"}
if strings.HasPrefix(src, "file_") {
part.FileID = src
} else {
part.ImageURL = src
}
if part.FileID != "" || part.ImageURL != "" {
parts = append(parts, part)
}
case strings.Contains(typ, "file"), strings.Contains(typ, "document"), strings.Contains(typ, "audio"), strings.Contains(typ, "video"):
part := providers.MessageContentPart{Type: "input_file"}
if strings.HasPrefix(src, "file_") {
part.FileID = src
} else {
part.FileURL = src
}
if part.FileID != "" || part.FileURL != "" {
parts = append(parts, part)
}
}
}
if len(parts) == 0 {
return messages
}
messages[last].ContentParts = parts
return messages
}
func splitCommaList(raw string) []string {
if strings.TrimSpace(raw) == "" {
return nil
}
parts := strings.Split(raw, ",")
out := make([]string, 0, len(parts))
for _, p := range parts {
if s := strings.TrimSpace(p); s != "" {
out = append(out, s)
}
}
return out
}
// GetStartupInfo returns information about loaded tools and skills for logging.
func (al *AgentLoop) compactSessionIfNeeded(sessionKey string) {
if !al.compactionEnabled {

View File

@@ -0,0 +1,78 @@
package agent
import (
"os"
"testing"
"clawgo/pkg/bus"
"clawgo/pkg/providers"
)
func TestInjectResponsesMediaParts(t *testing.T) {
msgs := []providers.Message{
{Role: "system", Content: "sys"},
{Role: "user", Content: "look"},
}
media := []string{"https://example.com/a.png"}
items := []bus.MediaItem{
{Type: "image", Ref: "file_img_1"},
{Type: "document", Ref: "file_doc_1"},
}
got := injectResponsesMediaParts(msgs, media, items)
if len(got) != 2 {
t.Fatalf("unexpected messages length: %#v", got)
}
parts := got[1].ContentParts
if len(parts) != 4 {
t.Fatalf("expected 4 content parts, got %#v", parts)
}
if parts[0].Type != "input_text" || parts[0].Text != "look" {
t.Fatalf("expected first part to preserve input text, got %#v", parts[0])
}
if parts[1].Type != "input_image" || parts[1].ImageURL == "" {
t.Fatalf("expected media URL as input_image, got %#v", parts[1])
}
if parts[2].Type != "input_image" || parts[2].FileID != "file_img_1" {
t.Fatalf("expected image media item mapped to file_id, got %#v", parts[2])
}
if parts[3].Type != "input_file" || parts[3].FileID != "file_doc_1" {
t.Fatalf("expected document media item mapped to input_file file_id, got %#v", parts[3])
}
}
func TestBuildResponsesOptionsFromEnv(t *testing.T) {
t.Setenv("CLAWGO_RESPONSES_WEB_SEARCH", "1")
t.Setenv("CLAWGO_RESPONSES_WEB_SEARCH_CONTEXT_SIZE", "high")
t.Setenv("CLAWGO_RESPONSES_FILE_SEARCH_VECTOR_STORE_IDS", "vs_1,vs_2")
t.Setenv("CLAWGO_RESPONSES_FILE_SEARCH_MAX_NUM_RESULTS", "8")
t.Setenv("CLAWGO_RESPONSES_INCLUDE", "output_text,tool_calls")
t.Setenv("CLAWGO_RESPONSES_STREAM_INCLUDE_USAGE", "1")
opts := buildResponsesOptions(1024, 0.2)
if opts["max_tokens"] != int64(1024) {
t.Fatalf("max_tokens mismatch: %#v", opts["max_tokens"])
}
if opts["temperature"] != 0.2 {
t.Fatalf("temperature mismatch: %#v", opts["temperature"])
}
toolsRaw, ok := opts["responses_tools"].([]map[string]interface{})
if !ok || len(toolsRaw) != 2 {
t.Fatalf("expected two built-in response tools, got %#v", opts["responses_tools"])
}
if toolsRaw[0]["type"] != "web_search" {
t.Fatalf("expected web_search tool first, got %#v", toolsRaw[0])
}
if toolsRaw[1]["type"] != "file_search" {
t.Fatalf("expected file_search tool second, got %#v", toolsRaw[1])
}
if _, ok := opts["responses_include"]; !ok {
t.Fatalf("expected responses_include in options")
}
if _, ok := opts["responses_stream_options"]; !ok {
t.Fatalf("expected responses_stream_options in options")
}
// keep linter happy for unused os import when build tags differ
_ = os.Getenv("CLAWGO_RESPONSES_WEB_SEARCH")
}

View File

@@ -29,6 +29,8 @@ const (
telegramAPICallTimeout = 15 * time.Second
telegramMaxConcurrentHandlers = 32
telegramStopWaitHandlersPeriod = 5 * time.Second
telegramSafeHTMLMaxRunes = 3500
telegramStreamPreviewMaxRunes = 3200
)
type TelegramChannel struct {
@@ -42,11 +44,18 @@ type TelegramChannel struct {
handleSem chan struct{}
handleWG sync.WaitGroup
botUsername string
streamMu sync.Mutex
streamState map[string]telegramStreamState
}
type telegramStreamState struct {
MessageID int
LastContent string
}
func (c *TelegramChannel) SupportsAction(action string) bool {
switch strings.ToLower(strings.TrimSpace(action)) {
case "", "send", "edit", "delete", "react":
case "", "send", "edit", "delete", "react", "stream":
return true
default:
return false
@@ -67,6 +76,7 @@ func NewTelegramChannel(cfg config.TelegramConfig, bus *bus.MessageBus) (*Telegr
config: cfg,
chatIDs: make(map[string]int64),
handleSem: make(chan struct{}, telegramMaxConcurrentHandlers),
streamState: make(map[string]telegramStreamState),
}, nil
}
@@ -252,6 +262,7 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
if action != "send" {
return c.handleAction(ctx, chatIDInt, action, msg)
}
streamKey := telegramStreamKey(chatIDInt, msg.ReplyToID)
htmlContent := sanitizeTelegramHTML(markdownToTelegramHTML(msg.Content))
@@ -292,6 +303,9 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
return err
}
}
c.streamMu.Lock()
delete(c.streamState, streamKey)
c.streamMu.Unlock()
return nil
}
@@ -325,8 +339,14 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
return err
}
}
c.streamMu.Lock()
delete(c.streamState, streamKey)
c.streamMu.Unlock()
return nil
}
c.streamMu.Lock()
delete(c.streamState, streamKey)
c.streamMu.Unlock()
return nil
}
@@ -730,21 +750,132 @@ func parseChatID(chatIDStr string) (int64, error) {
return id, err
}
func telegramStreamKey(chatID int64, replyToID string) string {
return fmt.Sprintf("%d:%s", chatID, strings.TrimSpace(replyToID))
}
func tailRunes(s string, maxRunes int) string {
if maxRunes <= 0 {
return s
}
r := []rune(s)
if len(r) <= maxRunes {
return s
}
return "…\n" + string(r[len(r)-maxRunes:])
}
func clampTelegramHTML(markdown string, maxRunes int) string {
if maxRunes <= 0 {
maxRunes = telegramSafeHTMLMaxRunes
}
htmlContent := sanitizeTelegramHTML(markdownToTelegramHTML(markdown))
if len([]rune(htmlContent)) <= maxRunes {
return htmlContent
}
chunks := splitTelegramMarkdown(markdown, maxRunes-400)
if len(chunks) == 0 {
return ""
}
return sanitizeTelegramHTML(markdownToTelegramHTML(chunks[0]))
}
func (c *TelegramChannel) handleStreamAction(ctx context.Context, chatID int64, msg bus.OutboundMessage) error {
streamKey := telegramStreamKey(chatID, msg.ReplyToID)
content := tailRunes(msg.Content, telegramStreamPreviewMaxRunes)
htmlContent := clampTelegramHTML(content, telegramSafeHTMLMaxRunes)
if strings.TrimSpace(htmlContent) == "" {
return nil
}
c.streamMu.Lock()
state := c.streamState[streamKey]
if state.LastContent == htmlContent {
c.streamMu.Unlock()
return nil
}
c.streamMu.Unlock()
if state.MessageID == 0 {
sendParams := telegoutil.Message(telegoutil.ID(chatID), htmlContent).WithParseMode(telego.ModeHTML)
if replyID, ok := parseTelegramMessageID(msg.ReplyToID); ok {
sendParams.ReplyParameters = &telego.ReplyParameters{MessageID: replyID}
}
sendCtx, cancel := withTelegramAPITimeout(ctx)
sent, err := c.bot.SendMessage(sendCtx, sendParams)
cancel()
if err != nil {
plain := tailRunes(plainTextFromTelegramHTML(htmlContent), telegramSafeHTMLMaxRunes)
if strings.TrimSpace(plain) == "" {
return err
}
sendPlainCtx, cancelPlain := withTelegramAPITimeout(ctx)
sent, err = c.bot.SendMessage(sendPlainCtx, telegoutil.Message(telegoutil.ID(chatID), plain))
cancelPlain()
if err != nil {
return err
}
}
c.streamMu.Lock()
c.streamState[streamKey] = telegramStreamState{MessageID: sent.MessageID, LastContent: htmlContent}
c.streamMu.Unlock()
return nil
}
editCtx, cancel := withTelegramAPITimeout(ctx)
_, err := c.bot.EditMessageText(editCtx, &telego.EditMessageTextParams{
ChatID: telegoutil.ID(chatID),
MessageID: state.MessageID,
Text: htmlContent,
ParseMode: telego.ModeHTML,
})
cancel()
if err != nil {
errText := strings.ToLower(err.Error())
if strings.Contains(errText, "message is not modified") {
return nil
}
if strings.Contains(errText, "message is too long") || strings.Contains(errText, "can't parse entities") {
plain := tailRunes(plainTextFromTelegramHTML(htmlContent), telegramSafeHTMLMaxRunes)
if strings.TrimSpace(plain) == "" {
return err
}
editPlainCtx, cancelPlain := withTelegramAPITimeout(ctx)
_, err = c.bot.EditMessageText(editPlainCtx, &telego.EditMessageTextParams{
ChatID: telegoutil.ID(chatID),
MessageID: state.MessageID,
Text: plain,
})
cancelPlain()
if err != nil && !strings.Contains(strings.ToLower(err.Error()), "message is not modified") {
return err
}
} else {
return err
}
}
c.streamMu.Lock()
state.LastContent = htmlContent
c.streamState[streamKey] = state
c.streamMu.Unlock()
return nil
}
func (c *TelegramChannel) handleAction(ctx context.Context, chatID int64, action string, msg bus.OutboundMessage) error {
messageID, ok := parseTelegramMessageID(msg.MessageID)
if !ok && action != "send" {
if !ok && action != "send" && action != "stream" {
return fmt.Errorf("message_id required for action=%s", action)
}
switch action {
case "edit":
htmlContent := sanitizeTelegramHTML(markdownToTelegramHTML(msg.Content))
if len([]rune(htmlContent)) > 3500 {
htmlContent = sanitizeTelegramHTML(markdownToTelegramHTML(splitTelegramMarkdown(msg.Content, 3000)[0]))
}
htmlContent := clampTelegramHTML(msg.Content, telegramSafeHTMLMaxRunes)
editCtx, cancel := withTelegramAPITimeout(ctx)
defer cancel()
_, err := c.bot.EditMessageText(editCtx, &telego.EditMessageTextParams{ChatID: telegoutil.ID(chatID), MessageID: messageID, Text: htmlContent, ParseMode: telego.ModeHTML})
return err
case "stream":
return c.handleStreamAction(ctx, chatID, msg)
case "delete":
delCtx, cancel := withTelegramAPITimeout(ctx)
defer cancel()

View File

@@ -45,26 +45,26 @@ type AgentDefaults struct {
}
type AutonomyConfig struct {
Enabled bool `json:"enabled" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_ENABLED"`
TickIntervalSec int `json:"tick_interval_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_TICK_INTERVAL_SEC"`
MinRunIntervalSec int `json:"min_run_interval_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_MIN_RUN_INTERVAL_SEC"`
MaxPendingDurationSec int `json:"max_pending_duration_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_MAX_PENDING_DURATION_SEC"`
MaxConsecutiveStalls int `json:"max_consecutive_stalls" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_MAX_CONSECUTIVE_STALLS"`
MaxDispatchPerTick int `json:"max_dispatch_per_tick" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_MAX_DISPATCH_PER_TICK"`
NotifyCooldownSec int `json:"notify_cooldown_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_NOTIFY_COOLDOWN_SEC"`
NotifySameReasonCooldownSec int `json:"notify_same_reason_cooldown_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_NOTIFY_SAME_REASON_COOLDOWN_SEC"`
QuietHours string `json:"quiet_hours" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_QUIET_HOURS"`
UserIdleResumeSec int `json:"user_idle_resume_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_USER_IDLE_RESUME_SEC"`
MaxRoundsWithoutUser int `json:"max_rounds_without_user" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_MAX_ROUNDS_WITHOUT_USER"`
TaskHistoryRetentionDays int `json:"task_history_retention_days" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_TASK_HISTORY_RETENTION_DAYS"`
WaitingResumeDebounceSec int `json:"waiting_resume_debounce_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_WAITING_RESUME_DEBOUNCE_SEC"`
IdleRoundBudgetReleaseSec int `json:"idle_round_budget_release_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_IDLE_ROUND_BUDGET_RELEASE_SEC"`
AllowedTaskKeywords []string `json:"allowed_task_keywords" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_ALLOWED_TASK_KEYWORDS"`
EKGConsecutiveErrorThreshold int `json:"ekg_consecutive_error_threshold" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_EKG_CONSECUTIVE_ERROR_THRESHOLD"`
Enabled bool `json:"enabled" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_ENABLED"`
TickIntervalSec int `json:"tick_interval_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_TICK_INTERVAL_SEC"`
MinRunIntervalSec int `json:"min_run_interval_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_MIN_RUN_INTERVAL_SEC"`
MaxPendingDurationSec int `json:"max_pending_duration_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_MAX_PENDING_DURATION_SEC"`
MaxConsecutiveStalls int `json:"max_consecutive_stalls" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_MAX_CONSECUTIVE_STALLS"`
MaxDispatchPerTick int `json:"max_dispatch_per_tick" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_MAX_DISPATCH_PER_TICK"`
NotifyCooldownSec int `json:"notify_cooldown_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_NOTIFY_COOLDOWN_SEC"`
NotifySameReasonCooldownSec int `json:"notify_same_reason_cooldown_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_NOTIFY_SAME_REASON_COOLDOWN_SEC"`
QuietHours string `json:"quiet_hours" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_QUIET_HOURS"`
UserIdleResumeSec int `json:"user_idle_resume_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_USER_IDLE_RESUME_SEC"`
MaxRoundsWithoutUser int `json:"max_rounds_without_user" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_MAX_ROUNDS_WITHOUT_USER"`
TaskHistoryRetentionDays int `json:"task_history_retention_days" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_TASK_HISTORY_RETENTION_DAYS"`
WaitingResumeDebounceSec int `json:"waiting_resume_debounce_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_WAITING_RESUME_DEBOUNCE_SEC"`
IdleRoundBudgetReleaseSec int `json:"idle_round_budget_release_sec" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_IDLE_ROUND_BUDGET_RELEASE_SEC"`
AllowedTaskKeywords []string `json:"allowed_task_keywords" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_ALLOWED_TASK_KEYWORDS"`
EKGConsecutiveErrorThreshold int `json:"ekg_consecutive_error_threshold" env:"CLAWGO_AGENTS_DEFAULTS_AUTONOMY_EKG_CONSECUTIVE_ERROR_THRESHOLD"`
// Deprecated: kept for backward compatibility with existing config files.
NotifyChannel string `json:"notify_channel,omitempty"`
NotifyChannel string `json:"notify_channel,omitempty"`
// Deprecated: kept for backward compatibility with existing config files.
NotifyChatID string `json:"notify_chat_id,omitempty"`
NotifyChatID string `json:"notify_chat_id,omitempty"`
}
type AgentTextConfig struct {
@@ -128,9 +128,9 @@ type ContextCompactionConfig struct {
}
type ChannelsConfig struct {
InboundMessageIDDedupeTTLSeconds int `json:"inbound_message_id_dedupe_ttl_seconds" env:"CLAWGO_CHANNELS_INBOUND_MESSAGE_ID_DEDUPE_TTL_SECONDS"`
InboundContentDedupeWindowSeconds int `json:"inbound_content_dedupe_window_seconds" env:"CLAWGO_CHANNELS_INBOUND_CONTENT_DEDUPE_WINDOW_SECONDS"`
OutboundDedupeWindowSeconds int `json:"outbound_dedupe_window_seconds" env:"CLAWGO_CHANNELS_OUTBOUND_DEDUPE_WINDOW_SECONDS"`
InboundMessageIDDedupeTTLSeconds int `json:"inbound_message_id_dedupe_ttl_seconds" env:"CLAWGO_CHANNELS_INBOUND_MESSAGE_ID_DEDUPE_TTL_SECONDS"`
InboundContentDedupeWindowSeconds int `json:"inbound_content_dedupe_window_seconds" env:"CLAWGO_CHANNELS_INBOUND_CONTENT_DEDUPE_WINDOW_SECONDS"`
OutboundDedupeWindowSeconds int `json:"outbound_dedupe_window_seconds" env:"CLAWGO_CHANNELS_OUTBOUND_DEDUPE_WINDOW_SECONDS"`
WhatsApp WhatsAppConfig `json:"whatsapp"`
Telegram TelegramConfig `json:"telegram"`
Feishu FeishuConfig `json:"feishu"`
@@ -156,15 +156,15 @@ type TelegramConfig struct {
}
type FeishuConfig struct {
Enabled bool `json:"enabled" env:"CLAWGO_CHANNELS_FEISHU_ENABLED"`
AppID string `json:"app_id" env:"CLAWGO_CHANNELS_FEISHU_APP_ID"`
AppSecret string `json:"app_secret" env:"CLAWGO_CHANNELS_FEISHU_APP_SECRET"`
EncryptKey string `json:"encrypt_key" env:"CLAWGO_CHANNELS_FEISHU_ENCRYPT_KEY"`
VerificationToken string `json:"verification_token" env:"CLAWGO_CHANNELS_FEISHU_VERIFICATION_TOKEN"`
AllowFrom []string `json:"allow_from" env:"CLAWGO_CHANNELS_FEISHU_ALLOW_FROM"`
AllowChats []string `json:"allow_chats" env:"CLAWGO_CHANNELS_FEISHU_ALLOW_CHATS"`
EnableGroups bool `json:"enable_groups" env:"CLAWGO_CHANNELS_FEISHU_ENABLE_GROUPS"`
RequireMentionInGroups bool `json:"require_mention_in_groups" env:"CLAWGO_CHANNELS_FEISHU_REQUIRE_MENTION_IN_GROUPS"`
Enabled bool `json:"enabled" env:"CLAWGO_CHANNELS_FEISHU_ENABLED"`
AppID string `json:"app_id" env:"CLAWGO_CHANNELS_FEISHU_APP_ID"`
AppSecret string `json:"app_secret" env:"CLAWGO_CHANNELS_FEISHU_APP_SECRET"`
EncryptKey string `json:"encrypt_key" env:"CLAWGO_CHANNELS_FEISHU_ENCRYPT_KEY"`
VerificationToken string `json:"verification_token" env:"CLAWGO_CHANNELS_FEISHU_VERIFICATION_TOKEN"`
AllowFrom []string `json:"allow_from" env:"CLAWGO_CHANNELS_FEISHU_ALLOW_FROM"`
AllowChats []string `json:"allow_chats" env:"CLAWGO_CHANNELS_FEISHU_ALLOW_CHATS"`
EnableGroups bool `json:"enable_groups" env:"CLAWGO_CHANNELS_FEISHU_ENABLE_GROUPS"`
RequireMentionInGroups bool `json:"require_mention_in_groups" env:"CLAWGO_CHANNELS_FEISHU_REQUIRE_MENTION_IN_GROUPS"`
}
type DiscordConfig struct {
@@ -245,8 +245,6 @@ func (p *ProvidersConfig) UnmarshalJSON(data []byte) error {
type ProviderConfig struct {
APIKey string `json:"api_key" env:"CLAWGO_PROVIDERS_{{.Name}}_API_KEY"`
APIBase string `json:"api_base" env:"CLAWGO_PROVIDERS_{{.Name}}_API_BASE"`
Protocol string `json:"protocol" env:"CLAWGO_PROVIDERS_{{.Name}}_PROTOCOL"`
CrossSessionCallID bool `json:"cross_session_call_id" env:"CLAWGO_PROVIDERS_{{.Name}}_CROSS_SESSION_CALL_ID"`
Models []string `json:"models" env:"CLAWGO_PROVIDERS_{{.Name}}_MODELS"`
SupportsResponsesCompact bool `json:"supports_responses_compact" env:"CLAWGO_PROVIDERS_{{.Name}}_SUPPORTS_RESPONSES_COMPACT"`
Auth string `json:"auth" env:"CLAWGO_PROVIDERS_{{.Name}}_AUTH"`
@@ -367,21 +365,21 @@ func DefaultConfig() *Config {
PromptTemplate: "Read HEARTBEAT.md if it exists (workspace context). Follow it strictly. Do not infer or repeat old tasks from prior chats. If nothing needs attention, reply HEARTBEAT_OK.",
},
Autonomy: AutonomyConfig{
Enabled: false,
TickIntervalSec: 30,
MinRunIntervalSec: 20,
MaxPendingDurationSec: 180,
MaxConsecutiveStalls: 3,
MaxDispatchPerTick: 2,
NotifyCooldownSec: 300,
NotifySameReasonCooldownSec: 900,
QuietHours: "23:00-08:00",
UserIdleResumeSec: 20,
MaxRoundsWithoutUser: 12,
TaskHistoryRetentionDays: 3,
WaitingResumeDebounceSec: 5,
IdleRoundBudgetReleaseSec: 1800,
AllowedTaskKeywords: []string{},
Enabled: false,
TickIntervalSec: 30,
MinRunIntervalSec: 20,
MaxPendingDurationSec: 180,
MaxConsecutiveStalls: 3,
MaxDispatchPerTick: 2,
NotifyCooldownSec: 300,
NotifySameReasonCooldownSec: 900,
QuietHours: "23:00-08:00",
UserIdleResumeSec: 20,
MaxRoundsWithoutUser: 12,
TaskHistoryRetentionDays: 3,
WaitingResumeDebounceSec: 5,
IdleRoundBudgetReleaseSec: 1800,
AllowedTaskKeywords: []string{},
EKGConsecutiveErrorThreshold: 3,
},
Texts: AgentTextConfig{
@@ -435,7 +433,7 @@ func DefaultConfig() *Config {
},
},
Channels: ChannelsConfig{
InboundMessageIDDedupeTTLSeconds: 600,
InboundMessageIDDedupeTTLSeconds: 600,
InboundContentDedupeWindowSeconds: 12,
OutboundDedupeWindowSeconds: 12,
WhatsApp: WhatsAppConfig{
@@ -489,7 +487,6 @@ func DefaultConfig() *Config {
Providers: ProvidersConfig{
Proxy: ProviderConfig{
APIBase: "http://localhost:8080/v1",
Protocol: "chat_completions",
Models: []string{"glm-4.7"},
TimeoutSec: 90,
},

View File

@@ -208,8 +208,8 @@ func Validate(cfg *Config) []error {
if active == "" {
active = "proxy"
}
if pc, ok := providerConfigByName(cfg, active); !ok || !pc.SupportsResponsesCompact || pc.Protocol != "responses" {
errs = append(errs, fmt.Errorf("context_compaction.mode=responses_compact requires active proxy %q with protocol=responses and supports_responses_compact=true", active))
if pc, ok := providerConfigByName(cfg, active); !ok || !pc.SupportsResponsesCompact {
errs = append(errs, fmt.Errorf("context_compaction.mode=responses_compact requires active proxy %q with supports_responses_compact=true", active))
}
}
@@ -314,16 +314,6 @@ func validateProviderConfig(path string, p ProviderConfig) []error {
if p.APIBase == "" {
errs = append(errs, fmt.Errorf("%s.api_base is required", path))
}
if p.Protocol != "" {
switch p.Protocol {
case "chat_completions", "responses":
default:
errs = append(errs, fmt.Errorf("%s.protocol must be one of: chat_completions, responses", path))
}
}
if p.SupportsResponsesCompact && p.Protocol != "responses" {
errs = append(errs, fmt.Errorf("%s.supports_responses_compact=true requires protocol=responses", path))
}
if p.TimeoutSec <= 0 {
errs = append(errs, fmt.Errorf("%s.timeout_sec must be > 0", path))
}

View File

@@ -16,31 +16,22 @@ import (
"time"
)
const (
ProtocolChatCompletions = "chat_completions"
ProtocolResponses = "responses"
)
type HTTPProvider struct {
apiKey string
apiBase string
protocol string
defaultModel string
crossSessionCallID bool
supportsResponsesCompact bool
authMode string
timeout time.Duration
httpClient *http.Client
}
func NewHTTPProvider(apiKey, apiBase, protocol, defaultModel string, crossSessionCallID bool, supportsResponsesCompact bool, authMode string, timeout time.Duration) *HTTPProvider {
func NewHTTPProvider(apiKey, apiBase, defaultModel string, supportsResponsesCompact bool, authMode string, timeout time.Duration) *HTTPProvider {
normalizedBase := normalizeAPIBase(apiBase)
return &HTTPProvider{
apiKey: apiKey,
apiBase: normalizedBase,
protocol: normalizeProtocol(protocol),
defaultModel: strings.TrimSpace(defaultModel),
crossSessionCallID: crossSessionCallID,
supportsResponsesCompact: supportsResponsesCompact,
authMode: authMode,
timeout: timeout,
@@ -55,39 +46,24 @@ func (p *HTTPProvider) Chat(ctx context.Context, messages []Message, tools []Too
logger.DebugCF("provider", logger.C0133, map[string]interface{}{
"api_base": p.apiBase,
"protocol": p.protocol,
"model": model,
"messages_count": len(messages),
"tools_count": len(tools),
"timeout": p.timeout.String(),
})
if p.protocol == ProtocolResponses {
body, statusCode, contentType, err := p.callResponses(ctx, messages, tools, model, options)
if err != nil {
return nil, err
}
if statusCode != http.StatusOK {
preview := previewResponseBody(body)
return nil, fmt.Errorf("API error (status %d, content-type %q): %s", statusCode, contentType, preview)
}
if !json.Valid(body) {
return nil, fmt.Errorf("API error (status %d, content-type %q): non-JSON response: %s", statusCode, contentType, previewResponseBody(body))
}
return parseResponsesAPIResponse(body)
}
body, statusCode, contentType, err := p.callChatCompletions(ctx, messages, tools, model, options)
body, statusCode, contentType, err := p.callResponses(ctx, messages, tools, model, options)
if err != nil {
return nil, err
}
if statusCode != http.StatusOK {
return nil, fmt.Errorf("API error (status %d, content-type %q): %s", statusCode, contentType, previewResponseBody(body))
preview := previewResponseBody(body)
return nil, fmt.Errorf("API error (status %d, content-type %q): %s", statusCode, contentType, preview)
}
if !json.Valid(body) {
return nil, fmt.Errorf("API error (status %d, content-type %q): non-JSON response: %s", statusCode, contentType, previewResponseBody(body))
}
return parseChatCompletionsResponse(body)
return parseResponsesAPIResponse(body)
}
func (p *HTTPProvider) ChatStream(ctx context.Context, messages []Message, tools []ToolDefinition, model string, options map[string]interface{}, onDelta func(string)) (*LLMResponse, error) {
@@ -97,20 +73,7 @@ func (p *HTTPProvider) ChatStream(ctx context.Context, messages []Message, tools
if p.apiBase == "" {
return nil, fmt.Errorf("API base not configured")
}
if p.protocol == ProtocolResponses {
body, status, ctype, err := p.callResponsesStream(ctx, messages, tools, model, options, onDelta)
if err != nil {
return nil, err
}
if status != http.StatusOK {
return nil, fmt.Errorf("API error (status %d, content-type %q): %s", status, ctype, previewResponseBody(body))
}
if !json.Valid(body) {
return nil, fmt.Errorf("API error (status %d, content-type %q): non-JSON response: %s", status, ctype, previewResponseBody(body))
}
return parseResponsesAPIResponse(body)
}
body, status, ctype, err := p.callChatCompletionsStream(ctx, messages, tools, model, options, onDelta)
body, status, ctype, err := p.callResponsesStream(ctx, messages, tools, model, options, onDelta)
if err != nil {
return nil, err
}
@@ -120,52 +83,29 @@ func (p *HTTPProvider) ChatStream(ctx context.Context, messages []Message, tools
if !json.Valid(body) {
return nil, fmt.Errorf("API error (status %d, content-type %q): non-JSON response: %s", status, ctype, previewResponseBody(body))
}
return parseChatCompletionsResponse(body)
}
func (p *HTTPProvider) callChatCompletions(ctx context.Context, messages []Message, tools []ToolDefinition, model string, options map[string]interface{}) ([]byte, int, string, error) {
requestBody := map[string]interface{}{
"model": model,
"messages": toChatCompletionsMessages(messages),
}
if len(tools) > 0 {
requestBody["tools"] = tools
requestBody["tool_choice"] = "auto"
}
if maxTokens, ok := int64FromOption(options, "max_tokens"); ok {
requestBody["max_tokens"] = maxTokens
}
if temperature, ok := float64FromOption(options, "temperature"); ok {
requestBody["temperature"] = temperature
}
return p.postJSON(ctx, endpointFor(p.apiBase, "/chat/completions"), requestBody)
return parseResponsesAPIResponse(body)
}
func (p *HTTPProvider) callResponses(ctx context.Context, messages []Message, tools []ToolDefinition, model string, options map[string]interface{}) ([]byte, int, string, error) {
input := make([]map[string]interface{}, 0, len(messages))
pendingCalls := map[string]struct{}{}
for _, msg := range messages {
input = append(input, toResponsesInputItemsWithState(msg, pendingCalls, p.crossSessionCallID)...)
input = append(input, toResponsesInputItemsWithState(msg, pendingCalls)...)
}
requestBody := map[string]interface{}{
"model": model,
"input": input,
}
if len(tools) > 0 {
responseTools := make([]map[string]interface{}, 0, len(tools))
for _, t := range tools {
entry := map[string]interface{}{
"type": "function",
"name": t.Function.Name,
"parameters": t.Function.Parameters,
}
if strings.TrimSpace(t.Function.Description) != "" {
entry["description"] = t.Function.Description
}
responseTools = append(responseTools, entry)
}
responseTools := buildResponsesTools(tools, options)
if len(responseTools) > 0 {
requestBody["tools"] = responseTools
requestBody["tool_choice"] = "auto"
if tc, ok := rawOption(options, "tool_choice"); ok {
requestBody["tool_choice"] = tc
}
if tc, ok := rawOption(options, "responses_tool_choice"); ok {
requestBody["tool_choice"] = tc
}
}
if maxTokens, ok := int64FromOption(options, "max_tokens"); ok {
requestBody["max_output_tokens"] = maxTokens
@@ -173,82 +113,23 @@ func (p *HTTPProvider) callResponses(ctx context.Context, messages []Message, to
if temperature, ok := float64FromOption(options, "temperature"); ok {
requestBody["temperature"] = temperature
}
if include, ok := stringSliceOption(options, "responses_include"); ok && len(include) > 0 {
requestBody["include"] = include
}
if metadata, ok := mapOption(options, "responses_metadata"); ok && len(metadata) > 0 {
requestBody["metadata"] = metadata
}
if prevID, ok := stringOption(options, "responses_previous_response_id"); ok && prevID != "" {
requestBody["previous_response_id"] = prevID
}
return p.postJSON(ctx, endpointFor(p.apiBase, "/responses"), requestBody)
}
func toChatCompletionsMessages(messages []Message) []map[string]interface{} {
out := make([]map[string]interface{}, 0, len(messages))
for _, msg := range messages {
entry := map[string]interface{}{
"role": msg.Role,
}
content := toChatCompletionsContent(msg)
if len(content) > 0 {
entry["content"] = content
} else {
entry["content"] = msg.Content
}
if len(msg.ToolCalls) > 0 {
entry["tool_calls"] = msg.ToolCalls
}
if strings.TrimSpace(msg.ToolCallID) != "" {
entry["tool_call_id"] = msg.ToolCallID
}
out = append(out, entry)
}
return out
}
func toChatCompletionsContent(msg Message) []map[string]interface{} {
if len(msg.ContentParts) == 0 {
return nil
}
content := make([]map[string]interface{}, 0, len(msg.ContentParts))
for _, part := range msg.ContentParts {
switch strings.ToLower(strings.TrimSpace(part.Type)) {
case "input_text":
if part.Text == "" {
continue
}
content = append(content, map[string]interface{}{
"type": "text",
"text": part.Text,
})
case "input_image":
if part.ImageURL == "" {
continue
}
content = append(content, map[string]interface{}{
"type": "image_url",
"image_url": map[string]interface{}{
"url": part.ImageURL,
},
})
case "input_file":
fileLabel := part.Filename
if fileLabel == "" {
fileLabel = "attached file"
}
mimeType := part.MIMEType
if mimeType == "" {
mimeType = "application/octet-stream"
}
content = append(content, map[string]interface{}{
"type": "text",
"text": fmt.Sprintf("[file attachment: %s, mime=%s]", fileLabel, mimeType),
})
}
}
return content
}
func toResponsesInputItems(msg Message) []map[string]interface{} {
return toResponsesInputItemsWithState(msg, nil, true)
return toResponsesInputItemsWithState(msg, nil)
}
func toResponsesInputItemsWithState(msg Message, pendingCalls map[string]struct{}, crossSessionCallID bool) []map[string]interface{} {
func toResponsesInputItemsWithState(msg Message, pendingCalls map[string]struct{}) []map[string]interface{} {
role := strings.ToLower(strings.TrimSpace(msg.Role))
switch role {
case "system", "developer", "user":
@@ -304,12 +185,6 @@ func toResponsesInputItemsWithState(msg Message, pendingCalls map[string]struct{
}
return items
case "tool":
if !crossSessionCallID {
if strings.TrimSpace(msg.Content) == "" {
return nil
}
return []map[string]interface{}{responsesMessageItem("user", msg.Content, "input_text")}
}
callID := msg.ToolCallID
if callID == "" {
return nil
@@ -335,7 +210,7 @@ func responsesMessageContent(msg Message) []map[string]interface{} {
content := make([]map[string]interface{}, 0, len(msg.ContentParts))
for _, part := range msg.ContentParts {
switch strings.ToLower(strings.TrimSpace(part.Type)) {
case "input_text":
case "input_text", "text":
if part.Text == "" {
continue
}
@@ -343,31 +218,200 @@ func responsesMessageContent(msg Message) []map[string]interface{} {
"type": "input_text",
"text": part.Text,
})
case "input_image":
if part.ImageURL == "" {
continue
}
content = append(content, map[string]interface{}{
"type": "input_image",
"image_url": part.ImageURL,
})
case "input_file":
if part.FileData == "" {
continue
}
case "input_image", "image":
entry := map[string]interface{}{
"type": "input_file",
"file_data": part.FileData,
"type": "input_image",
}
if part.ImageURL != "" {
entry["image_url"] = part.ImageURL
}
if part.FileID != "" {
entry["file_id"] = part.FileID
}
if detail := strings.TrimSpace(part.Detail); detail != "" {
entry["detail"] = detail
}
if _, ok := entry["image_url"]; !ok {
if _, ok := entry["file_id"]; !ok {
continue
}
}
content = append(content, entry)
case "input_file", "file":
entry := map[string]interface{}{
"type": "input_file",
}
if part.FileData != "" {
entry["file_data"] = part.FileData
}
if part.FileID != "" {
entry["file_id"] = part.FileID
}
if part.FileURL != "" {
entry["file_url"] = part.FileURL
}
if part.Filename != "" {
entry["filename"] = part.Filename
}
if _, ok := entry["file_data"]; !ok {
if _, ok := entry["file_id"]; !ok {
if _, ok := entry["file_url"]; !ok {
continue
}
}
}
content = append(content, entry)
}
}
return content
}
func buildResponsesTools(tools []ToolDefinition, options map[string]interface{}) []map[string]interface{} {
responseTools := make([]map[string]interface{}, 0, len(tools)+2)
for _, t := range tools {
typ := strings.ToLower(strings.TrimSpace(t.Type))
if typ == "" {
typ = "function"
}
if typ == "function" {
name := strings.TrimSpace(t.Function.Name)
if name == "" {
name = strings.TrimSpace(t.Name)
}
if name == "" {
continue
}
entry := map[string]interface{}{
"type": "function",
"name": name,
"parameters": map[string]interface{}{},
}
if t.Function.Parameters != nil {
entry["parameters"] = t.Function.Parameters
} else if t.Parameters != nil {
entry["parameters"] = t.Parameters
}
desc := strings.TrimSpace(t.Function.Description)
if desc == "" {
desc = strings.TrimSpace(t.Description)
}
if desc != "" {
entry["description"] = desc
}
if t.Function.Strict != nil {
entry["strict"] = *t.Function.Strict
} else if t.Strict != nil {
entry["strict"] = *t.Strict
}
responseTools = append(responseTools, entry)
continue
}
// Built-in tool types (web_search, file_search, code_interpreter, etc.).
entry := map[string]interface{}{
"type": typ,
}
if name := strings.TrimSpace(t.Name); name != "" {
entry["name"] = name
}
if desc := strings.TrimSpace(t.Description); desc != "" {
entry["description"] = desc
}
if t.Strict != nil {
entry["strict"] = *t.Strict
}
for k, v := range t.Parameters {
entry[k] = v
}
responseTools = append(responseTools, entry)
}
if extraTools, ok := mapSliceOption(options, "responses_tools"); ok {
responseTools = append(responseTools, extraTools...)
}
return responseTools
}
func rawOption(options map[string]interface{}, key string) (interface{}, bool) {
if options == nil {
return nil, false
}
v, ok := options[key]
if !ok || v == nil {
return nil, false
}
return v, true
}
func stringOption(options map[string]interface{}, key string) (string, bool) {
v, ok := rawOption(options, key)
if !ok {
return "", false
}
s, ok := v.(string)
if !ok {
return "", false
}
return strings.TrimSpace(s), true
}
func mapOption(options map[string]interface{}, key string) (map[string]interface{}, bool) {
v, ok := rawOption(options, key)
if !ok {
return nil, false
}
m, ok := v.(map[string]interface{})
return m, ok
}
func stringSliceOption(options map[string]interface{}, key string) ([]string, bool) {
v, ok := rawOption(options, key)
if !ok {
return nil, false
}
switch t := v.(type) {
case []string:
out := make([]string, 0, len(t))
for _, item := range t {
if s := strings.TrimSpace(item); s != "" {
out = append(out, s)
}
}
return out, true
case []interface{}:
out := make([]string, 0, len(t))
for _, item := range t {
s := strings.TrimSpace(fmt.Sprintf("%v", item))
if s != "" {
out = append(out, s)
}
}
return out, true
}
return nil, false
}
func mapSliceOption(options map[string]interface{}, key string) ([]map[string]interface{}, bool) {
v, ok := rawOption(options, key)
if !ok {
return nil, false
}
switch t := v.(type) {
case []map[string]interface{}:
return t, true
case []interface{}:
out := make([]map[string]interface{}, 0, len(t))
for _, item := range t {
m, ok := item.(map[string]interface{})
if ok {
out = append(out, m)
}
}
return out, true
}
return nil, false
}
func responsesMessageItem(role, text, contentType string) map[string]interface{} {
ct := contentType
if ct == "" {
@@ -385,79 +429,27 @@ func responsesMessageItem(role, text, contentType string) map[string]interface{}
}
}
func (p *HTTPProvider) callChatCompletionsStream(ctx context.Context, messages []Message, tools []ToolDefinition, model string, options map[string]interface{}, onDelta func(string)) ([]byte, int, string, error) {
requestBody := map[string]interface{}{
"model": model,
"messages": toChatCompletionsMessages(messages),
"stream": true,
}
if len(tools) > 0 {
requestBody["tools"] = tools
requestBody["tool_choice"] = "auto"
}
if maxTokens, ok := int64FromOption(options, "max_tokens"); ok {
requestBody["max_tokens"] = maxTokens
}
if temperature, ok := float64FromOption(options, "temperature"); ok {
requestBody["temperature"] = temperature
}
var fullText strings.Builder
rawBody, status, ctype, err := p.postJSONStream(ctx, endpointFor(p.apiBase, "/chat/completions"), requestBody, func(event string) {
var chunk struct {
Choices []struct {
Delta struct {
Content string `json:"content"`
} `json:"delta"`
} `json:"choices"`
}
if err := json.Unmarshal([]byte(event), &chunk); err != nil {
return
}
if len(chunk.Choices) > 0 {
d := chunk.Choices[0].Delta.Content
if d != "" {
fullText.WriteString(d)
onDelta(d)
}
}
})
if err != nil {
return nil, status, ctype, err
}
if status != http.StatusOK || !strings.Contains(strings.ToLower(ctype), "text/event-stream") {
return rawBody, status, ctype, nil
}
body, _ := json.Marshal(map[string]interface{}{
"choices": []map[string]interface{}{{
"message": map[string]interface{}{"content": fullText.String()},
"finish_reason": "stop",
}},
})
return body, status, "application/json", nil
}
func (p *HTTPProvider) callResponsesStream(ctx context.Context, messages []Message, tools []ToolDefinition, model string, options map[string]interface{}, onDelta func(string)) ([]byte, int, string, error) {
input := make([]map[string]interface{}, 0, len(messages))
pendingCalls := map[string]struct{}{}
for _, msg := range messages {
input = append(input, toResponsesInputItemsWithState(msg, pendingCalls, p.crossSessionCallID)...)
input = append(input, toResponsesInputItemsWithState(msg, pendingCalls)...)
}
requestBody := map[string]interface{}{
"model": model,
"input": input,
"stream": true,
}
if len(tools) > 0 {
responseTools := make([]map[string]interface{}, 0, len(tools))
for _, t := range tools {
entry := map[string]interface{}{"type": "function", "name": t.Function.Name, "parameters": t.Function.Parameters}
if strings.TrimSpace(t.Function.Description) != "" {
entry["description"] = t.Function.Description
}
responseTools = append(responseTools, entry)
}
responseTools := buildResponsesTools(tools, options)
if len(responseTools) > 0 {
requestBody["tools"] = responseTools
requestBody["tool_choice"] = "auto"
if tc, ok := rawOption(options, "tool_choice"); ok {
requestBody["tool_choice"] = tc
}
if tc, ok := rawOption(options, "responses_tool_choice"); ok {
requestBody["tool_choice"] = tc
}
}
if maxTokens, ok := int64FromOption(options, "max_tokens"); ok {
requestBody["max_output_tokens"] = maxTokens
@@ -465,6 +457,12 @@ func (p *HTTPProvider) callResponsesStream(ctx context.Context, messages []Messa
if temperature, ok := float64FromOption(options, "temperature"); ok {
requestBody["temperature"] = temperature
}
if include, ok := stringSliceOption(options, "responses_include"); ok && len(include) > 0 {
requestBody["include"] = include
}
if streamOpts, ok := mapOption(options, "responses_stream_options"); ok && len(streamOpts) > 0 {
requestBody["stream_options"] = streamOpts
}
return p.postJSONStream(ctx, endpointFor(p.apiBase, "/responses"), requestBody, func(event string) {
var obj map[string]interface{}
if err := json.Unmarshal([]byte(event), &obj); err != nil {
@@ -600,71 +598,6 @@ func (p *HTTPProvider) postJSON(ctx context.Context, endpoint string, payload in
return body, resp.StatusCode, strings.TrimSpace(resp.Header.Get("Content-Type")), nil
}
func parseChatCompletionsResponse(body []byte) (*LLMResponse, error) {
var apiResponse struct {
Choices []struct {
Message struct {
Content *string `json:"content"`
ToolCalls []struct {
ID string `json:"id"`
Type string `json:"type"`
Function *struct {
Name string `json:"name"`
Arguments string `json:"arguments"`
} `json:"function"`
} `json:"tool_calls"`
} `json:"message"`
FinishReason string `json:"finish_reason"`
} `json:"choices"`
Usage *UsageInfo `json:"usage"`
}
if err := json.Unmarshal(body, &apiResponse); err != nil {
return nil, fmt.Errorf("failed to unmarshal response: %w", err)
}
if len(apiResponse.Choices) == 0 {
return &LLMResponse{Content: "", FinishReason: "stop"}, nil
}
choice := apiResponse.Choices[0]
toolCalls := make([]ToolCall, 0, len(choice.Message.ToolCalls))
for i, tc := range choice.Message.ToolCalls {
if tc.Type != "" && tc.Type != "function" {
continue
}
if tc.Function == nil || strings.TrimSpace(tc.Function.Name) == "" {
continue
}
args := map[string]interface{}{}
if strings.TrimSpace(tc.Function.Arguments) != "" {
if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil {
args["raw"] = tc.Function.Arguments
}
}
id := strings.TrimSpace(tc.ID)
if id == "" {
id = fmt.Sprintf("call_%d", i+1)
}
toolCalls = append(toolCalls, ToolCall{ID: id, Name: tc.Function.Name, Arguments: args})
}
content := ""
if choice.Message.Content != nil {
content = *choice.Message.Content
}
if len(toolCalls) == 0 {
compatCalls, cleanedContent := parseCompatFunctionCalls(content)
if len(compatCalls) > 0 {
toolCalls = compatCalls
content = cleanedContent
}
}
finishReason := strings.TrimSpace(choice.FinishReason)
if finishReason == "" {
finishReason = "stop"
}
return &LLMResponse{Content: content, ToolCalls: toolCalls, FinishReason: finishReason, Usage: apiResponse.Usage}, nil
}
func parseResponsesAPIResponse(body []byte) (*LLMResponse, error) {
var resp struct {
Status string `json:"status"`
@@ -833,17 +766,6 @@ func endpointFor(base, relative string) string {
return b + relative
}
func normalizeProtocol(raw string) string {
switch strings.TrimSpace(raw) {
case "", ProtocolChatCompletions:
return ProtocolChatCompletions
case ProtocolResponses:
return ProtocolResponses
default:
return ProtocolChatCompletions
}
}
func parseCompatFunctionCalls(content string) ([]ToolCall, string) {
if strings.TrimSpace(content) == "" || !strings.Contains(content, "<function_call>") {
return nil, content
@@ -910,7 +832,7 @@ func (p *HTTPProvider) GetDefaultModel() string {
}
func (p *HTTPProvider) SupportsResponsesCompact() bool {
return p != nil && p.supportsResponsesCompact && p.protocol == ProtocolResponses
return p != nil && p.supportsResponsesCompact
}
func (p *HTTPProvider) BuildSummaryViaResponsesCompact(ctx context.Context, model string, existingSummary string, messages []Message, maxSummaryChars int) (string, error) {
@@ -923,7 +845,7 @@ func (p *HTTPProvider) BuildSummaryViaResponsesCompact(ctx context.Context, mode
}
pendingCalls := map[string]struct{}{}
for _, msg := range messages {
input = append(input, toResponsesInputItemsWithState(msg, pendingCalls, p.crossSessionCallID)...)
input = append(input, toResponsesInputItemsWithState(msg, pendingCalls)...)
}
if len(input) == 0 {
return strings.TrimSpace(existingSummary), nil
@@ -1030,7 +952,7 @@ func CreateProviderByName(cfg *config.Config, name string) (LLMProvider, error)
if len(pc.Models) > 0 {
defaultModel = pc.Models[0]
}
return NewHTTPProvider(pc.APIKey, pc.APIBase, pc.Protocol, defaultModel, pc.CrossSessionCallID, pc.SupportsResponsesCompact, pc.Auth, time.Duration(pc.TimeoutSec)*time.Second), nil
return NewHTTPProvider(pc.APIKey, pc.APIBase, defaultModel, pc.SupportsResponsesCompact, pc.Auth, time.Duration(pc.TimeoutSec)*time.Second), nil
}
func CreateProviders(cfg *config.Config) (map[string]LLMProvider, error) {
@@ -1072,7 +994,7 @@ func ProviderSupportsResponsesCompact(cfg *config.Config, name string) bool {
if err != nil {
return false
}
return pc.SupportsResponsesCompact && normalizeProtocol(pc.Protocol) == ProtocolResponses
return pc.SupportsResponsesCompact
}
func ListProviderNames(cfg *config.Config) []string {

View File

@@ -0,0 +1,62 @@
package providers
import "testing"
func TestBuildResponsesTools_IncludesFunctionAndBuiltinTools(t *testing.T) {
tools := []ToolDefinition{
{
Type: "function",
Function: ToolFunctionDefinition{
Name: "read_file",
Parameters: map[string]interface{}{"type": "object"},
},
},
{
Type: "web_search",
Parameters: map[string]interface{}{"search_context_size": "high"},
},
}
options := map[string]interface{}{
"responses_tools": []interface{}{
map[string]interface{}{
"type": "file_search",
"vector_store_ids": []string{"vs_123"},
},
},
}
got := buildResponsesTools(tools, options)
if len(got) != 3 {
t.Fatalf("expected 3 tools, got %#v", got)
}
if got[0]["type"] != "function" || got[0]["name"] != "read_file" {
t.Fatalf("expected function tool in first slot, got %#v", got[0])
}
if got[1]["type"] != "web_search" {
t.Fatalf("expected web_search tool in second slot, got %#v", got[1])
}
if got[2]["type"] != "file_search" {
t.Fatalf("expected file_search tool from options, got %#v", got[2])
}
}
func TestResponsesMessageContent_SupportsImageAndFileByID(t *testing.T) {
msg := Message{
Role: "user",
ContentParts: []MessageContentPart{
{Type: "input_image", FileID: "file_img_1", Detail: "high"},
{Type: "input_file", FileID: "file_doc_1", Filename: "doc.pdf"},
},
}
content := responsesMessageContent(msg)
if len(content) != 2 {
t.Fatalf("expected two content items, got %#v", content)
}
if content[0]["type"] != "input_image" || content[0]["file_id"] != "file_img_1" {
t.Fatalf("expected input_image by file_id, got %#v", content[0])
}
if content[1]["type"] != "input_file" || content[1]["file_id"] != "file_doc_1" {
t.Fatalf("expected input_file by file_id, got %#v", content[1])
}
}

View File

@@ -6,7 +6,7 @@ func TestToResponsesInputItemsWithState_DropsOrphanToolOutputs(t *testing.T) {
pending := map[string]struct{}{}
orphan := Message{Role: "tool", ToolCallID: "call-orphan", Content: "orphan output"}
if got := toResponsesInputItemsWithState(orphan, pending, true); len(got) != 0 {
if got := toResponsesInputItemsWithState(orphan, pending); len(got) != 0 {
t.Fatalf("expected orphan tool output to be dropped, got: %#v", got)
}
@@ -20,7 +20,7 @@ func TestToResponsesInputItemsWithState_DropsOrphanToolOutputs(t *testing.T) {
},
}},
}
items := toResponsesInputItemsWithState(assistant, pending, true)
items := toResponsesInputItemsWithState(assistant, pending)
if len(items) == 0 {
t.Fatalf("assistant tool call should produce responses items")
}
@@ -29,7 +29,7 @@ func TestToResponsesInputItemsWithState_DropsOrphanToolOutputs(t *testing.T) {
}
matched := Message{Role: "tool", ToolCallID: "call-1", Content: "file content"}
matchedItems := toResponsesInputItemsWithState(matched, pending, true)
matchedItems := toResponsesInputItemsWithState(matched, pending)
if len(matchedItems) != 1 {
t.Fatalf("expected matched tool output item, got %#v", matchedItems)
}
@@ -40,24 +40,3 @@ func TestToResponsesInputItemsWithState_DropsOrphanToolOutputs(t *testing.T) {
t.Fatalf("matched tool output should clear pending call id")
}
}
func TestToResponsesInputItemsWithState_ToolResultAsUserInputWhenCallIDDisabled(t *testing.T) {
pending := map[string]struct{}{
"call-1": {},
}
msg := Message{Role: "tool", ToolCallID: "call-1", Content: "file content"}
items := toResponsesInputItemsWithState(msg, pending, false)
if len(items) != 1 {
t.Fatalf("expected one fallback tool result item, got %#v", items)
}
if items[0]["type"] != "message" {
t.Fatalf("expected message item, got %#v", items[0])
}
if items[0]["role"] != "user" {
t.Fatalf("expected fallback role=user, got %#v", items[0])
}
if _, ok := pending["call-1"]; !ok {
t.Fatalf("pending call state should remain untouched when call_id mode is disabled")
}
}

View File

@@ -40,9 +40,12 @@ type MessageContentPart struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
ImageURL string `json:"image_url,omitempty"`
Detail string `json:"detail,omitempty"`
MIMEType string `json:"mime_type,omitempty"`
Filename string `json:"filename,omitempty"`
FileData string `json:"file_data,omitempty"`
FileID string `json:"file_id,omitempty"`
FileURL string `json:"file_url,omitempty"`
}
type LLMProvider interface {
@@ -63,12 +66,17 @@ type ResponsesCompactor interface {
}
type ToolDefinition struct {
Type string `json:"type"`
Function ToolFunctionDefinition `json:"function"`
Type string `json:"type"`
Name string `json:"name,omitempty"`
Description string `json:"description,omitempty"`
Parameters map[string]interface{} `json:"parameters,omitempty"`
Strict *bool `json:"strict,omitempty"`
Function ToolFunctionDefinition `json:"function"`
}
type ToolFunctionDefinition struct {
Name string `json:"name"`
Description string `json:"description"`
Parameters map[string]interface{} `json:"parameters"`
Strict *bool `json:"strict,omitempty"`
}