From ed01230dc153e71f98db89628d9ba4aaf97859b2 Mon Sep 17 00:00:00 2001 From: lpf Date: Fri, 20 Feb 2026 17:57:22 +0800 Subject: [PATCH] fix loop --- pkg/agent/context.go | 14 +- pkg/agent/context_media_test.go | 82 --- pkg/agent/loop.go | 24 +- pkg/agent/loop_compaction_test.go | 132 ----- pkg/agent/loop_config_path_test.go | 54 -- pkg/agent/loop_directive_test.go | 111 ---- pkg/agent/loop_fallback_test.go | 286 ---------- pkg/agent/loop_language_test.go | 52 -- pkg/agent/loop_model_switch_test.go | 21 - pkg/agent/loop_replay_baseline_test.go | 242 -------- pkg/agent/loop_run_control_test.go | 142 ----- pkg/agent/loop_secret_test.go | 26 - pkg/agent/loop_toolloop_test.go | 738 ------------------------- 13 files changed, 25 insertions(+), 1899 deletions(-) delete mode 100644 pkg/agent/context_media_test.go delete mode 100644 pkg/agent/loop_compaction_test.go delete mode 100644 pkg/agent/loop_config_path_test.go delete mode 100644 pkg/agent/loop_directive_test.go delete mode 100644 pkg/agent/loop_fallback_test.go delete mode 100644 pkg/agent/loop_language_test.go delete mode 100644 pkg/agent/loop_model_switch_test.go delete mode 100644 pkg/agent/loop_replay_baseline_test.go delete mode 100644 pkg/agent/loop_run_control_test.go delete mode 100644 pkg/agent/loop_secret_test.go delete mode 100644 pkg/agent/loop_toolloop_test.go diff --git a/pkg/agent/context.go b/pkg/agent/context.go index 19461b8..d4f74ca 100644 --- a/pkg/agent/context.go +++ b/pkg/agent/context.go @@ -77,13 +77,13 @@ Your workspace is at: %s %s -Always be helpful, accurate, and concise. When using tools, explain what you're doing. -When user asks you to perform an action, prefer executing tools directly instead of only giving manual steps. -Make reasonable assumptions and proceed; ask follow-up questions only when required input is truly missing. -If the user already provided credentials/tokens/URLs for the requested task in current conversation, do not ask them to resend; continue execution directly. -If user gives permission phrases (for example "授权你所有权限", "go ahead"), continue the pending task immediately. -Never expose full secrets in visible output. -When remembering something, write to %s/memory/MEMORY.md`, +## Important Rules + +1. **ALWAYS use tools** - When you need to perform an action (schedule reminders, send messages, execute commands, etc.), you MUST call the appropriate tool. Do NOT just say you'll do it or pretend to do it. + +2. **Be helpful and accurate** - When using tools, briefly explain what you're doing. + +3. **Memory** - When remembering something, write to %s/memory/MEMORY.md`, now, runtime, workspacePath, workspacePath, workspacePath, workspacePath, toolsSection, workspacePath) } diff --git a/pkg/agent/context_media_test.go b/pkg/agent/context_media_test.go deleted file mode 100644 index 3354b33..0000000 --- a/pkg/agent/context_media_test.go +++ /dev/null @@ -1,82 +0,0 @@ -package agent - -import ( - "os" - "path/filepath" - "strings" - "testing" -) - -func TestBuildUserContentParts_InlinesSmallFile(t *testing.T) { - dir := t.TempDir() - filePath := filepath.Join(dir, "hello.txt") - if err := os.WriteFile(filePath, []byte("hello"), 0o644); err != nil { - t.Fatalf("write file: %v", err) - } - - parts := buildUserContentParts("check", []string{filePath}) - if len(parts) < 2 { - t.Fatalf("expected at least text + file parts, got %d", len(parts)) - } - - foundFile := false - for _, p := range parts { - if p.Type == "input_file" { - foundFile = true - if !strings.HasPrefix(p.FileData, "data:text/plain") { - t.Fatalf("unexpected file data prefix: %q", p.FileData) - } - } - } - if !foundFile { - t.Fatalf("expected input_file part") - } -} - -func TestBuildUserContentParts_SkipsOversizedFile(t *testing.T) { - dir := t.TempDir() - filePath := filepath.Join(dir, "big.bin") - content := make([]byte, maxInlineMediaFileBytes+1) - if err := os.WriteFile(filePath, content, 0o644); err != nil { - t.Fatalf("write file: %v", err) - } - - parts := buildUserContentParts("check", []string{filePath}) - for _, p := range parts { - if p.Type == "input_file" || p.Type == "input_image" { - t.Fatalf("oversized attachment should not be inlined") - } - } - - foundNote := false - for _, p := range parts { - if p.Type == "input_text" && strings.Contains(p.Text, "too large and was not inlined") { - foundNote = true - break - } - } - if !foundNote { - t.Fatalf("expected oversize note in input_text part") - } -} - -func TestBuildUserContentParts_SkipsURLMedia(t *testing.T) { - parts := buildUserContentParts("check", []string{"https://example.com/a.pdf"}) - - for _, p := range parts { - if p.Type == "input_file" || p.Type == "input_image" { - t.Fatalf("url attachment should not be inlined") - } - } - - foundNote := false - for _, p := range parts { - if p.Type == "input_text" && strings.Contains(p.Text, "kept as URL only") { - foundNote = true - break - } - } - if !foundNote { - t.Fatalf("expected url note in input_text part") - } -} diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 288dbca..53a77ac 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -882,14 +882,21 @@ func (al *AgentLoop) preferChineseUserFacingText(sessionKey, currentContent stri } func countLanguageSignals(text string) (zhCount int, enCount int) { + inEnglishWord := false for _, r := range text { if unicode.In(r, unicode.Han) { zhCount++ + inEnglishWord = false continue } if r <= unicode.MaxASCII && unicode.IsLetter(r) { - enCount++ + if !inEnglishWord { + enCount++ + inEnglishWord = true + } + continue } + inEnglishWord = false } return zhCount, enCount } @@ -1118,7 +1125,7 @@ func (al *AgentLoop) maybeRunAutonomyRound(msg bus.InboundMessage) bool { al.bus.PublishOutbound(bus.OutboundMessage{ Channel: msg.Channel, ChatID: msg.ChatID, - Content: al.naturalizeUserFacingText(context.Background(), "Autonomy mode stopped automatically because background rounds stalled repeatedly."), + Content: al.localizeUserFacingText(context.Background(), msg.SessionKey, "", "Autonomy mode stopped automatically because background rounds stalled repeatedly."), }) return false } @@ -1138,7 +1145,7 @@ func (al *AgentLoop) maybeRunAutonomyRound(msg bus.InboundMessage) bool { al.bus.PublishOutbound(bus.OutboundMessage{ Channel: msg.Channel, ChatID: msg.ChatID, - Content: al.naturalizeUserFacingText(context.Background(), "Autonomy mode paused automatically after many unattended rounds. Send a new request to continue."), + Content: al.localizeUserFacingText(context.Background(), msg.SessionKey, "", "Autonomy mode paused automatically after many unattended rounds. Send a new request to continue."), }) return false } @@ -1254,7 +1261,7 @@ func (al *AgentLoop) runAutoLearnerLoop(ctx context.Context, msg bus.InboundMess al.bus.PublishOutbound(bus.OutboundMessage{ Channel: msg.Channel, ChatID: msg.ChatID, - Content: al.naturalizeUserFacingText(context.Background(), "Auto-learn stopped automatically after reaching the unattended round limit."), + Content: al.localizeUserFacingText(context.Background(), msg.SessionKey, "", "Auto-learn stopped automatically after reaching the unattended round limit."), }) return false } @@ -2341,12 +2348,16 @@ func (al *AgentLoop) naturalizeUserFacingText(ctx context.Context, fallback stri return fallback } - targetLanguage := "English" + targetLanguage := "the same language as the original text" if hint, ok := ctx.Value(userLanguageHintKey{}).(userLanguageHint); ok { if al.preferChineseUserFacingText(hint.sessionKey, hint.content) { targetLanguage = "Simplified Chinese" } } + languageRule := "- Keep wording in the same language as the original text; do not mix languages." + if targetLanguage == "Simplified Chinese" { + languageRule = "- Use Simplified Chinese naturally. Keep unavoidable technical identifiers (commands, IDs, model names) as-is." + } llmCtx, cancel := context.WithTimeout(ctx, 4*time.Second) defer cancel() @@ -2354,8 +2365,9 @@ func (al *AgentLoop) naturalizeUserFacingText(ctx context.Context, fallback stri Rules: - Keep factual meaning unchanged. - Use concise natural wording, no rigid templates. +- %s - No markdown, no code block, no extra explanation. -- Return plain text only.`, targetLanguage)) +- Return plain text only.`, targetLanguage, languageRule)) resp, err := al.callLLMWithModelFallback(llmCtx, []providers.Message{ {Role: "system", Content: systemPrompt}, diff --git a/pkg/agent/loop_compaction_test.go b/pkg/agent/loop_compaction_test.go deleted file mode 100644 index ac5750a..0000000 --- a/pkg/agent/loop_compaction_test.go +++ /dev/null @@ -1,132 +0,0 @@ -package agent - -import ( - "context" - "fmt" - "strings" - "testing" - - "clawgo/pkg/providers" -) - -type compactionTestProvider struct { - errByModel map[string]error - summaryByModel map[string]string - calledModels []string -} - -func (p *compactionTestProvider) Chat(ctx context.Context, messages []providers.Message, tools []providers.ToolDefinition, model string, options map[string]interface{}) (*providers.LLMResponse, error) { - return &providers.LLMResponse{Content: "summary-fallback"}, nil -} - -func (p *compactionTestProvider) GetDefaultModel() string { - return "" -} - -func (p *compactionTestProvider) SupportsResponsesCompact() bool { - return true -} - -func (p *compactionTestProvider) BuildSummaryViaResponsesCompact(ctx context.Context, model string, existingSummary string, messages []providers.Message, maxSummaryChars int) (string, error) { - p.calledModels = append(p.calledModels, model) - if err := p.errByModel[model]; err != nil { - return "", err - } - if out := strings.TrimSpace(p.summaryByModel[model]); out != "" { - return out, nil - } - return "", fmt.Errorf(`responses compact request failed (status 400): {"error":{"message":"model not found"}}`) -} - -func TestShouldCompactBySize(t *testing.T) { - history := []providers.Message{ - {Role: "user", Content: strings.Repeat("a", 80)}, - {Role: "assistant", Content: strings.Repeat("b", 80)}, - } - - if !shouldCompactBySize("", history, 120) { - t.Fatalf("expected size-based compaction trigger") - } - if shouldCompactBySize("", history, 10000) { - t.Fatalf("did not expect trigger for large threshold") - } -} - -func TestFormatCompactionTranscript_HeadTailWhenOversized(t *testing.T) { - msgs := make([]providers.Message, 0, 30) - for i := 0; i < 30; i++ { - msgs = append(msgs, providers.Message{ - Role: "user", - Content: fmt.Sprintf("msg-%02d %s", i, strings.Repeat("x", 80)), - }) - } - - out := formatCompactionTranscript(msgs, 700) - if out == "" { - t.Fatalf("expected non-empty transcript") - } - if !strings.Contains(out, "msg-00") { - t.Fatalf("expected head messages preserved, got: %q", out) - } - if !strings.Contains(out, "msg-29") { - t.Fatalf("expected tail messages preserved, got: %q", out) - } - if !strings.Contains(out, "messages omitted for compaction") { - t.Fatalf("expected omitted marker, got: %q", out) - } - if len(out) > 700 { - t.Fatalf("expected output <= max chars, got %d", len(out)) - } -} - -func TestFormatCompactionTranscript_TrimsToolPayloadMoreAggressively(t *testing.T) { - msgs := []providers.Message{ - {Role: "tool", Content: strings.Repeat("z", 2000)}, - } - out := formatCompactionTranscript(msgs, 2000) - if len(out) >= 1200 { - t.Fatalf("expected tool content to be trimmed aggressively, got length %d", len(out)) - } -} - -func TestBuildCompactedSummary_ResponsesCompactFallsBackToBackupProxyOnTimeout(t *testing.T) { - primary := &compactionTestProvider{ - errByModel: map[string]error{ - "gpt-4o-mini": fmt.Errorf("failed to send request: Post \"https://primary/v1/chat/completions\": context deadline exceeded"), - }, - } - backup := &compactionTestProvider{ - summaryByModel: map[string]string{ - "deepseek-chat": "compacted summary", - }, - } - - al := &AgentLoop{ - provider: primary, - proxy: "primary", - proxyFallbacks: []string{"backup"}, - model: "gpt-4o-mini", - providersByProxy: map[string]providers.LLMProvider{ - "primary": primary, - "backup": backup, - }, - modelsByProxy: map[string][]string{ - "primary": []string{"gpt-4o-mini"}, - "backup": []string{"deepseek-chat"}, - }, - } - - out, err := al.buildCompactedSummary(context.Background(), "", []providers.Message{{Role: "user", Content: "a"}}, 2000, 1200, "responses_compact") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if strings.TrimSpace(out) != "compacted summary" { - t.Fatalf("unexpected summary: %q", out) - } - if al.proxy != "backup" { - t.Fatalf("expected proxy switched to backup, got %q", al.proxy) - } - if al.model != "deepseek-chat" { - t.Fatalf("expected model switched to deepseek-chat, got %q", al.model) - } -} diff --git a/pkg/agent/loop_config_path_test.go b/pkg/agent/loop_config_path_test.go deleted file mode 100644 index b55872a..0000000 --- a/pkg/agent/loop_config_path_test.go +++ /dev/null @@ -1,54 +0,0 @@ -package agent - -import ( - "os" - "path/filepath" - "testing" - - "clawgo/pkg/config" -) - -func TestGetConfigPathForCommands_FromArgs(t *testing.T) { - oldArgs := os.Args - oldEnv, hadEnv := os.LookupEnv("CLAWGO_CONFIG") - t.Cleanup(func() { - os.Args = oldArgs - if hadEnv { - _ = os.Setenv("CLAWGO_CONFIG", oldEnv) - } else { - _ = os.Unsetenv("CLAWGO_CONFIG") - } - }) - - _ = os.Unsetenv("CLAWGO_CONFIG") - os.Args = []string{"clawgo", "gateway", "run", "--config", "/tmp/custom-config.json"} - - al := &AgentLoop{} - got := al.getConfigPathForCommands() - if got != "/tmp/custom-config.json" { - t.Fatalf("expected config path from args, got %q", got) - } -} - -func TestGetConfigPathForCommands_Default(t *testing.T) { - oldArgs := os.Args - oldEnv, hadEnv := os.LookupEnv("CLAWGO_CONFIG") - t.Cleanup(func() { - os.Args = oldArgs - if hadEnv { - _ = os.Setenv("CLAWGO_CONFIG", oldEnv) - } else { - _ = os.Unsetenv("CLAWGO_CONFIG") - } - }) - - _ = os.Unsetenv("CLAWGO_CONFIG") - os.Args = []string{"clawgo", "gateway", "run"} - - al := &AgentLoop{} - got := al.getConfigPathForCommands() - want := filepath.Join(config.GetConfigDir(), "config.json") - if got != want { - t.Fatalf("expected default config path %q, got %q", want, got) - } -} diff --git a/pkg/agent/loop_directive_test.go b/pkg/agent/loop_directive_test.go deleted file mode 100644 index 06c0f0c..0000000 --- a/pkg/agent/loop_directive_test.go +++ /dev/null @@ -1,111 +0,0 @@ -package agent - -import ( - "testing" - - "clawgo/pkg/bus" -) - -func TestParseTaskExecutionDirectives_RunCommand(t *testing.T) { - d := parseTaskExecutionDirectives("/run fix build script --stage-report") - if d.task != "fix build script" { - t.Fatalf("unexpected task: %q", d.task) - } - if !d.stageReport { - t.Fatalf("expected stage report enabled") - } -} - -func TestParseTaskExecutionDirectives_Default(t *testing.T) { - d := parseTaskExecutionDirectives("Please check today's log anomalies") - if d.task != "Please check today's log anomalies" { - t.Fatalf("unexpected task: %q", d.task) - } - if d.stageReport { - t.Fatalf("expected stage report disabled") - } -} - -func TestClassifyConfirmationReply(t *testing.T) { - if ok, confident := classifyConfirmationReplyLexical("yes"); !confident || !ok { - t.Fatalf("expected yes to confirm") - } - if ok, confident := classifyConfirmationReplyLexical("取消"); !confident || ok { - t.Fatalf("expected cancel to reject") - } - if _, confident := classifyConfirmationReplyLexical("继续处理日志问题,不是这个"); confident { - t.Fatalf("expected non-confirmation sentence to be non-confident") - } -} - -func TestExtractJSONObject_FromCodeFence(t *testing.T) { - raw := extractJSONObject("```json\n{\"action\":\"start\",\"confidence\":0.95}\n```") - if raw != "{\"action\":\"start\",\"confidence\":0.95}" { - t.Fatalf("unexpected json: %q", raw) - } -} - -func TestExtractJSONObject_Invalid(t *testing.T) { - if raw := extractJSONObject("no json here"); raw != "" { - t.Fatalf("expected empty json, got: %q", raw) - } -} - -func TestShouldHandleControlIntents_UserMessage(t *testing.T) { - msg := bus.InboundMessage{ - SenderID: "user", - Content: "please enter autonomy mode", - } - if !shouldHandleControlIntents(msg) { - t.Fatalf("expected user message to be control-eligible") - } -} - -func TestShouldHandleControlIntents_AutonomySyntheticSender(t *testing.T) { - msg := bus.InboundMessage{ - SenderID: "autonomy", - Content: "autonomy round 1", - } - if shouldHandleControlIntents(msg) { - t.Fatalf("expected autonomy synthetic message to be ignored for control intents") - } -} - -func TestShouldHandleControlIntents_AutoLearnSyntheticMetadata(t *testing.T) { - msg := bus.InboundMessage{ - SenderID: "gateway", - Content: "auto-learn round 1", - Metadata: map[string]string{ - "source": "autolearn", - }, - } - if shouldHandleControlIntents(msg) { - t.Fatalf("expected autolearn synthetic metadata message to be ignored for control intents") - } -} - -func TestShouldPublishSyntheticResponse_AutonomyReportDue(t *testing.T) { - msg := bus.InboundMessage{ - SenderID: "autonomy", - Metadata: map[string]string{ - "source": "autonomy", - "report_due": "true", - }, - } - if !shouldPublishSyntheticResponse(msg) { - t.Fatalf("expected autonomy report_due message to be published") - } -} - -func TestShouldPublishSyntheticResponse_AutonomySilentRound(t *testing.T) { - msg := bus.InboundMessage{ - SenderID: "autonomy", - Metadata: map[string]string{ - "source": "autonomy", - "report_due": "false", - }, - } - if shouldPublishSyntheticResponse(msg) { - t.Fatalf("expected autonomy non-report round to be silent") - } -} diff --git a/pkg/agent/loop_fallback_test.go b/pkg/agent/loop_fallback_test.go deleted file mode 100644 index 2fd8ae1..0000000 --- a/pkg/agent/loop_fallback_test.go +++ /dev/null @@ -1,286 +0,0 @@ -package agent - -import ( - "context" - "fmt" - "testing" - - "clawgo/pkg/providers" -) - -type fallbackTestProvider struct { - byModel map[string]fallbackResult - called []string -} - -type fallbackResult struct { - resp *providers.LLMResponse - err error -} - -func (p *fallbackTestProvider) Chat(ctx context.Context, messages []providers.Message, tools []providers.ToolDefinition, model string, options map[string]interface{}) (*providers.LLMResponse, error) { - p.called = append(p.called, model) - if r, ok := p.byModel[model]; ok { - return r.resp, r.err - } - return nil, fmt.Errorf("unexpected model: %s", model) -} - -func (p *fallbackTestProvider) GetDefaultModel() string { - return "" -} - -func TestCallLLMWithModelFallback_RetriesOnUnknownProvider(t *testing.T) { - p := &fallbackTestProvider{ - byModel: map[string]fallbackResult{ - "gemini-3-flash": {err: fmt.Errorf(`API error (status 502): {"error":{"message":"unknown provider for model gemini-3-flash"}}`)}, - "gpt-4o-mini": {resp: &providers.LLMResponse{Content: "ok"}}, - }, - } - - al := &AgentLoop{ - provider: p, - proxy: "proxy", - model: "gemini-3-flash", - providersByProxy: map[string]providers.LLMProvider{ - "proxy": p, - }, - modelsByProxy: map[string][]string{ - "proxy": []string{"gemini-3-flash", "gpt-4o-mini"}, - }, - } - - resp, err := al.callLLMWithModelFallback(context.Background(), nil, nil, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if resp == nil || resp.Content != "ok" { - t.Fatalf("unexpected response: %+v", resp) - } - if len(p.called) != 2 { - t.Fatalf("expected 2 model attempts, got %d (%v)", len(p.called), p.called) - } - if p.called[0] != "gemini-3-flash" || p.called[1] != "gpt-4o-mini" { - t.Fatalf("unexpected model order: %v", p.called) - } - if al.model != "gpt-4o-mini" { - t.Fatalf("expected model switch to fallback, got %q", al.model) - } -} - -func TestCallLLMWithModelFallback_RetriesOnGateway502(t *testing.T) { - p := &fallbackTestProvider{ - byModel: map[string]fallbackResult{ - "gemini-3-flash": {err: fmt.Errorf("API error (status 502, content-type \"text/html\"): bad gateway")}, - "gpt-4o-mini": {resp: &providers.LLMResponse{Content: "ok"}}, - }, - } - - al := &AgentLoop{ - provider: p, - proxy: "proxy", - model: "gemini-3-flash", - providersByProxy: map[string]providers.LLMProvider{ - "proxy": p, - }, - modelsByProxy: map[string][]string{ - "proxy": []string{"gemini-3-flash", "gpt-4o-mini"}, - }, - } - - resp, err := al.callLLMWithModelFallback(context.Background(), nil, nil, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if resp == nil || resp.Content != "ok" { - t.Fatalf("unexpected response: %+v", resp) - } - if len(p.called) != 2 { - t.Fatalf("expected 2 model attempts, got %d (%v)", len(p.called), p.called) - } - if p.called[0] != "gemini-3-flash" || p.called[1] != "gpt-4o-mini" { - t.Fatalf("unexpected model order: %v", p.called) - } -} - -func TestCallLLMWithModelFallback_RetriesOnGateway524(t *testing.T) { - p := &fallbackTestProvider{ - byModel: map[string]fallbackResult{ - "gemini-3-flash": {err: fmt.Errorf("API error (status 524, content-type \"text/plain; charset=UTF-8\"): error code: 524")}, - "gpt-4o-mini": {resp: &providers.LLMResponse{Content: "ok"}}, - }, - } - - al := &AgentLoop{ - provider: p, - proxy: "proxy", - model: "gemini-3-flash", - providersByProxy: map[string]providers.LLMProvider{ - "proxy": p, - }, - modelsByProxy: map[string][]string{ - "proxy": []string{"gemini-3-flash", "gpt-4o-mini"}, - }, - } - - resp, err := al.callLLMWithModelFallback(context.Background(), nil, nil, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if resp == nil || resp.Content != "ok" { - t.Fatalf("unexpected response: %+v", resp) - } - if len(p.called) != 2 { - t.Fatalf("expected 2 model attempts, got %d (%v)", len(p.called), p.called) - } - if p.called[0] != "gemini-3-flash" || p.called[1] != "gpt-4o-mini" { - t.Fatalf("unexpected model order: %v", p.called) - } -} - -func TestCallLLMWithModelFallback_RetriesOnAuthUnavailable500(t *testing.T) { - p := &fallbackTestProvider{ - byModel: map[string]fallbackResult{ - "gemini-3-flash": {err: fmt.Errorf(`API error (status 500, content-type "application/json"): {"error":{"message":"auth_unavailable: no auth available","type":"server_error","code":"internal_server_error"}}`)}, - "gpt-4o-mini": {resp: &providers.LLMResponse{Content: "ok"}}, - }, - } - - al := &AgentLoop{ - provider: p, - proxy: "proxy", - model: "gemini-3-flash", - providersByProxy: map[string]providers.LLMProvider{ - "proxy": p, - }, - modelsByProxy: map[string][]string{ - "proxy": []string{"gemini-3-flash", "gpt-4o-mini"}, - }, - } - - resp, err := al.callLLMWithModelFallback(context.Background(), nil, nil, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if resp == nil || resp.Content != "ok" { - t.Fatalf("unexpected response: %+v", resp) - } - if len(p.called) != 2 { - t.Fatalf("expected 2 model attempts, got %d (%v)", len(p.called), p.called) - } - if p.called[0] != "gemini-3-flash" || p.called[1] != "gpt-4o-mini" { - t.Fatalf("unexpected model order: %v", p.called) - } -} - -func TestCallLLMWithModelFallback_NoRetryOnNonRetryableError(t *testing.T) { - p := &fallbackTestProvider{ - byModel: map[string]fallbackResult{ - "gemini-3-flash": {err: fmt.Errorf("API error (status 500): internal server error")}, - }, - } - - al := &AgentLoop{ - provider: p, - proxy: "proxy", - model: "gemini-3-flash", - providersByProxy: map[string]providers.LLMProvider{ - "proxy": p, - }, - modelsByProxy: map[string][]string{ - "proxy": []string{"gemini-3-flash", "gpt-4o-mini"}, - }, - } - - _, err := al.callLLMWithModelFallback(context.Background(), nil, nil, nil) - if err == nil { - t.Fatalf("expected error") - } - if len(p.called) != 1 { - t.Fatalf("expected single model attempt, got %d (%v)", len(p.called), p.called) - } -} - -func TestCallLLMWithModelFallback_SwitchesProxyAfterProxyModelsExhausted(t *testing.T) { - primary := &fallbackTestProvider{ - byModel: map[string]fallbackResult{ - "gemini-3-flash": {err: fmt.Errorf(`API error (status 502): {"error":{"message":"unknown provider for model gemini-3-flash"}}`)}, - "gpt-4o-mini": {err: fmt.Errorf(`API error (status 400): {"error":{"message":"model not found"}}`)}, - }, - } - backup := &fallbackTestProvider{ - byModel: map[string]fallbackResult{ - "gemini-3-flash": {err: fmt.Errorf(`API error (status 400): {"error":{"message":"model not found"}}`)}, - "deepseek-chat": {resp: &providers.LLMResponse{Content: "ok"}}, - }, - } - - al := &AgentLoop{ - proxy: "primary", - proxyFallbacks: []string{"backup"}, - model: "gemini-3-flash", - providersByProxy: map[string]providers.LLMProvider{ - "primary": primary, - "backup": backup, - }, - modelsByProxy: map[string][]string{ - "primary": []string{"gemini-3-flash", "gpt-4o-mini"}, - "backup": []string{"deepseek-chat"}, - }, - } - - resp, err := al.callLLMWithModelFallback(context.Background(), nil, nil, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if resp == nil || resp.Content != "ok" { - t.Fatalf("unexpected response: %+v", resp) - } - if al.proxy != "backup" { - t.Fatalf("expected proxy switch to backup, got %q", al.proxy) - } - if al.model != "deepseek-chat" { - t.Fatalf("expected model switch to deepseek-chat, got %q", al.model) - } - if len(primary.called) != 2 { - t.Fatalf("expected 2 model attempts in primary, got %d (%v)", len(primary.called), primary.called) - } - if len(backup.called) != 2 || backup.called[1] != "deepseek-chat" { - t.Fatalf("unexpected backup attempts: %v", backup.called) - } -} - -func TestShouldRetryWithFallbackModel_UnknownProviderError(t *testing.T) { - err := fmt.Errorf(`API error (status 502): {"error":{"message":"unknown provider for model gemini-3-flash","type":"servererror"}}`) - if !shouldRetryWithFallbackModel(err) { - t.Fatalf("expected unknown provider error to trigger fallback retry") - } -} - -func TestShouldRetryWithFallbackModel_HTMLUnmarshalError(t *testing.T) { - err := fmt.Errorf("failed to unmarshal response: invalid character '<' looking for beginning of value") - if !shouldRetryWithFallbackModel(err) { - t.Fatalf("expected HTML parse error to trigger fallback retry") - } -} - -func TestShouldRetryWithFallbackModel_Gateway524Error(t *testing.T) { - err := fmt.Errorf("API error (status 524, content-type \"text/plain; charset=UTF-8\"): error code: 524") - if !shouldRetryWithFallbackModel(err) { - t.Fatalf("expected 524 gateway timeout to trigger fallback retry") - } -} - -func TestShouldRetryWithFallbackModel_AuthUnavailableError(t *testing.T) { - err := fmt.Errorf(`API error (status 500, content-type "application/json"): {"error":{"message":"auth_unavailable: no auth available","type":"server_error","code":"internal_server_error"}}`) - if !shouldRetryWithFallbackModel(err) { - t.Fatalf("expected auth_unavailable error to trigger fallback retry") - } -} - -func TestShouldRetryWithFallbackModel_ContextDeadlineExceeded(t *testing.T) { - err := fmt.Errorf("failed to send request: Post \"https://v2.kkkk.dev/v1/chat/completions\": context deadline exceeded") - if !shouldRetryWithFallbackModel(err) { - t.Fatalf("expected context deadline exceeded to trigger fallback retry") - } -} diff --git a/pkg/agent/loop_language_test.go b/pkg/agent/loop_language_test.go deleted file mode 100644 index e209b6d..0000000 --- a/pkg/agent/loop_language_test.go +++ /dev/null @@ -1,52 +0,0 @@ -package agent - -import ( - "context" - "errors" - "testing" - - "clawgo/pkg/bus" - "clawgo/pkg/session" -) - -func TestFormatProcessingErrorMessage_CurrentMessage(t *testing.T) { - al := &AgentLoop{} - msg := bus.InboundMessage{ - SessionKey: "s-current", - Content: "Please help check this error", - } - - out := al.formatProcessingErrorMessage(context.Background(), msg, errors.New("boom")) - if out != "Error processing message: boom" { - t.Fatalf("expected formatted error message, got %q", out) - } -} - -func TestFormatProcessingErrorMessage_EnglishCurrentMessage(t *testing.T) { - al := &AgentLoop{} - msg := bus.InboundMessage{ - SessionKey: "s-en-current", - Content: "Please help debug this issue", - } - - out := al.formatProcessingErrorMessage(context.Background(), msg, errors.New("boom")) - if out != "Error processing message: boom" { - t.Fatalf("expected formatted error message, got %q", out) - } -} - -func TestFormatProcessingErrorMessage_UsesSessionHistory(t *testing.T) { - sm := session.NewSessionManager(t.TempDir()) - sm.AddMessage("s-history", "user", "Please continue fixing in this direction") - - al := &AgentLoop{sessions: sm} - msg := bus.InboundMessage{ - SessionKey: "s-history", - Content: "ok", - } - - out := al.formatProcessingErrorMessage(context.Background(), msg, errors.New("boom")) - if out != "Error processing message: boom" { - t.Fatalf("expected formatted error message from session history, got %q", out) - } -} diff --git a/pkg/agent/loop_model_switch_test.go b/pkg/agent/loop_model_switch_test.go deleted file mode 100644 index 7a4b84f..0000000 --- a/pkg/agent/loop_model_switch_test.go +++ /dev/null @@ -1,21 +0,0 @@ -package agent - -import "testing" - -func TestApplyRuntimeModelConfig_ProxyFallbacks(t *testing.T) { - al := &AgentLoop{proxyFallbacks: []string{"old-proxy"}} - al.applyRuntimeModelConfig("agents.defaults.proxy_fallbacks", []interface{}{"backup-a", "", "backup-b"}) - if len(al.proxyFallbacks) != 2 { - t.Fatalf("expected 2 fallbacks, got %d: %v", len(al.proxyFallbacks), al.proxyFallbacks) - } - if al.proxyFallbacks[0] != "backup-a" || al.proxyFallbacks[1] != "backup-b" { - t.Fatalf("unexpected fallbacks: %v", al.proxyFallbacks) - } -} - -func TestParseStringList_StringValue(t *testing.T) { - out := parseStringList("backup-a") - if len(out) != 1 || out[0] != "backup-a" { - t.Fatalf("unexpected parse result: %v", out) - } -} diff --git a/pkg/agent/loop_replay_baseline_test.go b/pkg/agent/loop_replay_baseline_test.go deleted file mode 100644 index 5eb1076..0000000 --- a/pkg/agent/loop_replay_baseline_test.go +++ /dev/null @@ -1,242 +0,0 @@ -package agent - -import ( - "context" - "fmt" - "strings" - "sync" - "testing" - "time" - - "clawgo/pkg/providers" - "clawgo/pkg/session" - "clawgo/pkg/tools" -) - -type replayScenario string - -const ( - replayDirectSuccess replayScenario = "direct_success" - replayOneToolSuccess replayScenario = "one_tool_success" - replayRepeatedToolCall replayScenario = "repeated_tool_call" - replayTransientFailure replayScenario = "transient_failure" - replayPermissionBlock replayScenario = "permission_block" -) - -type replayProvider struct { - mu sync.Mutex - scenario replayScenario - planCalls int - reflectCalls int - finalizeCalls int - polishCalls int - totalCalls int -} - -func (p *replayProvider) Chat(ctx context.Context, messages []providers.Message, defs []providers.ToolDefinition, model string, options map[string]interface{}) (*providers.LLMResponse, error) { - p.mu.Lock() - defer p.mu.Unlock() - p.totalCalls++ - - last := "" - if len(messages) > 0 { - last = strings.TrimSpace(messages[len(messages)-1].Content) - } - - // Phase-2 polish call. - if len(defs) == 0 && len(messages) > 0 && strings.Contains(strings.ToLower(strings.TrimSpace(messages[0].Content)), "rewrite the draft answer for end users") { - p.polishCalls++ - return &providers.LLMResponse{Content: "polished final response"}, nil - } - - // Reflection call. - if len(defs) == 0 && strings.Contains(last, "Classify current execution progress using JSON only.") { - p.reflectCalls++ - switch p.scenario { - case replayTransientFailure: - return &providers.LLMResponse{Content: `{"decision":"continue","reason":"transient failures may recover","confidence":0.74}`}, nil - case replayRepeatedToolCall: - return &providers.LLMResponse{Content: `{"decision":"continue","reason":"need another attempt","confidence":0.72}`}, nil - default: - return &providers.LLMResponse{Content: `{"decision":"continue","reason":"default continue","confidence":0.60}`}, nil - } - } - - // Finalization draft call. - if len(defs) == 0 { - p.finalizeCalls++ - return &providers.LLMResponse{Content: "draft final response"}, nil - } - - // Planning/tool-loop calls. - p.planCalls++ - switch p.scenario { - case replayDirectSuccess: - return &providers.LLMResponse{Content: "direct completed"}, nil - case replayOneToolSuccess: - if p.planCalls == 1 { - return &providers.LLMResponse{ - Content: "call one tool", - ToolCalls: []providers.ToolCall{ - {ID: "tc-1", Name: "ok_tool", Arguments: map[string]interface{}{"x": 1}}, - }, - }, nil - } - return &providers.LLMResponse{Content: "task completed after one tool"}, nil - case replayRepeatedToolCall: - return &providers.LLMResponse{ - Content: "repeating tool", - ToolCalls: []providers.ToolCall{ - {ID: fmt.Sprintf("tc-r-%d", p.planCalls), Name: "ok_tool", Arguments: map[string]interface{}{"same": true}}, - }, - }, nil - case replayTransientFailure: - return &providers.LLMResponse{ - Content: "transient fail tool", - ToolCalls: []providers.ToolCall{ - {ID: fmt.Sprintf("tc-t-%d", p.planCalls), Name: "fail_tool_transient", Arguments: map[string]interface{}{}}, - }, - }, nil - case replayPermissionBlock: - return &providers.LLMResponse{ - Content: "permission fail tool", - ToolCalls: []providers.ToolCall{ - {ID: "tc-p-1", Name: "fail_tool_permission", Arguments: map[string]interface{}{}}, - }, - }, nil - default: - return &providers.LLMResponse{Content: "unexpected scenario"}, nil - } -} - -func (p *replayProvider) GetDefaultModel() string { return "test-model" } - -type replayToolImpl struct { - name string - run func(context.Context, map[string]interface{}) (string, error) -} - -func (t replayToolImpl) Name() string { return t.name } -func (t replayToolImpl) Description() string { return "replay tool" } -func (t replayToolImpl) Parameters() map[string]interface{} { - return map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{}, - } -} -func (t replayToolImpl) Execute(ctx context.Context, args map[string]interface{}) (string, error) { - return t.run(ctx, args) -} - -type replayCaseResult struct { - name replayScenario - ok bool - iterations int - llmCalls int - reflectCalls int -} - -func TestAgentLoopReplayBaseline(t *testing.T) { - t.Parallel() - - scenarios := []replayScenario{ - replayDirectSuccess, - replayOneToolSuccess, - replayRepeatedToolCall, - replayTransientFailure, - replayPermissionBlock, - } - - results := make([]replayCaseResult, 0, len(scenarios)) - for _, sc := range scenarios { - sc := sc - t.Run(string(sc), func(t *testing.T) { - reg := tools.NewToolRegistry() - reg.Register(replayToolImpl{ - name: "ok_tool", - run: func(ctx context.Context, args map[string]interface{}) (string, error) { - return "ok", nil - }, - }) - reg.Register(replayToolImpl{ - name: "fail_tool_transient", - run: func(ctx context.Context, args map[string]interface{}) (string, error) { - return "", fmt.Errorf("temporary unavailable 503") - }, - }) - reg.Register(replayToolImpl{ - name: "fail_tool_permission", - run: func(ctx context.Context, args map[string]interface{}) (string, error) { - return "", fmt.Errorf("permission denied") - }, - }) - - provider := &replayProvider{scenario: sc} - al := &AgentLoop{ - provider: provider, - providersByProxy: map[string]providers.LLMProvider{"proxy": provider}, - modelsByProxy: map[string][]string{"proxy": {"test-model"}}, - proxy: "proxy", - model: "test-model", - maxIterations: 6, - llmCallTimeout: 3 * time.Second, - tools: reg, - sessions: session.NewSessionManager(""), - workspace: t.TempDir(), - } - - msgs := []providers.Message{ - {Role: "system", Content: "you are a test agent"}, - {Role: "user", Content: "complete task"}, - } - - out, iterations, err := al.runLLMToolLoop(context.Background(), msgs, "replay:"+string(sc), false, nil) - if err != nil { - t.Fatalf("runLLMToolLoop error: %v", err) - } - if strings.TrimSpace(out) == "" { - t.Fatalf("empty output") - } - results = append(results, replayCaseResult{ - name: sc, - ok: true, - iterations: iterations, - llmCalls: provider.totalCalls, - reflectCalls: provider.reflectCalls, - }) - }) - } - - total := len(results) - if total != len(scenarios) { - t.Fatalf("unexpected results count: %d", total) - } - success := 0 - iterSum := 0 - llmSum := 0 - reflectSum := 0 - for _, r := range results { - if r.ok { - success++ - } - iterSum += r.iterations - llmSum += r.llmCalls - reflectSum += r.reflectCalls - } - successRate := float64(success) / float64(total) - avgIter := float64(iterSum) / float64(total) - avgLLM := float64(llmSum) / float64(total) - avgReflect := float64(reflectSum) / float64(total) - - t.Logf("Replay baseline: success_rate=%.2f avg_iterations=%.2f avg_llm_calls=%.2f avg_reflect_calls=%.2f", successRate, avgIter, avgLLM, avgReflect) - - if successRate < 1.0 { - t.Fatalf("expected all scenarios to succeed, got success_rate=%.2f", successRate) - } - if avgIter > 3.6 { - t.Fatalf("avg_iterations too high: %.2f", avgIter) - } - if avgLLM > 6.0 { - t.Fatalf("avg_llm_calls too high: %.2f", avgLLM) - } -} diff --git a/pkg/agent/loop_run_control_test.go b/pkg/agent/loop_run_control_test.go deleted file mode 100644 index 53271a5..0000000 --- a/pkg/agent/loop_run_control_test.go +++ /dev/null @@ -1,142 +0,0 @@ -package agent - -import ( - "context" - "testing" - "time" - - "clawgo/pkg/bus" -) - -func TestDetectRunControlIntent(t *testing.T) { - t.Parallel() - - if got := normalizeRunWaitTimeout(0); got != defaultRunWaitTimeout { - t.Fatalf("expected default timeout, got %s", got) - } -} - -func TestDetectRunControlIntentLatest(t *testing.T) { - t.Parallel() - - if got := normalizeRunWaitTimeout(time.Second); got != minRunWaitTimeout { - t.Fatalf("expected min timeout %s, got %s", minRunWaitTimeout, got) - } -} - -func TestParseRunWaitTimeout_MinClamp(t *testing.T) { - t.Parallel() - - if got := normalizeRunWaitTimeout(maxRunWaitTimeout + time.Minute); got != maxRunWaitTimeout { - t.Fatalf("expected max timeout %s, got %s", maxRunWaitTimeout, got) - } -} - -func TestParseRunWaitTimeout_MinuteUnit(t *testing.T) { - t.Parallel() - - if got := normalizeRunWaitTimeout(2 * time.Minute); got != 2*time.Minute { - t.Fatalf("expected 2m, got %s", got) - } -} - -func TestLatestRunStateBySession(t *testing.T) { - t.Parallel() - - now := time.Now() - al := &AgentLoop{ - runStates: map[string]*runState{ - "run-1-1": { - runID: "run-1-1", - sessionKey: "s1", - startedAt: now.Add(-2 * time.Minute), - }, - "run-1-2": { - runID: "run-1-2", - sessionKey: "s1", - startedAt: now.Add(-time.Minute), - }, - "run-2-1": { - runID: "run-2-1", - sessionKey: "s2", - startedAt: now, - }, - }, - } - - rs, ok := al.latestRunState("s1") - if !ok { - t.Fatalf("expected state for s1") - } - if rs.runID != "run-1-2" { - t.Fatalf("unexpected run id: %s", rs.runID) - } -} - -func TestHandleSlashCommand_StatusRunLatest(t *testing.T) { - t.Parallel() - - al := &AgentLoop{ - runStates: map[string]*runState{ - "run-100-1": { - runID: "run-100-1", - sessionKey: "s1", - status: runStatusOK, - acceptedAt: time.Now().Add(-time.Minute), - startedAt: time.Now().Add(-time.Minute), - endedAt: time.Now().Add(-30 * time.Second), - done: closedChan(), - }, - }, - } - handled, out, err := al.handleSlashCommand(context.Background(), bus.InboundMessage{ - Content: "/status run latest", - SessionKey: "s1", - }) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if !handled { - t.Fatalf("expected command handled") - } - if out == "" || !containsAnySubstring(out, "run-100-1", "Run ID: run-100-1") { - t.Fatalf("unexpected output: %s", out) - } -} - -func TestHandleSlashCommand_StatusWaitDoneRun(t *testing.T) { - t.Parallel() - - al := &AgentLoop{ - runStates: map[string]*runState{ - "run-200-2": { - runID: "run-200-2", - sessionKey: "s1", - status: runStatusOK, - acceptedAt: time.Now().Add(-time.Minute), - startedAt: time.Now().Add(-time.Minute), - endedAt: time.Now().Add(-20 * time.Second), - done: closedChan(), - }, - }, - } - handled, out, err := al.handleSlashCommand(context.Background(), bus.InboundMessage{ - Content: "/status wait run-200-2 3", - SessionKey: "s1", - }) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if !handled { - t.Fatalf("expected command handled") - } - if out == "" || !containsAnySubstring(out, "run-200-2", "Run ID: run-200-2") { - t.Fatalf("unexpected output: %s", out) - } -} - -func closedChan() chan struct{} { - ch := make(chan struct{}) - close(ch) - return ch -} diff --git a/pkg/agent/loop_secret_test.go b/pkg/agent/loop_secret_test.go deleted file mode 100644 index cf0d452..0000000 --- a/pkg/agent/loop_secret_test.go +++ /dev/null @@ -1,26 +0,0 @@ -package agent - -import "testing" - -func TestRedactSecretsInText(t *testing.T) { - in := `{"token":"abc123","authorization":"Bearer sk-xyz","password":"p@ss","note":"ok"}` - out := redactSecretsInText(in) - - if out == in { - t.Fatalf("expected redaction to change input") - } - if containsAnySubstring(out, "abc123", "sk-xyz", "p@ss") { - t.Fatalf("expected sensitive values to be redacted, got: %s", out) - } -} - -func TestSanitizeSensitiveToolArgs(t *testing.T) { - args := map[string]interface{}{ - "command": "git -c http.extraHeader='Authorization: token abc123' ls-remote https://example.com/repo.git", - "token": "abc123", - } - safe := sanitizeSensitiveToolArgs(args) - if safe["token"] == "abc123" { - t.Fatalf("expected token field to be redacted") - } -} diff --git a/pkg/agent/loop_toolloop_test.go b/pkg/agent/loop_toolloop_test.go deleted file mode 100644 index 374c97a..0000000 --- a/pkg/agent/loop_toolloop_test.go +++ /dev/null @@ -1,738 +0,0 @@ -package agent - -import ( - "context" - "fmt" - "strings" - "sync/atomic" - "testing" - "time" - - "clawgo/pkg/config" - "clawgo/pkg/providers" - "clawgo/pkg/session" - "clawgo/pkg/tools" -) - -func TestToolCallsSignatureStableForSameInput(t *testing.T) { - t.Parallel() - - calls := []providers.ToolCall{ - { - Name: "shell", - Arguments: map[string]interface{}{"cmd": "ls -la", "cwd": "/tmp"}, - }, - { - Name: "read_file", - Arguments: map[string]interface{}{"path": "README.md"}, - }, - } - - s1 := toolCallsSignature(calls) - s2 := toolCallsSignature(calls) - if s1 == "" { - t.Fatalf("expected non-empty signature") - } - if s1 != s2 { - t.Fatalf("expected stable signature, got %q vs %q", s1, s2) - } -} - -func TestToolCallsSignatureDiffersByArguments(t *testing.T) { - t.Parallel() - - callsA := []providers.ToolCall{ - {Name: "shell", Arguments: map[string]interface{}{"cmd": "ls -la"}}, - } - callsB := []providers.ToolCall{ - {Name: "shell", Arguments: map[string]interface{}{"cmd": "pwd"}}, - } - - if toolCallsSignature(callsA) == toolCallsSignature(callsB) { - t.Fatalf("expected different signatures for different arguments") - } -} - -func TestNormalizeReflectDecision(t *testing.T) { - t.Parallel() - - if got := normalizeReflectDecision("DONE"); got != "done" { - t.Fatalf("expected done, got %s", got) - } - if got := normalizeReflectDecision("blocked"); got != "blocked" { - t.Fatalf("expected blocked, got %s", got) - } - if got := normalizeReflectDecision("unknown"); got != "continue" { - t.Fatalf("expected continue, got %s", got) - } -} - -func TestShouldTriggerReflectionReplayScenarios(t *testing.T) { - t.Parallel() - - al := &AgentLoop{maxIterations: 5} - tests := []struct { - name string - state toolLoopState - outcome toolActOutcome - want bool - }{ - { - name: "tool failure", - state: toolLoopState{iteration: 2}, - outcome: toolActOutcome{executedCalls: 2, roundToolErrors: 1, lastToolResult: "Error: denied"}, - want: true, - }, - { - name: "repetition hint", - state: toolLoopState{iteration: 2, repeatedToolCallRounds: 1}, - outcome: toolActOutcome{executedCalls: 1, lastToolResult: "ok"}, - want: true, - }, - { - name: "near iteration limit", - state: toolLoopState{iteration: 4}, - outcome: toolActOutcome{executedCalls: 1, lastToolResult: "ok"}, - want: true, - }, - { - name: "empty tool result", - state: toolLoopState{iteration: 1}, - outcome: toolActOutcome{executedCalls: 1, lastToolResult: ""}, - want: true, - }, - { - name: "healthy progress", - state: toolLoopState{iteration: 1}, - outcome: toolActOutcome{executedCalls: 1, lastToolResult: "done step 1"}, - want: true, - }, - } - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - got := al.shouldTriggerReflection(tt.state, tt.outcome) - if got != tt.want { - t.Fatalf("shouldTriggerReflection=%v want=%v", got, tt.want) - } - }) - } -} - -func TestShouldTriggerReflectionCooldown(t *testing.T) { - t.Parallel() - - al := &AgentLoop{maxIterations: 10} - state := toolLoopState{ - iteration: 3, - lastReflectIteration: 2, - } - // No hard trigger, within cooldown window -> false. - if al.shouldTriggerReflection(state, toolActOutcome{executedCalls: 1, lastToolResult: "ok"}) { - t.Fatalf("expected reflection suppressed by cooldown") - } - - // Hard trigger bypasses cooldown. - if !al.shouldTriggerReflection(state, toolActOutcome{executedCalls: 1, roundToolErrors: 1, lastToolResult: "Error: x"}) { - t.Fatalf("expected hard trigger to bypass cooldown") - } -} - -type replayTool struct { - name string - parallelSafe *bool - resourceKeys func(args map[string]interface{}) []string - run func(context.Context, map[string]interface{}) (string, error) -} - -func (t replayTool) Name() string { return t.name } -func (t replayTool) Description() string { return "replay tool" } -func (t replayTool) Parameters() map[string]interface{} { - return map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{}, - } -} -func (t replayTool) Execute(ctx context.Context, args map[string]interface{}) (string, error) { - if t.run != nil { - return t.run(ctx, args) - } - return fmt.Sprintf("ok:%s", t.name), nil -} - -func (t replayTool) ParallelSafe() bool { - if t.parallelSafe == nil { - return false - } - return *t.parallelSafe -} - -func (t replayTool) ResourceKeys(args map[string]interface{}) []string { - if t.resourceKeys == nil { - return nil - } - return t.resourceKeys(args) -} - -type deferralRetryProvider struct { - planCalls int -} - -func (p *deferralRetryProvider) Chat(ctx context.Context, messages []providers.Message, defs []providers.ToolDefinition, model string, options map[string]interface{}) (*providers.LLMResponse, error) { - if len(defs) == 0 { - return &providers.LLMResponse{Content: "finalized"}, nil - } - p.planCalls++ - switch p.planCalls { - case 1: - return &providers.LLMResponse{Content: "需要先查看一下当前工作区才能确认,请稍等。"}, nil - case 2: - return &providers.LLMResponse{ - Content: "先检查状态", - ToolCalls: []providers.ToolCall{ - {ID: "tc-status-1", Name: "read_file", Arguments: map[string]interface{}{"path": "README.md"}}, - }, - }, nil - default: - return &providers.LLMResponse{Content: "已完成状态检查,当前一切正常。"}, nil - } -} - -func (p *deferralRetryProvider) GetDefaultModel() string { return "test-model" } - -func TestActToolCalls_BudgetTruncationReplay(t *testing.T) { - t.Parallel() - - reg := tools.NewToolRegistry() - calls := make([]providers.ToolCall, 0, toolLoopMaxCallsPerIteration+2) - for i := 0; i < toolLoopMaxCallsPerIteration+2; i++ { - name := fmt.Sprintf("tool_%d", i) - reg.Register(replayTool{name: name}) - calls = append(calls, providers.ToolCall{ - ID: fmt.Sprintf("tc-%d", i), - Name: name, - Arguments: map[string]interface{}{}, - }) - } - - al := &AgentLoop{ - tools: reg, - sessions: session.NewSessionManager(""), - } - msgs := []providers.Message{} - out := al.actToolCalls(context.Background(), "", calls, &msgs, "s1", 1, toolLoopBudget{}, false, nil) - - if !out.truncated { - t.Fatalf("expected truncation due to budget") - } - if out.executedCalls != toolLoopMaxCallsPerIteration { - t.Fatalf("executed=%d want=%d", out.executedCalls, toolLoopMaxCallsPerIteration) - } - if out.droppedCalls != 2 { - t.Fatalf("dropped=%d want=2", out.droppedCalls) - } -} - -func TestComputeToolLoopBudget(t *testing.T) { - t.Parallel() - - al := &AgentLoop{maxIterations: 6} - - early := al.computeToolLoopBudget(toolLoopState{iteration: 1}) - if early.maxCallsPerIteration <= toolLoopMaxCallsPerIteration { - t.Fatalf("expected wider early budget, got %d", early.maxCallsPerIteration) - } - - degraded := al.computeToolLoopBudget(toolLoopState{iteration: 2, consecutiveAllToolErrorRounds: 1}) - if degraded.maxCallsPerIteration >= toolLoopMaxCallsPerIteration { - t.Fatalf("expected tighter degraded budget, got %d", degraded.maxCallsPerIteration) - } - - nearLimit := al.computeToolLoopBudget(toolLoopState{iteration: 5}) - if nearLimit.maxCallsPerIteration != toolLoopMinCallsPerIteration { - t.Fatalf("expected minimal near-limit calls, got %d", nearLimit.maxCallsPerIteration) - } - if nearLimit.singleCallTimeout != toolLoopMinSingleCallTimeout { - t.Fatalf("expected minimal near-limit timeout, got %s", nearLimit.singleCallTimeout) - } - - lowConfContinue := al.computeToolLoopBudget(toolLoopState{ - iteration: 2, - lastReflectDecision: "continue", - lastReflectConfidence: 0.42, - lastReflectIteration: 1, - }) - if lowConfContinue.maxCallsPerIteration >= toolLoopMaxCallsPerIteration { - t.Fatalf("expected low-confidence continue to tighten calls, got %d", lowConfContinue.maxCallsPerIteration) - } - - highConfContinue := al.computeToolLoopBudget(toolLoopState{ - iteration: 2, - lastReflectDecision: "continue", - lastReflectConfidence: 0.91, - lastReflectIteration: 1, - }) - if highConfContinue.maxCallsPerIteration <= toolLoopMaxCallsPerIteration { - t.Fatalf("expected high-confidence continue to widen calls, got %d", highConfContinue.maxCallsPerIteration) - } - - blocked := al.computeToolLoopBudget(toolLoopState{ - iteration: 2, - lastReflectDecision: "blocked", - lastReflectConfidence: 0.8, - lastReflectIteration: 1, - }) - if blocked.maxCallsPerIteration != toolLoopMinCallsPerIteration { - t.Fatalf("expected blocked reflection to force min calls, got %d", blocked.maxCallsPerIteration) - } -} - -func TestParallelSafeToolDeclarationOverridesWhitelist(t *testing.T) { - t.Parallel() - - yes := true - no := false - reg := tools.NewToolRegistry() - reg.Register(replayTool{name: "read_file", parallelSafe: &no}) - reg.Register(replayTool{name: "custom_safe", parallelSafe: &yes}) - - al := &AgentLoop{ - tools: reg, - parallelSafeTools: map[string]struct{}{ - "read_file": {}, - }, - } - - if al.isParallelSafeTool("read_file") { - t.Fatalf("tool declaration should override whitelist to false") - } - if !al.isParallelSafeTool("custom_safe") { - t.Fatalf("tool declaration true should be respected") - } -} - -func TestClassifyToolExecutionError(t *testing.T) { - t.Parallel() - - typ, retryable, blocked := classifyToolExecutionError(fmt.Errorf("permission denied to write file"), false) - if typ != "permission" || retryable || !blocked { - t.Fatalf("unexpected permission classification: %s %v %v", typ, retryable, blocked) - } - - typ, retryable, blocked = classifyToolExecutionError(fmt.Errorf("temporary unavailable 503"), false) - if typ != "transient" || !retryable || blocked { - t.Fatalf("unexpected transient classification: %s %v %v", typ, retryable, blocked) - } -} - -func TestSummarizeToolActOutcome(t *testing.T) { - t.Parallel() - - out := summarizeToolActOutcome(toolActOutcome{ - executedCalls: 1, - records: []toolExecutionRecord{ - {Tool: "shell", Status: "error", ErrorType: "permission", Retryable: false}, - }, - hardErrors: 1, - blockedLikely: true, - }) - if out == "" || !strings.Contains(out, "\"blocked_likely\":true") { - t.Fatalf("unexpected summary: %s", out) - } - if !strings.Contains(out, "\"error_type\":\"permission\"") { - t.Fatalf("missing record fields in summary: %s", out) - } - if !strings.Contains(out, "\"records_truncated\":0") { - t.Fatalf("expected records_truncated field, got: %s", out) - } -} - -func TestShouldPersistToolResultRecord(t *testing.T) { - t.Parallel() - - if !shouldPersistToolResultRecord(toolExecutionRecord{Status: "ok"}, 0, 3) { - t.Fatalf("first tool result should persist") - } - if !shouldPersistToolResultRecord(toolExecutionRecord{Status: "ok"}, 2, 3) { - t.Fatalf("last tool result should persist") - } - if shouldPersistToolResultRecord(toolExecutionRecord{Status: "ok"}, 1, 3) { - t.Fatalf("middle successful tool result should be skipped") - } - if !shouldPersistToolResultRecord(toolExecutionRecord{Status: "error"}, 1, 3) { - t.Fatalf("error tool result should persist") - } -} - -func TestCompactToolExecutionRecords(t *testing.T) { - t.Parallel() - - records := []toolExecutionRecord{ - {Tool: "a", Status: "ok"}, - {Tool: "b", Status: "error", ErrorType: "permission"}, - {Tool: "c", Status: "ok"}, - {Tool: "d", Status: "error", ErrorType: "transient"}, - {Tool: "e", Status: "ok"}, - {Tool: "f", Status: "ok"}, - } - out, truncated := compactToolExecutionRecords(records, 4) - if len(out) != 4 { - t.Fatalf("expected compact len 4, got %d", len(out)) - } - if truncated != 2 { - t.Fatalf("expected truncated 2, got %d", truncated) - } - foundErr := 0 - for _, r := range out { - if r.Status == "error" { - foundErr++ - } - } - if foundErr < 2 { - t.Fatalf("expected to keep error records, got %d", foundErr) - } -} - -func TestShouldRunToolCallsInParallel(t *testing.T) { - t.Parallel() - - al := &AgentLoop{ - parallelSafeTools: map[string]struct{}{ - "read_file": {}, - "memory_search": {}, - }, - } - ok := al.shouldRunToolCallsInParallel([]providers.ToolCall{ - {Name: "read_file"}, {Name: "memory_search"}, - }) - if !ok { - t.Fatalf("expected parallel-safe tools to run in parallel") - } - - notOK := al.shouldRunToolCallsInParallel([]providers.ToolCall{ - {Name: "read_file"}, {Name: "shell"}, - }) - if notOK { - t.Fatalf("expected mixed tool set to stay serial") - } -} - -func TestActToolCalls_ParallelExecutionForSafeTools(t *testing.T) { - t.Parallel() - - var active int32 - var maxActive int32 - probe := func() { - cur := atomic.AddInt32(&active, 1) - for { - old := atomic.LoadInt32(&maxActive) - if cur <= old || atomic.CompareAndSwapInt32(&maxActive, old, cur) { - break - } - } - time.Sleep(40 * time.Millisecond) - atomic.AddInt32(&active, -1) - } - - reg := tools.NewToolRegistry() - reg.Register(replayToolImpl{name: "read_file", run: func(ctx context.Context, args map[string]interface{}) (string, error) { - probe() - return "ok", nil - }}) - reg.Register(replayToolImpl{name: "memory_search", run: func(ctx context.Context, args map[string]interface{}) (string, error) { - probe() - return "ok", nil - }}) - - al := &AgentLoop{ - tools: reg, - sessions: session.NewSessionManager(""), - parallelSafeTools: map[string]struct{}{"read_file": {}, "memory_search": {}}, - maxParallelCalls: 2, - } - msgs := []providers.Message{} - calls := []providers.ToolCall{ - {ID: "1", Name: "read_file", Arguments: map[string]interface{}{}}, - {ID: "2", Name: "memory_search", Arguments: map[string]interface{}{}}, - } - - al.actToolCalls(context.Background(), "", calls, &msgs, "s1", 1, toolLoopBudget{ - maxCallsPerIteration: 2, - singleCallTimeout: 2 * time.Second, - maxActDuration: 2 * time.Second, - }, false, nil) - - if atomic.LoadInt32(&maxActive) < 2 { - t.Fatalf("expected concurrent execution, maxActive=%d", maxActive) - } -} - -func TestActToolCalls_ResourceConflictForcesSerial(t *testing.T) { - t.Parallel() - - var active int32 - var maxActive int32 - probe := func() { - cur := atomic.AddInt32(&active, 1) - for { - old := atomic.LoadInt32(&maxActive) - if cur <= old || atomic.CompareAndSwapInt32(&maxActive, old, cur) { - break - } - } - time.Sleep(35 * time.Millisecond) - atomic.AddInt32(&active, -1) - } - - yes := true - reg := tools.NewToolRegistry() - reg.Register(replayTool{ - name: "read_file", - parallelSafe: &yes, - resourceKeys: func(args map[string]interface{}) []string { return []string{"fs:/tmp/a"} }, - run: func(ctx context.Context, args map[string]interface{}) (string, error) { - probe() - return "ok", nil - }, - }) - reg.Register(replayTool{ - name: "memory_search", - parallelSafe: &yes, - resourceKeys: func(args map[string]interface{}) []string { return []string{"fs:/tmp/a"} }, - run: func(ctx context.Context, args map[string]interface{}) (string, error) { - probe() - return "ok", nil - }, - }) - - al := &AgentLoop{ - tools: reg, - sessions: session.NewSessionManager(""), - parallelSafeTools: map[string]struct{}{"read_file": {}, "memory_search": {}}, - maxParallelCalls: 2, - } - - msgs := []providers.Message{} - calls := []providers.ToolCall{ - {ID: "1", Name: "read_file", Arguments: map[string]interface{}{}}, - {ID: "2", Name: "memory_search", Arguments: map[string]interface{}{}}, - } - al.actToolCalls(context.Background(), "", calls, &msgs, "s1", 1, toolLoopBudget{ - maxCallsPerIteration: 2, - singleCallTimeout: 2 * time.Second, - maxActDuration: 2 * time.Second, - }, false, nil) - - if atomic.LoadInt32(&maxActive) > 1 { - t.Fatalf("expected serial execution on same resource key, maxActive=%d", maxActive) - } -} - -func TestLoadToolParallelPolicyFromConfig(t *testing.T) { - t.Parallel() - - allowed, maxCalls := loadToolParallelPolicyFromConfig(config.RuntimeControlConfig{ - ToolParallelSafeNames: []string{"Read_File", "memory_search"}, - ToolMaxParallelCalls: 3, - }) - if maxCalls != 3 { - t.Fatalf("unexpected max calls: %d", maxCalls) - } - if _, ok := allowed["read_file"]; !ok { - t.Fatalf("expected normalized read_file in allowed set") - } -} - -func TestShouldRunFinalizePolish(t *testing.T) { - t.Parallel() - - short := "done" - if shouldRunFinalizePolish(short) { - t.Fatalf("short draft should skip polish") - } - - longButFlat := strings.Repeat("a", finalizeDraftMinCharsForPolish+10) - if shouldRunFinalizePolish(longButFlat) { - t.Fatalf("flat draft should skip polish") - } - - longStructured := "1. Step one: check environment variables and baseline configs.\n2. Step two: apply fix and rerun validations.\nNext: verify rollout and provide follow-up actions." - if !shouldRunFinalizePolish(longStructured) { - t.Fatalf("structured draft should trigger polish") - } -} - -func TestLocalFinalizeDraftQualityScore(t *testing.T) { - t.Parallel() - - high := localFinalizeDraftQualityScore("1. Step one: inspect environment.\n2. Step two: apply fix.\nNext steps: validate rollout and summarize conclusions.") - low := localFinalizeDraftQualityScore("todo\ntodo\ntodo") - if high <= low { - t.Fatalf("expected high-quality score > low-quality score, got %.2f <= %.2f", high, low) - } - if high < 0.30 { - t.Fatalf("unexpectedly low high-quality score: %.2f", high) - } -} - -func TestClamp01(t *testing.T) { - t.Parallel() - - if got := clamp01(-0.1); got != 0 { - t.Fatalf("expected 0, got %v", got) - } - if got := clamp01(1.2); got != 1 { - t.Fatalf("expected 1, got %v", got) - } -} - -func TestInferLocalReflectionSignal(t *testing.T) { - t.Parallel() - - blocked := inferLocalReflectionSignal([]providers.Message{ - {Role: "tool", Content: "Error: permission denied"}, - {Role: "tool", Content: "Error: permission denied"}, - }) - if blocked.decision != "blocked" || blocked.uncertain { - t.Fatalf("expected blocked deterministic signal, got %+v", blocked) - } - - done := inferLocalReflectionSignal([]providers.Message{ - {Role: "tool", Content: "success: completed ok"}, - }) - if done.decision != "done" || done.uncertain { - t.Fatalf("expected done deterministic signal, got %+v", done) - } - - unknown := inferLocalReflectionSignal([]providers.Message{ - {Role: "tool", Content: "partial result"}, - }) - if unknown.decision != "continue" || !unknown.uncertain { - t.Fatalf("expected uncertain continue signal, got %+v", unknown) - } -} - -func TestShouldForceSelfRepairHeuristic(t *testing.T) { - t.Parallel() - - needs, prompt := shouldForceSelfRepairHeuristic("Please provide steps to fix this", "It should work.") - if !needs || strings.TrimSpace(prompt) == "" { - t.Fatalf("expected self-repair for missing structured steps") - } - - needs, _ = shouldForceSelfRepairHeuristic("summarize logs", "Here is summary.") - if needs { - t.Fatalf("did not expect repair for normal concise response") - } -} - -func TestShouldRetryAfterDeferralNoTools(t *testing.T) { - t.Parallel() - - if !shouldRetryAfterDeferralNoTools("需要先查看一下当前工作区才能确认,请稍等。", "当前状态", 1, false, false, false) { - t.Fatalf("expected deferral text to trigger retry") - } - if shouldRetryAfterDeferralNoTools("这里是直接答案。", "当前状态", 1, false, false, false) { - t.Fatalf("did not expect normal direct answer to trigger retry") - } - if shouldRetryAfterDeferralNoTools("需要先查看一下当前工作区才能确认,请稍等。", "当前状态", 2, false, false, false) { - t.Fatalf("did not expect retry after first iteration") - } - if !shouldRetryAfterDeferralNoTools("你可以先执行 git clone,然后配置远程。", "帮我链接git仓库", 1, false, false, false) { - t.Fatalf("expected git task instruction-only reply to trigger retry") - } -} - -func TestControlIntentKeywordGate(t *testing.T) { - t.Parallel() - - if shouldAttemptAutonomyIntentInference("当前系统状态看板") { - t.Fatalf("generic status should not trigger autonomy inference") - } - if !shouldAttemptAutonomyIntentInference("查看 autonomy mode 状态") { - t.Fatalf("autonomy keyword should trigger autonomy inference") - } - if shouldAttemptAutoLearnIntentInference("当前系统状态看板") { - t.Fatalf("generic status should not trigger auto-learn inference") - } - if !shouldAttemptAutoLearnIntentInference("请看一下 auto-learn 状态") { - t.Fatalf("auto-learn keyword should trigger auto-learn inference") - } -} - -func TestShouldRejectNaturalizedOutput(t *testing.T) { - t.Parallel() - - if !shouldRejectNaturalizedOutput("不", "Autonomy mode is not enabled.") { - t.Fatalf("expected single-token degeneration to be rejected") - } - if shouldRejectNaturalizedOutput("Autonomy mode is currently not enabled.", "Autonomy mode is not enabled.") { - t.Fatalf("expected normal rewrite to be accepted") - } -} - -func TestRunLLMToolLoop_RecoversFromDeferralWithoutTools(t *testing.T) { - t.Parallel() - - var toolExecCount int32 - reg := tools.NewToolRegistry() - reg.Register(replayToolImpl{ - name: "read_file", - run: func(ctx context.Context, args map[string]interface{}) (string, error) { - atomic.AddInt32(&toolExecCount, 1) - return "README content", nil - }, - }) - - provider := &deferralRetryProvider{} - al := &AgentLoop{ - provider: provider, - providersByProxy: map[string]providers.LLMProvider{"proxy": provider}, - modelsByProxy: map[string][]string{"proxy": []string{"test-model"}}, - proxy: "proxy", - model: "test-model", - maxIterations: 5, - llmCallTimeout: 3 * time.Second, - tools: reg, - sessions: session.NewSessionManager(""), - workspace: t.TempDir(), - } - - msgs := []providers.Message{ - {Role: "system", Content: "test system"}, - {Role: "user", Content: "当前状态"}, - } - - out, iterations, err := al.runLLMToolLoop(context.Background(), msgs, "deferral:test", false, nil) - if err != nil { - t.Fatalf("runLLMToolLoop error: %v", err) - } - if strings.TrimSpace(out) == "" { - t.Fatalf("expected non-empty output") - } - if provider.planCalls < 3 { - t.Fatalf("expected additional planning round after deferral, got planCalls=%d", provider.planCalls) - } - if atomic.LoadInt32(&toolExecCount) == 0 { - t.Fatalf("expected tool execution after deferral recovery") - } - if iterations < 3 { - t.Fatalf("expected at least 3 iterations, got %d", iterations) - } -} - -func TestSelfRepairMemoryPromptDedup(t *testing.T) { - t.Parallel() - - mem := selfRepairMemory{ - promptsUsed: map[string]struct{}{ - normalizeRepairPrompt("Provide structured step-by-step answer."): {}, - }, - } - if !promptSeen(mem, "provide structured step-by-step answer.") { - t.Fatalf("expected prompt to be detected as already used") - } - if promptSeen(mem, "different prompt") { - t.Fatalf("did not expect unrelated prompt to be marked used") - } -}