mirror of
https://github.com/YspCoder/clawgo.git
synced 2026-04-18 07:27:28 +08:00
fix loop
This commit is contained in:
47
README.md
47
README.md
@@ -97,6 +97,7 @@ clawgo config reload
|
||||
- `config set` 使用原子写入。
|
||||
- 网关运行时若热更新失败,会自动回滚备份,避免损坏配置。
|
||||
- `--config` 指定的自定义配置路径会被 `config` 命令与通道内 `/config` 指令一致使用。
|
||||
- 配置加载使用严格 JSON 解析:未知字段与多余 JSON 内容会直接报错,避免拼写错误被静默忽略。
|
||||
|
||||
## 🌐 通道与消息控制
|
||||
|
||||
@@ -106,20 +107,22 @@ clawgo config reload
|
||||
/help
|
||||
/stop
|
||||
/status
|
||||
/status run [run_id|latest]
|
||||
/status wait <run_id|latest> [timeout_seconds]
|
||||
/config get <path>
|
||||
/config set <path> <value>
|
||||
/reload
|
||||
/autonomy start [idle]
|
||||
/autonomy stop
|
||||
/autonomy status
|
||||
/autolearn start [interval]
|
||||
/autolearn stop
|
||||
/autolearn status
|
||||
/pipeline list
|
||||
/pipeline status <pipeline_id>
|
||||
/pipeline ready <pipeline_id>
|
||||
```
|
||||
|
||||
自主与学习控制默认使用自然语言,不再依赖斜杠命令。例如:
|
||||
- `开始自主模式,每 30 分钟巡检一次`
|
||||
- `停止自动学习`
|
||||
- `看看最新 run 的状态`
|
||||
- `等待 run-1739950000000000000-8 完成后告诉我结果`
|
||||
|
||||
调度语义(按 `session_key`):
|
||||
- 同会话严格 FIFO 串行处理。
|
||||
- `/stop` 会中断当前回复并继续队列后续消息。
|
||||
@@ -156,6 +159,38 @@ clawgo channel test --channel telegram --to <chat_id> -m "ping"
|
||||
}
|
||||
```
|
||||
|
||||
运行控制配置示例(意图阈值 / 自主循环守卫 / 运行态保留):
|
||||
|
||||
```json
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"runtime_control": {
|
||||
"intent_high_confidence": 0.75,
|
||||
"intent_confirm_min_confidence": 0.45,
|
||||
"intent_max_input_chars": 1200,
|
||||
"confirm_ttl_seconds": 300,
|
||||
"confirm_max_clarification_turns": 2,
|
||||
"autonomy_tick_interval_sec": 20,
|
||||
"autonomy_min_run_interval_sec": 20,
|
||||
"autonomy_idle_threshold_sec": 20,
|
||||
"autonomy_max_rounds_without_user": 120,
|
||||
"autonomy_max_pending_duration_sec": 180,
|
||||
"autonomy_max_consecutive_stalls": 3,
|
||||
"autolearn_max_rounds_without_user": 200,
|
||||
"run_state_ttl_seconds": 1800,
|
||||
"run_state_max": 500,
|
||||
"run_control_latest_keywords": ["latest", "last run", "recent run", "最新", "最近", "上一次", "上个"],
|
||||
"run_control_wait_keywords": ["wait", "等待", "等到", "阻塞"],
|
||||
"run_control_status_keywords": ["status", "状态", "进度", "running", "运行"],
|
||||
"run_control_run_mention_keywords": ["run", "任务"],
|
||||
"run_control_minute_units": ["分钟", "min", "mins", "minute", "minutes", "m"],
|
||||
"tool_parallel_safe_names": ["read_file", "list_files", "find_files", "grep_files", "memory_search", "web_search", "repo_map", "system_info"],
|
||||
"tool_max_parallel_calls": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 🤖 多智能体编排 (Pipeline)
|
||||
|
||||
内置标准化编排工具:
|
||||
|
||||
47
README_EN.md
47
README_EN.md
@@ -97,6 +97,7 @@ Notes:
|
||||
- `config set` uses atomic write.
|
||||
- If gateway reload fails while running, config auto-rolls back from backup.
|
||||
- Custom `--config` path is consistently used by CLI config commands and in-channel `/config` commands.
|
||||
- Config loading uses strict JSON decoding: unknown fields and trailing JSON content now fail fast.
|
||||
|
||||
## 🌐 Channels and Message Control
|
||||
|
||||
@@ -106,20 +107,22 @@ Supported in-channel slash commands:
|
||||
/help
|
||||
/stop
|
||||
/status
|
||||
/status run [run_id|latest]
|
||||
/status wait <run_id|latest> [timeout_seconds]
|
||||
/config get <path>
|
||||
/config set <path> <value>
|
||||
/reload
|
||||
/autonomy start [idle]
|
||||
/autonomy stop
|
||||
/autonomy status
|
||||
/autolearn start [interval]
|
||||
/autolearn stop
|
||||
/autolearn status
|
||||
/pipeline list
|
||||
/pipeline status <pipeline_id>
|
||||
/pipeline ready <pipeline_id>
|
||||
```
|
||||
|
||||
Autonomy and auto-learn control now default to natural language (no slash commands required). Examples:
|
||||
- `start autonomy mode and check every 30 minutes`
|
||||
- `stop auto-learn`
|
||||
- `show latest run status`
|
||||
- `wait for run-1739950000000000000-8 and report when done`
|
||||
|
||||
Scheduling semantics (`session_key` based):
|
||||
- Strict FIFO processing per session.
|
||||
- `/stop` interrupts current response and continues queued messages.
|
||||
@@ -156,6 +159,38 @@ Context compaction config example:
|
||||
}
|
||||
```
|
||||
|
||||
Runtime-control config example (intent thresholds / autonomy guards / run-state retention):
|
||||
|
||||
```json
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"runtime_control": {
|
||||
"intent_high_confidence": 0.75,
|
||||
"intent_confirm_min_confidence": 0.45,
|
||||
"intent_max_input_chars": 1200,
|
||||
"confirm_ttl_seconds": 300,
|
||||
"confirm_max_clarification_turns": 2,
|
||||
"autonomy_tick_interval_sec": 20,
|
||||
"autonomy_min_run_interval_sec": 20,
|
||||
"autonomy_idle_threshold_sec": 20,
|
||||
"autonomy_max_rounds_without_user": 120,
|
||||
"autonomy_max_pending_duration_sec": 180,
|
||||
"autonomy_max_consecutive_stalls": 3,
|
||||
"autolearn_max_rounds_without_user": 200,
|
||||
"run_state_ttl_seconds": 1800,
|
||||
"run_state_max": 500,
|
||||
"run_control_latest_keywords": ["latest", "last run", "recent run", "最新", "最近", "上一次", "上个"],
|
||||
"run_control_wait_keywords": ["wait", "等待", "等到", "阻塞"],
|
||||
"run_control_status_keywords": ["status", "状态", "进度", "running", "运行"],
|
||||
"run_control_run_mention_keywords": ["run", "任务"],
|
||||
"run_control_minute_units": ["分钟", "min", "mins", "minute", "minutes", "m"],
|
||||
"tool_parallel_safe_names": ["read_file", "list_files", "find_files", "grep_files", "memory_search", "web_search", "repo_map", "system_info"],
|
||||
"tool_max_parallel_calls": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 🤖 Multi-Agent Orchestration (Pipeline)
|
||||
|
||||
Built-in orchestration tools:
|
||||
|
||||
@@ -14,6 +14,29 @@
|
||||
"keep_recent_messages": 20,
|
||||
"max_summary_chars": 6000,
|
||||
"max_transcript_chars": 20000
|
||||
},
|
||||
"runtime_control": {
|
||||
"intent_high_confidence": 0.75,
|
||||
"intent_confirm_min_confidence": 0.45,
|
||||
"intent_max_input_chars": 1200,
|
||||
"confirm_ttl_seconds": 300,
|
||||
"confirm_max_clarification_turns": 2,
|
||||
"autonomy_tick_interval_sec": 20,
|
||||
"autonomy_min_run_interval_sec": 20,
|
||||
"autonomy_idle_threshold_sec": 20,
|
||||
"autonomy_max_rounds_without_user": 120,
|
||||
"autonomy_max_pending_duration_sec": 180,
|
||||
"autonomy_max_consecutive_stalls": 3,
|
||||
"autolearn_max_rounds_without_user": 200,
|
||||
"run_state_ttl_seconds": 1800,
|
||||
"run_state_max": 500,
|
||||
"run_control_latest_keywords": ["latest", "last run", "recent run", "最新", "最近", "上一次", "上个"],
|
||||
"run_control_wait_keywords": ["wait", "等待", "等到", "阻塞"],
|
||||
"run_control_status_keywords": ["status", "状态", "进度", "running", "运行"],
|
||||
"run_control_run_mention_keywords": ["run", "任务"],
|
||||
"run_control_minute_units": ["分钟", "min", "mins", "minute", "minutes", "m"],
|
||||
"tool_parallel_safe_names": ["read_file", "list_files", "find_files", "grep_files", "memory_search", "web_search", "repo_map", "system_info"],
|
||||
"tool_max_parallel_calls": 2
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
2423
pkg/agent/loop.go
2423
pkg/agent/loop.go
File diff suppressed because it is too large
Load Diff
242
pkg/agent/loop_replay_baseline_test.go
Normal file
242
pkg/agent/loop_replay_baseline_test.go
Normal file
@@ -0,0 +1,242 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"clawgo/pkg/providers"
|
||||
"clawgo/pkg/session"
|
||||
"clawgo/pkg/tools"
|
||||
)
|
||||
|
||||
type replayScenario string
|
||||
|
||||
const (
|
||||
replayDirectSuccess replayScenario = "direct_success"
|
||||
replayOneToolSuccess replayScenario = "one_tool_success"
|
||||
replayRepeatedToolCall replayScenario = "repeated_tool_call"
|
||||
replayTransientFailure replayScenario = "transient_failure"
|
||||
replayPermissionBlock replayScenario = "permission_block"
|
||||
)
|
||||
|
||||
type replayProvider struct {
|
||||
mu sync.Mutex
|
||||
scenario replayScenario
|
||||
planCalls int
|
||||
reflectCalls int
|
||||
finalizeCalls int
|
||||
polishCalls int
|
||||
totalCalls int
|
||||
}
|
||||
|
||||
func (p *replayProvider) Chat(ctx context.Context, messages []providers.Message, defs []providers.ToolDefinition, model string, options map[string]interface{}) (*providers.LLMResponse, error) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
p.totalCalls++
|
||||
|
||||
last := ""
|
||||
if len(messages) > 0 {
|
||||
last = strings.TrimSpace(messages[len(messages)-1].Content)
|
||||
}
|
||||
|
||||
// Phase-2 polish call.
|
||||
if len(defs) == 0 && len(messages) > 0 && strings.Contains(strings.ToLower(strings.TrimSpace(messages[0].Content)), "rewrite the draft answer for end users") {
|
||||
p.polishCalls++
|
||||
return &providers.LLMResponse{Content: "polished final response"}, nil
|
||||
}
|
||||
|
||||
// Reflection call.
|
||||
if len(defs) == 0 && strings.Contains(last, "Classify current execution progress using JSON only.") {
|
||||
p.reflectCalls++
|
||||
switch p.scenario {
|
||||
case replayTransientFailure:
|
||||
return &providers.LLMResponse{Content: `{"decision":"continue","reason":"transient failures may recover","confidence":0.74}`}, nil
|
||||
case replayRepeatedToolCall:
|
||||
return &providers.LLMResponse{Content: `{"decision":"continue","reason":"need another attempt","confidence":0.72}`}, nil
|
||||
default:
|
||||
return &providers.LLMResponse{Content: `{"decision":"continue","reason":"default continue","confidence":0.60}`}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Finalization draft call.
|
||||
if len(defs) == 0 {
|
||||
p.finalizeCalls++
|
||||
return &providers.LLMResponse{Content: "draft final response"}, nil
|
||||
}
|
||||
|
||||
// Planning/tool-loop calls.
|
||||
p.planCalls++
|
||||
switch p.scenario {
|
||||
case replayDirectSuccess:
|
||||
return &providers.LLMResponse{Content: "direct completed"}, nil
|
||||
case replayOneToolSuccess:
|
||||
if p.planCalls == 1 {
|
||||
return &providers.LLMResponse{
|
||||
Content: "call one tool",
|
||||
ToolCalls: []providers.ToolCall{
|
||||
{ID: "tc-1", Name: "ok_tool", Arguments: map[string]interface{}{"x": 1}},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
return &providers.LLMResponse{Content: "task completed after one tool"}, nil
|
||||
case replayRepeatedToolCall:
|
||||
return &providers.LLMResponse{
|
||||
Content: "repeating tool",
|
||||
ToolCalls: []providers.ToolCall{
|
||||
{ID: fmt.Sprintf("tc-r-%d", p.planCalls), Name: "ok_tool", Arguments: map[string]interface{}{"same": true}},
|
||||
},
|
||||
}, nil
|
||||
case replayTransientFailure:
|
||||
return &providers.LLMResponse{
|
||||
Content: "transient fail tool",
|
||||
ToolCalls: []providers.ToolCall{
|
||||
{ID: fmt.Sprintf("tc-t-%d", p.planCalls), Name: "fail_tool_transient", Arguments: map[string]interface{}{}},
|
||||
},
|
||||
}, nil
|
||||
case replayPermissionBlock:
|
||||
return &providers.LLMResponse{
|
||||
Content: "permission fail tool",
|
||||
ToolCalls: []providers.ToolCall{
|
||||
{ID: "tc-p-1", Name: "fail_tool_permission", Arguments: map[string]interface{}{}},
|
||||
},
|
||||
}, nil
|
||||
default:
|
||||
return &providers.LLMResponse{Content: "unexpected scenario"}, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (p *replayProvider) GetDefaultModel() string { return "test-model" }
|
||||
|
||||
type replayToolImpl struct {
|
||||
name string
|
||||
run func(context.Context, map[string]interface{}) (string, error)
|
||||
}
|
||||
|
||||
func (t replayToolImpl) Name() string { return t.name }
|
||||
func (t replayToolImpl) Description() string { return "replay tool" }
|
||||
func (t replayToolImpl) Parameters() map[string]interface{} {
|
||||
return map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{},
|
||||
}
|
||||
}
|
||||
func (t replayToolImpl) Execute(ctx context.Context, args map[string]interface{}) (string, error) {
|
||||
return t.run(ctx, args)
|
||||
}
|
||||
|
||||
type replayCaseResult struct {
|
||||
name replayScenario
|
||||
ok bool
|
||||
iterations int
|
||||
llmCalls int
|
||||
reflectCalls int
|
||||
}
|
||||
|
||||
func TestAgentLoopReplayBaseline(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
scenarios := []replayScenario{
|
||||
replayDirectSuccess,
|
||||
replayOneToolSuccess,
|
||||
replayRepeatedToolCall,
|
||||
replayTransientFailure,
|
||||
replayPermissionBlock,
|
||||
}
|
||||
|
||||
results := make([]replayCaseResult, 0, len(scenarios))
|
||||
for _, sc := range scenarios {
|
||||
sc := sc
|
||||
t.Run(string(sc), func(t *testing.T) {
|
||||
reg := tools.NewToolRegistry()
|
||||
reg.Register(replayToolImpl{
|
||||
name: "ok_tool",
|
||||
run: func(ctx context.Context, args map[string]interface{}) (string, error) {
|
||||
return "ok", nil
|
||||
},
|
||||
})
|
||||
reg.Register(replayToolImpl{
|
||||
name: "fail_tool_transient",
|
||||
run: func(ctx context.Context, args map[string]interface{}) (string, error) {
|
||||
return "", fmt.Errorf("temporary unavailable 503")
|
||||
},
|
||||
})
|
||||
reg.Register(replayToolImpl{
|
||||
name: "fail_tool_permission",
|
||||
run: func(ctx context.Context, args map[string]interface{}) (string, error) {
|
||||
return "", fmt.Errorf("permission denied")
|
||||
},
|
||||
})
|
||||
|
||||
provider := &replayProvider{scenario: sc}
|
||||
al := &AgentLoop{
|
||||
provider: provider,
|
||||
providersByProxy: map[string]providers.LLMProvider{"proxy": provider},
|
||||
modelsByProxy: map[string][]string{"proxy": {"test-model"}},
|
||||
proxy: "proxy",
|
||||
model: "test-model",
|
||||
maxIterations: 6,
|
||||
llmCallTimeout: 3 * time.Second,
|
||||
tools: reg,
|
||||
sessions: session.NewSessionManager(""),
|
||||
workspace: t.TempDir(),
|
||||
}
|
||||
|
||||
msgs := []providers.Message{
|
||||
{Role: "system", Content: "you are a test agent"},
|
||||
{Role: "user", Content: "complete task"},
|
||||
}
|
||||
|
||||
out, iterations, err := al.runLLMToolLoop(context.Background(), msgs, "replay:"+string(sc), false, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("runLLMToolLoop error: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(out) == "" {
|
||||
t.Fatalf("empty output")
|
||||
}
|
||||
results = append(results, replayCaseResult{
|
||||
name: sc,
|
||||
ok: true,
|
||||
iterations: iterations,
|
||||
llmCalls: provider.totalCalls,
|
||||
reflectCalls: provider.reflectCalls,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
total := len(results)
|
||||
if total != len(scenarios) {
|
||||
t.Fatalf("unexpected results count: %d", total)
|
||||
}
|
||||
success := 0
|
||||
iterSum := 0
|
||||
llmSum := 0
|
||||
reflectSum := 0
|
||||
for _, r := range results {
|
||||
if r.ok {
|
||||
success++
|
||||
}
|
||||
iterSum += r.iterations
|
||||
llmSum += r.llmCalls
|
||||
reflectSum += r.reflectCalls
|
||||
}
|
||||
successRate := float64(success) / float64(total)
|
||||
avgIter := float64(iterSum) / float64(total)
|
||||
avgLLM := float64(llmSum) / float64(total)
|
||||
avgReflect := float64(reflectSum) / float64(total)
|
||||
|
||||
t.Logf("Replay baseline: success_rate=%.2f avg_iterations=%.2f avg_llm_calls=%.2f avg_reflect_calls=%.2f", successRate, avgIter, avgLLM, avgReflect)
|
||||
|
||||
if successRate < 1.0 {
|
||||
t.Fatalf("expected all scenarios to succeed, got success_rate=%.2f", successRate)
|
||||
}
|
||||
if avgIter > 3.6 {
|
||||
t.Fatalf("avg_iterations too high: %.2f", avgIter)
|
||||
}
|
||||
if avgLLM > 6.0 {
|
||||
t.Fatalf("avg_llm_calls too high: %.2f", avgLLM)
|
||||
}
|
||||
}
|
||||
192
pkg/agent/loop_run_control_test.go
Normal file
192
pkg/agent/loop_run_control_test.go
Normal file
@@ -0,0 +1,192 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"clawgo/pkg/bus"
|
||||
)
|
||||
|
||||
func TestDetectRunControlIntent(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
intent, ok := detectRunControlIntent("请等待 run-123-7 120 秒后告诉我状态")
|
||||
if !ok {
|
||||
t.Fatalf("expected run control intent")
|
||||
}
|
||||
if intent.runID != "run-123-7" {
|
||||
t.Fatalf("unexpected run id: %s", intent.runID)
|
||||
}
|
||||
if !intent.wait {
|
||||
t.Fatalf("expected wait=true")
|
||||
}
|
||||
if intent.timeout != 120*time.Second {
|
||||
t.Fatalf("unexpected timeout: %s", intent.timeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectRunControlIntentLatest(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
intent, ok := detectRunControlIntent("latest run status")
|
||||
if !ok {
|
||||
t.Fatalf("expected latest run status intent")
|
||||
}
|
||||
if !intent.latest {
|
||||
t.Fatalf("expected latest=true")
|
||||
}
|
||||
if intent.runID != "" {
|
||||
t.Fatalf("expected empty run id")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRunWaitTimeout_MinClamp(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
got := parseRunWaitTimeout("wait run-1-1 1 s")
|
||||
if got != minRunWaitTimeout {
|
||||
t.Fatalf("expected min timeout %s, got %s", minRunWaitTimeout, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRunWaitTimeout_MinuteUnit(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
got := parseRunWaitTimeout("等待 run-1-1 2 分钟")
|
||||
if got != 2*time.Minute {
|
||||
t.Fatalf("expected 2m, got %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectRunControlIntentIgnoresNonControlText(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
if _, ok := detectRunControlIntent("帮我写一个README"); ok {
|
||||
t.Fatalf("did not expect run control intent")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectRunControlIntentWithCustomLexicon(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
lex := runControlLexicon{
|
||||
latestKeywords: []string{"newest"},
|
||||
waitKeywords: []string{"block"},
|
||||
statusKeywords: []string{"health"},
|
||||
runMentionKeywords: []string{"job"},
|
||||
minuteUnits: map[string]struct{}{"mins": {}},
|
||||
}
|
||||
|
||||
intent, ok := detectRunControlIntentWithLexicon("block run-55-1 for 2 mins and show health", lex)
|
||||
if !ok {
|
||||
t.Fatalf("expected intent with custom lexicon")
|
||||
}
|
||||
if !intent.wait {
|
||||
t.Fatalf("expected wait=true")
|
||||
}
|
||||
if intent.timeout != 2*time.Minute {
|
||||
t.Fatalf("unexpected timeout: %s", intent.timeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLatestRunStateBySession(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
now := time.Now()
|
||||
al := &AgentLoop{
|
||||
runStates: map[string]*runState{
|
||||
"run-1-1": {
|
||||
runID: "run-1-1",
|
||||
sessionKey: "s1",
|
||||
startedAt: now.Add(-2 * time.Minute),
|
||||
},
|
||||
"run-1-2": {
|
||||
runID: "run-1-2",
|
||||
sessionKey: "s1",
|
||||
startedAt: now.Add(-time.Minute),
|
||||
},
|
||||
"run-2-1": {
|
||||
runID: "run-2-1",
|
||||
sessionKey: "s2",
|
||||
startedAt: now,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
rs, ok := al.latestRunState("s1")
|
||||
if !ok {
|
||||
t.Fatalf("expected state for s1")
|
||||
}
|
||||
if rs.runID != "run-1-2" {
|
||||
t.Fatalf("unexpected run id: %s", rs.runID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleSlashCommand_StatusRunLatest(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
al := &AgentLoop{
|
||||
runStates: map[string]*runState{
|
||||
"run-100-1": {
|
||||
runID: "run-100-1",
|
||||
sessionKey: "s1",
|
||||
status: runStatusOK,
|
||||
acceptedAt: time.Now().Add(-time.Minute),
|
||||
startedAt: time.Now().Add(-time.Minute),
|
||||
endedAt: time.Now().Add(-30 * time.Second),
|
||||
done: closedChan(),
|
||||
},
|
||||
},
|
||||
}
|
||||
handled, out, err := al.handleSlashCommand(context.Background(), bus.InboundMessage{
|
||||
Content: "/status run latest",
|
||||
SessionKey: "s1",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !handled {
|
||||
t.Fatalf("expected command handled")
|
||||
}
|
||||
if out == "" || !containsAnySubstring(out, "run-100-1", "Run ID: run-100-1") {
|
||||
t.Fatalf("unexpected output: %s", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleSlashCommand_StatusWaitDoneRun(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
al := &AgentLoop{
|
||||
runStates: map[string]*runState{
|
||||
"run-200-2": {
|
||||
runID: "run-200-2",
|
||||
sessionKey: "s1",
|
||||
status: runStatusOK,
|
||||
acceptedAt: time.Now().Add(-time.Minute),
|
||||
startedAt: time.Now().Add(-time.Minute),
|
||||
endedAt: time.Now().Add(-20 * time.Second),
|
||||
done: closedChan(),
|
||||
},
|
||||
},
|
||||
}
|
||||
handled, out, err := al.handleSlashCommand(context.Background(), bus.InboundMessage{
|
||||
Content: "/status wait run-200-2 3",
|
||||
SessionKey: "s1",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !handled {
|
||||
t.Fatalf("expected command handled")
|
||||
}
|
||||
if out == "" || !containsAnySubstring(out, "run-200-2", "Run ID: run-200-2") {
|
||||
t.Fatalf("unexpected output: %s", out)
|
||||
}
|
||||
}
|
||||
|
||||
func closedChan() chan struct{} {
|
||||
ch := make(chan struct{})
|
||||
close(ch)
|
||||
return ch
|
||||
}
|
||||
617
pkg/agent/loop_toolloop_test.go
Normal file
617
pkg/agent/loop_toolloop_test.go
Normal file
@@ -0,0 +1,617 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"clawgo/pkg/config"
|
||||
"clawgo/pkg/providers"
|
||||
"clawgo/pkg/session"
|
||||
"clawgo/pkg/tools"
|
||||
)
|
||||
|
||||
func TestToolCallsSignatureStableForSameInput(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
calls := []providers.ToolCall{
|
||||
{
|
||||
Name: "shell",
|
||||
Arguments: map[string]interface{}{"cmd": "ls -la", "cwd": "/tmp"},
|
||||
},
|
||||
{
|
||||
Name: "read_file",
|
||||
Arguments: map[string]interface{}{"path": "README.md"},
|
||||
},
|
||||
}
|
||||
|
||||
s1 := toolCallsSignature(calls)
|
||||
s2 := toolCallsSignature(calls)
|
||||
if s1 == "" {
|
||||
t.Fatalf("expected non-empty signature")
|
||||
}
|
||||
if s1 != s2 {
|
||||
t.Fatalf("expected stable signature, got %q vs %q", s1, s2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolCallsSignatureDiffersByArguments(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
callsA := []providers.ToolCall{
|
||||
{Name: "shell", Arguments: map[string]interface{}{"cmd": "ls -la"}},
|
||||
}
|
||||
callsB := []providers.ToolCall{
|
||||
{Name: "shell", Arguments: map[string]interface{}{"cmd": "pwd"}},
|
||||
}
|
||||
|
||||
if toolCallsSignature(callsA) == toolCallsSignature(callsB) {
|
||||
t.Fatalf("expected different signatures for different arguments")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeReflectDecision(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
if got := normalizeReflectDecision("DONE"); got != "done" {
|
||||
t.Fatalf("expected done, got %s", got)
|
||||
}
|
||||
if got := normalizeReflectDecision("blocked"); got != "blocked" {
|
||||
t.Fatalf("expected blocked, got %s", got)
|
||||
}
|
||||
if got := normalizeReflectDecision("unknown"); got != "continue" {
|
||||
t.Fatalf("expected continue, got %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldTriggerReflectionReplayScenarios(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
al := &AgentLoop{maxIterations: 5}
|
||||
tests := []struct {
|
||||
name string
|
||||
state toolLoopState
|
||||
outcome toolActOutcome
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "tool failure",
|
||||
state: toolLoopState{iteration: 2},
|
||||
outcome: toolActOutcome{executedCalls: 2, roundToolErrors: 1, lastToolResult: "Error: denied"},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "repetition hint",
|
||||
state: toolLoopState{iteration: 2, repeatedToolCallRounds: 1},
|
||||
outcome: toolActOutcome{executedCalls: 1, lastToolResult: "ok"},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "near iteration limit",
|
||||
state: toolLoopState{iteration: 4},
|
||||
outcome: toolActOutcome{executedCalls: 1, lastToolResult: "ok"},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "empty tool result",
|
||||
state: toolLoopState{iteration: 1},
|
||||
outcome: toolActOutcome{executedCalls: 1, lastToolResult: ""},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "healthy progress",
|
||||
state: toolLoopState{iteration: 1},
|
||||
outcome: toolActOutcome{executedCalls: 1, lastToolResult: "done step 1"},
|
||||
want: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
tt := tt
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := al.shouldTriggerReflection(tt.state, tt.outcome)
|
||||
if got != tt.want {
|
||||
t.Fatalf("shouldTriggerReflection=%v want=%v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldTriggerReflectionCooldown(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
al := &AgentLoop{maxIterations: 10}
|
||||
state := toolLoopState{
|
||||
iteration: 3,
|
||||
lastReflectIteration: 2,
|
||||
}
|
||||
// No hard trigger, within cooldown window -> false.
|
||||
if al.shouldTriggerReflection(state, toolActOutcome{executedCalls: 1, lastToolResult: "ok"}) {
|
||||
t.Fatalf("expected reflection suppressed by cooldown")
|
||||
}
|
||||
|
||||
// Hard trigger bypasses cooldown.
|
||||
if !al.shouldTriggerReflection(state, toolActOutcome{executedCalls: 1, roundToolErrors: 1, lastToolResult: "Error: x"}) {
|
||||
t.Fatalf("expected hard trigger to bypass cooldown")
|
||||
}
|
||||
}
|
||||
|
||||
type replayTool struct {
|
||||
name string
|
||||
parallelSafe *bool
|
||||
resourceKeys func(args map[string]interface{}) []string
|
||||
run func(context.Context, map[string]interface{}) (string, error)
|
||||
}
|
||||
|
||||
func (t replayTool) Name() string { return t.name }
|
||||
func (t replayTool) Description() string { return "replay tool" }
|
||||
func (t replayTool) Parameters() map[string]interface{} {
|
||||
return map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{},
|
||||
}
|
||||
}
|
||||
func (t replayTool) Execute(ctx context.Context, args map[string]interface{}) (string, error) {
|
||||
if t.run != nil {
|
||||
return t.run(ctx, args)
|
||||
}
|
||||
return fmt.Sprintf("ok:%s", t.name), nil
|
||||
}
|
||||
|
||||
func (t replayTool) ParallelSafe() bool {
|
||||
if t.parallelSafe == nil {
|
||||
return false
|
||||
}
|
||||
return *t.parallelSafe
|
||||
}
|
||||
|
||||
func (t replayTool) ResourceKeys(args map[string]interface{}) []string {
|
||||
if t.resourceKeys == nil {
|
||||
return nil
|
||||
}
|
||||
return t.resourceKeys(args)
|
||||
}
|
||||
|
||||
func TestActToolCalls_BudgetTruncationReplay(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
reg := tools.NewToolRegistry()
|
||||
calls := make([]providers.ToolCall, 0, toolLoopMaxCallsPerIteration+2)
|
||||
for i := 0; i < toolLoopMaxCallsPerIteration+2; i++ {
|
||||
name := fmt.Sprintf("tool_%d", i)
|
||||
reg.Register(replayTool{name: name})
|
||||
calls = append(calls, providers.ToolCall{
|
||||
ID: fmt.Sprintf("tc-%d", i),
|
||||
Name: name,
|
||||
Arguments: map[string]interface{}{},
|
||||
})
|
||||
}
|
||||
|
||||
al := &AgentLoop{
|
||||
tools: reg,
|
||||
sessions: session.NewSessionManager(""),
|
||||
}
|
||||
msgs := []providers.Message{}
|
||||
out := al.actToolCalls(context.Background(), "", calls, &msgs, "s1", 1, toolLoopBudget{}, false, nil)
|
||||
|
||||
if !out.truncated {
|
||||
t.Fatalf("expected truncation due to budget")
|
||||
}
|
||||
if out.executedCalls != toolLoopMaxCallsPerIteration {
|
||||
t.Fatalf("executed=%d want=%d", out.executedCalls, toolLoopMaxCallsPerIteration)
|
||||
}
|
||||
if out.droppedCalls != 2 {
|
||||
t.Fatalf("dropped=%d want=2", out.droppedCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeToolLoopBudget(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
al := &AgentLoop{maxIterations: 6}
|
||||
|
||||
early := al.computeToolLoopBudget(toolLoopState{iteration: 1})
|
||||
if early.maxCallsPerIteration <= toolLoopMaxCallsPerIteration {
|
||||
t.Fatalf("expected wider early budget, got %d", early.maxCallsPerIteration)
|
||||
}
|
||||
|
||||
degraded := al.computeToolLoopBudget(toolLoopState{iteration: 2, consecutiveAllToolErrorRounds: 1})
|
||||
if degraded.maxCallsPerIteration >= toolLoopMaxCallsPerIteration {
|
||||
t.Fatalf("expected tighter degraded budget, got %d", degraded.maxCallsPerIteration)
|
||||
}
|
||||
|
||||
nearLimit := al.computeToolLoopBudget(toolLoopState{iteration: 5})
|
||||
if nearLimit.maxCallsPerIteration != toolLoopMinCallsPerIteration {
|
||||
t.Fatalf("expected minimal near-limit calls, got %d", nearLimit.maxCallsPerIteration)
|
||||
}
|
||||
if nearLimit.singleCallTimeout != toolLoopMinSingleCallTimeout {
|
||||
t.Fatalf("expected minimal near-limit timeout, got %s", nearLimit.singleCallTimeout)
|
||||
}
|
||||
|
||||
lowConfContinue := al.computeToolLoopBudget(toolLoopState{
|
||||
iteration: 2,
|
||||
lastReflectDecision: "continue",
|
||||
lastReflectConfidence: 0.42,
|
||||
lastReflectIteration: 1,
|
||||
})
|
||||
if lowConfContinue.maxCallsPerIteration >= toolLoopMaxCallsPerIteration {
|
||||
t.Fatalf("expected low-confidence continue to tighten calls, got %d", lowConfContinue.maxCallsPerIteration)
|
||||
}
|
||||
|
||||
highConfContinue := al.computeToolLoopBudget(toolLoopState{
|
||||
iteration: 2,
|
||||
lastReflectDecision: "continue",
|
||||
lastReflectConfidence: 0.91,
|
||||
lastReflectIteration: 1,
|
||||
})
|
||||
if highConfContinue.maxCallsPerIteration <= toolLoopMaxCallsPerIteration {
|
||||
t.Fatalf("expected high-confidence continue to widen calls, got %d", highConfContinue.maxCallsPerIteration)
|
||||
}
|
||||
|
||||
blocked := al.computeToolLoopBudget(toolLoopState{
|
||||
iteration: 2,
|
||||
lastReflectDecision: "blocked",
|
||||
lastReflectConfidence: 0.8,
|
||||
lastReflectIteration: 1,
|
||||
})
|
||||
if blocked.maxCallsPerIteration != toolLoopMinCallsPerIteration {
|
||||
t.Fatalf("expected blocked reflection to force min calls, got %d", blocked.maxCallsPerIteration)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParallelSafeToolDeclarationOverridesWhitelist(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
yes := true
|
||||
no := false
|
||||
reg := tools.NewToolRegistry()
|
||||
reg.Register(replayTool{name: "read_file", parallelSafe: &no})
|
||||
reg.Register(replayTool{name: "custom_safe", parallelSafe: &yes})
|
||||
|
||||
al := &AgentLoop{
|
||||
tools: reg,
|
||||
parallelSafeTools: map[string]struct{}{
|
||||
"read_file": {},
|
||||
},
|
||||
}
|
||||
|
||||
if al.isParallelSafeTool("read_file") {
|
||||
t.Fatalf("tool declaration should override whitelist to false")
|
||||
}
|
||||
if !al.isParallelSafeTool("custom_safe") {
|
||||
t.Fatalf("tool declaration true should be respected")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifyToolExecutionError(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
typ, retryable, blocked := classifyToolExecutionError(fmt.Errorf("permission denied to write file"), false)
|
||||
if typ != "permission" || retryable || !blocked {
|
||||
t.Fatalf("unexpected permission classification: %s %v %v", typ, retryable, blocked)
|
||||
}
|
||||
|
||||
typ, retryable, blocked = classifyToolExecutionError(fmt.Errorf("temporary unavailable 503"), false)
|
||||
if typ != "transient" || !retryable || blocked {
|
||||
t.Fatalf("unexpected transient classification: %s %v %v", typ, retryable, blocked)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSummarizeToolActOutcome(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
out := summarizeToolActOutcome(toolActOutcome{
|
||||
executedCalls: 1,
|
||||
records: []toolExecutionRecord{
|
||||
{Tool: "shell", Status: "error", ErrorType: "permission", Retryable: false},
|
||||
},
|
||||
hardErrors: 1,
|
||||
blockedLikely: true,
|
||||
})
|
||||
if out == "" || !strings.Contains(out, "\"blocked_likely\":true") {
|
||||
t.Fatalf("unexpected summary: %s", out)
|
||||
}
|
||||
if !strings.Contains(out, "\"error_type\":\"permission\"") {
|
||||
t.Fatalf("missing record fields in summary: %s", out)
|
||||
}
|
||||
if !strings.Contains(out, "\"records_truncated\":0") {
|
||||
t.Fatalf("expected records_truncated field, got: %s", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldPersistToolResultRecord(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
if !shouldPersistToolResultRecord(toolExecutionRecord{Status: "ok"}, 0, 3) {
|
||||
t.Fatalf("first tool result should persist")
|
||||
}
|
||||
if !shouldPersistToolResultRecord(toolExecutionRecord{Status: "ok"}, 2, 3) {
|
||||
t.Fatalf("last tool result should persist")
|
||||
}
|
||||
if shouldPersistToolResultRecord(toolExecutionRecord{Status: "ok"}, 1, 3) {
|
||||
t.Fatalf("middle successful tool result should be skipped")
|
||||
}
|
||||
if !shouldPersistToolResultRecord(toolExecutionRecord{Status: "error"}, 1, 3) {
|
||||
t.Fatalf("error tool result should persist")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCompactToolExecutionRecords(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
records := []toolExecutionRecord{
|
||||
{Tool: "a", Status: "ok"},
|
||||
{Tool: "b", Status: "error", ErrorType: "permission"},
|
||||
{Tool: "c", Status: "ok"},
|
||||
{Tool: "d", Status: "error", ErrorType: "transient"},
|
||||
{Tool: "e", Status: "ok"},
|
||||
{Tool: "f", Status: "ok"},
|
||||
}
|
||||
out, truncated := compactToolExecutionRecords(records, 4)
|
||||
if len(out) != 4 {
|
||||
t.Fatalf("expected compact len 4, got %d", len(out))
|
||||
}
|
||||
if truncated != 2 {
|
||||
t.Fatalf("expected truncated 2, got %d", truncated)
|
||||
}
|
||||
foundErr := 0
|
||||
for _, r := range out {
|
||||
if r.Status == "error" {
|
||||
foundErr++
|
||||
}
|
||||
}
|
||||
if foundErr < 2 {
|
||||
t.Fatalf("expected to keep error records, got %d", foundErr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldRunToolCallsInParallel(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
al := &AgentLoop{
|
||||
parallelSafeTools: map[string]struct{}{
|
||||
"read_file": {},
|
||||
"memory_search": {},
|
||||
},
|
||||
}
|
||||
ok := al.shouldRunToolCallsInParallel([]providers.ToolCall{
|
||||
{Name: "read_file"}, {Name: "memory_search"},
|
||||
})
|
||||
if !ok {
|
||||
t.Fatalf("expected parallel-safe tools to run in parallel")
|
||||
}
|
||||
|
||||
notOK := al.shouldRunToolCallsInParallel([]providers.ToolCall{
|
||||
{Name: "read_file"}, {Name: "shell"},
|
||||
})
|
||||
if notOK {
|
||||
t.Fatalf("expected mixed tool set to stay serial")
|
||||
}
|
||||
}
|
||||
|
||||
func TestActToolCalls_ParallelExecutionForSafeTools(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var active int32
|
||||
var maxActive int32
|
||||
probe := func() {
|
||||
cur := atomic.AddInt32(&active, 1)
|
||||
for {
|
||||
old := atomic.LoadInt32(&maxActive)
|
||||
if cur <= old || atomic.CompareAndSwapInt32(&maxActive, old, cur) {
|
||||
break
|
||||
}
|
||||
}
|
||||
time.Sleep(40 * time.Millisecond)
|
||||
atomic.AddInt32(&active, -1)
|
||||
}
|
||||
|
||||
reg := tools.NewToolRegistry()
|
||||
reg.Register(replayToolImpl{name: "read_file", run: func(ctx context.Context, args map[string]interface{}) (string, error) {
|
||||
probe()
|
||||
return "ok", nil
|
||||
}})
|
||||
reg.Register(replayToolImpl{name: "memory_search", run: func(ctx context.Context, args map[string]interface{}) (string, error) {
|
||||
probe()
|
||||
return "ok", nil
|
||||
}})
|
||||
|
||||
al := &AgentLoop{
|
||||
tools: reg,
|
||||
sessions: session.NewSessionManager(""),
|
||||
parallelSafeTools: map[string]struct{}{"read_file": {}, "memory_search": {}},
|
||||
maxParallelCalls: 2,
|
||||
}
|
||||
msgs := []providers.Message{}
|
||||
calls := []providers.ToolCall{
|
||||
{ID: "1", Name: "read_file", Arguments: map[string]interface{}{}},
|
||||
{ID: "2", Name: "memory_search", Arguments: map[string]interface{}{}},
|
||||
}
|
||||
|
||||
al.actToolCalls(context.Background(), "", calls, &msgs, "s1", 1, toolLoopBudget{
|
||||
maxCallsPerIteration: 2,
|
||||
singleCallTimeout: 2 * time.Second,
|
||||
maxActDuration: 2 * time.Second,
|
||||
}, false, nil)
|
||||
|
||||
if atomic.LoadInt32(&maxActive) < 2 {
|
||||
t.Fatalf("expected concurrent execution, maxActive=%d", maxActive)
|
||||
}
|
||||
}
|
||||
|
||||
func TestActToolCalls_ResourceConflictForcesSerial(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var active int32
|
||||
var maxActive int32
|
||||
probe := func() {
|
||||
cur := atomic.AddInt32(&active, 1)
|
||||
for {
|
||||
old := atomic.LoadInt32(&maxActive)
|
||||
if cur <= old || atomic.CompareAndSwapInt32(&maxActive, old, cur) {
|
||||
break
|
||||
}
|
||||
}
|
||||
time.Sleep(35 * time.Millisecond)
|
||||
atomic.AddInt32(&active, -1)
|
||||
}
|
||||
|
||||
yes := true
|
||||
reg := tools.NewToolRegistry()
|
||||
reg.Register(replayTool{
|
||||
name: "read_file",
|
||||
parallelSafe: &yes,
|
||||
resourceKeys: func(args map[string]interface{}) []string { return []string{"fs:/tmp/a"} },
|
||||
run: func(ctx context.Context, args map[string]interface{}) (string, error) {
|
||||
probe()
|
||||
return "ok", nil
|
||||
},
|
||||
})
|
||||
reg.Register(replayTool{
|
||||
name: "memory_search",
|
||||
parallelSafe: &yes,
|
||||
resourceKeys: func(args map[string]interface{}) []string { return []string{"fs:/tmp/a"} },
|
||||
run: func(ctx context.Context, args map[string]interface{}) (string, error) {
|
||||
probe()
|
||||
return "ok", nil
|
||||
},
|
||||
})
|
||||
|
||||
al := &AgentLoop{
|
||||
tools: reg,
|
||||
sessions: session.NewSessionManager(""),
|
||||
parallelSafeTools: map[string]struct{}{"read_file": {}, "memory_search": {}},
|
||||
maxParallelCalls: 2,
|
||||
}
|
||||
|
||||
msgs := []providers.Message{}
|
||||
calls := []providers.ToolCall{
|
||||
{ID: "1", Name: "read_file", Arguments: map[string]interface{}{}},
|
||||
{ID: "2", Name: "memory_search", Arguments: map[string]interface{}{}},
|
||||
}
|
||||
al.actToolCalls(context.Background(), "", calls, &msgs, "s1", 1, toolLoopBudget{
|
||||
maxCallsPerIteration: 2,
|
||||
singleCallTimeout: 2 * time.Second,
|
||||
maxActDuration: 2 * time.Second,
|
||||
}, false, nil)
|
||||
|
||||
if atomic.LoadInt32(&maxActive) > 1 {
|
||||
t.Fatalf("expected serial execution on same resource key, maxActive=%d", maxActive)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadToolParallelPolicyFromConfig(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
allowed, maxCalls := loadToolParallelPolicyFromConfig(config.RuntimeControlConfig{
|
||||
ToolParallelSafeNames: []string{"Read_File", "memory_search"},
|
||||
ToolMaxParallelCalls: 3,
|
||||
})
|
||||
if maxCalls != 3 {
|
||||
t.Fatalf("unexpected max calls: %d", maxCalls)
|
||||
}
|
||||
if _, ok := allowed["read_file"]; !ok {
|
||||
t.Fatalf("expected normalized read_file in allowed set")
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldRunFinalizePolish(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
short := "done"
|
||||
if shouldRunFinalizePolish(short) {
|
||||
t.Fatalf("short draft should skip polish")
|
||||
}
|
||||
|
||||
longButFlat := strings.Repeat("a", finalizeDraftMinCharsForPolish+10)
|
||||
if shouldRunFinalizePolish(longButFlat) {
|
||||
t.Fatalf("flat draft should skip polish")
|
||||
}
|
||||
|
||||
longStructured := "1. Step one: check environment variables and baseline configs.\n2. Step two: apply fix and rerun validations.\nNext: verify rollout and provide follow-up actions."
|
||||
if !shouldRunFinalizePolish(longStructured) {
|
||||
t.Fatalf("structured draft should trigger polish")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLocalFinalizeDraftQualityScore(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
high := localFinalizeDraftQualityScore("1. Step one: inspect environment.\n2. Step two: apply fix.\nNext steps: validate rollout and summarize conclusions.")
|
||||
low := localFinalizeDraftQualityScore("todo\ntodo\ntodo")
|
||||
if high <= low {
|
||||
t.Fatalf("expected high-quality score > low-quality score, got %.2f <= %.2f", high, low)
|
||||
}
|
||||
if high < 0.30 {
|
||||
t.Fatalf("unexpectedly low high-quality score: %.2f", high)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClamp01(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
if got := clamp01(-0.1); got != 0 {
|
||||
t.Fatalf("expected 0, got %v", got)
|
||||
}
|
||||
if got := clamp01(1.2); got != 1 {
|
||||
t.Fatalf("expected 1, got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInferLocalReflectionSignal(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
blocked := inferLocalReflectionSignal([]providers.Message{
|
||||
{Role: "tool", Content: "Error: permission denied"},
|
||||
{Role: "tool", Content: "Error: permission denied"},
|
||||
})
|
||||
if blocked.decision != "blocked" || blocked.uncertain {
|
||||
t.Fatalf("expected blocked deterministic signal, got %+v", blocked)
|
||||
}
|
||||
|
||||
done := inferLocalReflectionSignal([]providers.Message{
|
||||
{Role: "tool", Content: "success: completed ok"},
|
||||
})
|
||||
if done.decision != "done" || done.uncertain {
|
||||
t.Fatalf("expected done deterministic signal, got %+v", done)
|
||||
}
|
||||
|
||||
unknown := inferLocalReflectionSignal([]providers.Message{
|
||||
{Role: "tool", Content: "partial result"},
|
||||
})
|
||||
if unknown.decision != "continue" || !unknown.uncertain {
|
||||
t.Fatalf("expected uncertain continue signal, got %+v", unknown)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldForceSelfRepairHeuristic(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
needs, prompt := shouldForceSelfRepairHeuristic("Please provide steps to fix this", "It should work.")
|
||||
if !needs || strings.TrimSpace(prompt) == "" {
|
||||
t.Fatalf("expected self-repair for missing structured steps")
|
||||
}
|
||||
|
||||
needs, _ = shouldForceSelfRepairHeuristic("summarize logs", "Here is summary.")
|
||||
if needs {
|
||||
t.Fatalf("did not expect repair for normal concise response")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelfRepairMemoryPromptDedup(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
mem := selfRepairMemory{
|
||||
promptsUsed: map[string]struct{}{
|
||||
normalizeRepairPrompt("Provide structured step-by-step answer."): {},
|
||||
},
|
||||
}
|
||||
if !promptSeen(mem, "provide structured step-by-step answer.") {
|
||||
t.Fatalf("expected prompt to be detected as already used")
|
||||
}
|
||||
if promptSeen(mem, "different prompt") {
|
||||
t.Fatalf("did not expect unrelated prompt to be marked used")
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,10 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
@@ -35,6 +38,31 @@ type AgentDefaults struct {
|
||||
Temperature float64 `json:"temperature" env:"CLAWGO_AGENTS_DEFAULTS_TEMPERATURE"`
|
||||
MaxToolIterations int `json:"max_tool_iterations" env:"CLAWGO_AGENTS_DEFAULTS_MAX_TOOL_ITERATIONS"`
|
||||
ContextCompaction ContextCompactionConfig `json:"context_compaction"`
|
||||
RuntimeControl RuntimeControlConfig `json:"runtime_control"`
|
||||
}
|
||||
|
||||
type RuntimeControlConfig struct {
|
||||
IntentHighConfidence float64 `json:"intent_high_confidence" env:"CLAWGO_INTENT_HIGH_CONFIDENCE"`
|
||||
IntentConfirmMinConfidence float64 `json:"intent_confirm_min_confidence" env:"CLAWGO_INTENT_CONFIRM_MIN_CONFIDENCE"`
|
||||
IntentMaxInputChars int `json:"intent_max_input_chars" env:"CLAWGO_INTENT_MAX_INPUT_CHARS"`
|
||||
ConfirmTTLSeconds int `json:"confirm_ttl_seconds" env:"CLAWGO_CONFIRM_TTL_SECONDS"`
|
||||
ConfirmMaxClarificationTurns int `json:"confirm_max_clarification_turns" env:"CLAWGO_CONFIRM_MAX_CLARIFY_TURNS"`
|
||||
AutonomyTickIntervalSec int `json:"autonomy_tick_interval_sec" env:"CLAWGO_AUTONOMY_TICK_INTERVAL_SEC"`
|
||||
AutonomyMinRunIntervalSec int `json:"autonomy_min_run_interval_sec" env:"CLAWGO_AUTONOMY_MIN_RUN_INTERVAL_SEC"`
|
||||
AutonomyIdleThresholdSec int `json:"autonomy_idle_threshold_sec" env:"CLAWGO_AUTONOMY_IDLE_THRESHOLD_SEC"`
|
||||
AutonomyMaxRoundsWithoutUser int `json:"autonomy_max_rounds_without_user" env:"CLAWGO_AUTONOMY_MAX_ROUNDS_WITHOUT_USER"`
|
||||
AutonomyMaxPendingDurationSec int `json:"autonomy_max_pending_duration_sec" env:"CLAWGO_AUTONOMY_MAX_PENDING_DURATION_SEC"`
|
||||
AutonomyMaxConsecutiveStalls int `json:"autonomy_max_consecutive_stalls" env:"CLAWGO_AUTONOMY_MAX_STALLS"`
|
||||
AutoLearnMaxRoundsWithoutUser int `json:"autolearn_max_rounds_without_user" env:"CLAWGO_AUTOLEARN_MAX_ROUNDS_WITHOUT_USER"`
|
||||
RunStateTTLSeconds int `json:"run_state_ttl_seconds" env:"CLAWGO_RUN_STATE_TTL_SECONDS"`
|
||||
RunStateMax int `json:"run_state_max" env:"CLAWGO_RUN_STATE_MAX"`
|
||||
RunControlLatestKeywords []string `json:"run_control_latest_keywords"`
|
||||
RunControlWaitKeywords []string `json:"run_control_wait_keywords"`
|
||||
RunControlStatusKeywords []string `json:"run_control_status_keywords"`
|
||||
RunControlRunMentionKeywords []string `json:"run_control_run_mention_keywords"`
|
||||
RunControlMinuteUnits []string `json:"run_control_minute_units"`
|
||||
ToolParallelSafeNames []string `json:"tool_parallel_safe_names"`
|
||||
ToolMaxParallelCalls int `json:"tool_max_parallel_calls"`
|
||||
}
|
||||
|
||||
type ContextCompactionConfig struct {
|
||||
@@ -246,6 +274,29 @@ func DefaultConfig() *Config {
|
||||
MaxSummaryChars: 6000,
|
||||
MaxTranscriptChars: 20000,
|
||||
},
|
||||
RuntimeControl: RuntimeControlConfig{
|
||||
IntentHighConfidence: 0.75,
|
||||
IntentConfirmMinConfidence: 0.45,
|
||||
IntentMaxInputChars: 1200,
|
||||
ConfirmTTLSeconds: 300,
|
||||
ConfirmMaxClarificationTurns: 2,
|
||||
AutonomyTickIntervalSec: 20,
|
||||
AutonomyMinRunIntervalSec: 20,
|
||||
AutonomyIdleThresholdSec: 20,
|
||||
AutonomyMaxRoundsWithoutUser: 120,
|
||||
AutonomyMaxPendingDurationSec: 180,
|
||||
AutonomyMaxConsecutiveStalls: 3,
|
||||
AutoLearnMaxRoundsWithoutUser: 200,
|
||||
RunStateTTLSeconds: 1800,
|
||||
RunStateMax: 500,
|
||||
RunControlLatestKeywords: []string{"latest", "last run", "recent run", "最新", "最近", "上一次", "上个"},
|
||||
RunControlWaitKeywords: []string{"wait", "等待", "等到", "阻塞"},
|
||||
RunControlStatusKeywords: []string{"status", "状态", "进度", "running", "运行"},
|
||||
RunControlRunMentionKeywords: []string{"run", "任务"},
|
||||
RunControlMinuteUnits: []string{"分钟", "min", "mins", "minute", "minutes", "m"},
|
||||
ToolParallelSafeNames: []string{"read_file", "list_files", "find_files", "grep_files", "memory_search", "web_search", "repo_map", "system_info"},
|
||||
ToolMaxParallelCalls: 2,
|
||||
},
|
||||
},
|
||||
},
|
||||
Channels: ChannelsConfig{
|
||||
@@ -382,7 +433,7 @@ func LoadConfig(path string) (*Config, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(data, cfg); err != nil {
|
||||
if err := unmarshalConfigStrict(data, cfg); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -393,6 +444,22 @@ func LoadConfig(path string) (*Config, error) {
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func unmarshalConfigStrict(data []byte, cfg *Config) error {
|
||||
dec := json.NewDecoder(bytes.NewReader(data))
|
||||
dec.DisallowUnknownFields()
|
||||
if err := dec.Decode(cfg); err != nil {
|
||||
return err
|
||||
}
|
||||
var extra json.RawMessage
|
||||
if err := dec.Decode(&extra); err != io.EOF {
|
||||
if err == nil {
|
||||
return fmt.Errorf("invalid config: trailing JSON content")
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func SaveConfig(path string, cfg *Config) error {
|
||||
cfg.mu.RLock()
|
||||
defer cfg.mu.RUnlock()
|
||||
|
||||
95
pkg/config/config_test.go
Normal file
95
pkg/config/config_test.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestLoadConfigRejectsUnknownField(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "config.json")
|
||||
content := `{
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"runtime_control": {
|
||||
"intent_high_confidence": 0.8,
|
||||
"unknown_field": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}`
|
||||
if err := os.WriteFile(cfgPath, []byte(content), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
|
||||
_, err := LoadConfig(cfgPath)
|
||||
if err == nil {
|
||||
t.Fatalf("expected unknown field error")
|
||||
}
|
||||
if !strings.Contains(strings.ToLower(err.Error()), "unknown field") {
|
||||
t.Fatalf("expected unknown field error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfigRejectsTrailingJSONContent(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "config.json")
|
||||
content := `{"agents":{"defaults":{"runtime_control":{"intent_high_confidence":0.8}}}}{"extra":true}`
|
||||
if err := os.WriteFile(cfgPath, []byte(content), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
|
||||
_, err := LoadConfig(cfgPath)
|
||||
if err == nil {
|
||||
t.Fatalf("expected trailing json content error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "trailing JSON content") {
|
||||
t.Fatalf("expected trailing JSON content error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfigAllowsKnownRuntimeControlFields(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "config.json")
|
||||
content := `{
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"runtime_control": {
|
||||
"intent_high_confidence": 0.88,
|
||||
"run_state_max": 321,
|
||||
"run_control_wait_keywords": ["wait", "block"],
|
||||
"tool_parallel_safe_names": ["read_file", "memory_search"],
|
||||
"tool_max_parallel_calls": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
}`
|
||||
if err := os.WriteFile(cfgPath, []byte(content), 0o644); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
|
||||
cfg, err := LoadConfig(cfgPath)
|
||||
if err != nil {
|
||||
t.Fatalf("load config: %v", err)
|
||||
}
|
||||
if got := cfg.Agents.Defaults.RuntimeControl.IntentHighConfidence; got != 0.88 {
|
||||
t.Fatalf("intent_high_confidence mismatch: got %.2f", got)
|
||||
}
|
||||
if got := cfg.Agents.Defaults.RuntimeControl.RunStateMax; got != 321 {
|
||||
t.Fatalf("run_state_max mismatch: got %d", got)
|
||||
}
|
||||
if got := len(cfg.Agents.Defaults.RuntimeControl.RunControlWaitKeywords); got != 2 {
|
||||
t.Fatalf("run_control_wait_keywords mismatch: got %d", got)
|
||||
}
|
||||
if got := cfg.Agents.Defaults.RuntimeControl.ToolMaxParallelCalls; got != 3 {
|
||||
t.Fatalf("tool_max_parallel_calls mismatch: got %d", got)
|
||||
}
|
||||
}
|
||||
@@ -17,6 +17,58 @@ func Validate(cfg *Config) []error {
|
||||
if cfg.Agents.Defaults.MaxToolIterations <= 0 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.max_tool_iterations must be > 0"))
|
||||
}
|
||||
rc := cfg.Agents.Defaults.RuntimeControl
|
||||
if rc.IntentHighConfidence <= 0 || rc.IntentHighConfidence > 1 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.intent_high_confidence must be in (0,1]"))
|
||||
}
|
||||
if rc.IntentConfirmMinConfidence < 0 || rc.IntentConfirmMinConfidence >= rc.IntentHighConfidence {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.intent_confirm_min_confidence must be >= 0 and < intent_high_confidence"))
|
||||
}
|
||||
if rc.IntentMaxInputChars < 200 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.intent_max_input_chars must be >= 200"))
|
||||
}
|
||||
if rc.ConfirmTTLSeconds <= 0 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.confirm_ttl_seconds must be > 0"))
|
||||
}
|
||||
if rc.ConfirmMaxClarificationTurns < 0 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.confirm_max_clarification_turns must be >= 0"))
|
||||
}
|
||||
if rc.AutonomyTickIntervalSec < 5 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.autonomy_tick_interval_sec must be >= 5"))
|
||||
}
|
||||
if rc.AutonomyMinRunIntervalSec < 5 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.autonomy_min_run_interval_sec must be >= 5"))
|
||||
}
|
||||
if rc.AutonomyIdleThresholdSec < 5 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.autonomy_idle_threshold_sec must be >= 5"))
|
||||
}
|
||||
if rc.AutonomyMaxRoundsWithoutUser <= 0 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.autonomy_max_rounds_without_user must be > 0"))
|
||||
}
|
||||
if rc.AutonomyMaxPendingDurationSec < 10 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.autonomy_max_pending_duration_sec must be >= 10"))
|
||||
}
|
||||
if rc.AutonomyMaxConsecutiveStalls <= 0 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.autonomy_max_consecutive_stalls must be > 0"))
|
||||
}
|
||||
if rc.AutoLearnMaxRoundsWithoutUser <= 0 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.autolearn_max_rounds_without_user must be > 0"))
|
||||
}
|
||||
if rc.RunStateTTLSeconds < 60 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.run_state_ttl_seconds must be >= 60"))
|
||||
}
|
||||
if rc.RunStateMax <= 0 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.run_state_max must be > 0"))
|
||||
}
|
||||
errs = append(errs, validateNonEmptyStringList("agents.defaults.runtime_control.run_control_latest_keywords", rc.RunControlLatestKeywords)...)
|
||||
errs = append(errs, validateNonEmptyStringList("agents.defaults.runtime_control.run_control_wait_keywords", rc.RunControlWaitKeywords)...)
|
||||
errs = append(errs, validateNonEmptyStringList("agents.defaults.runtime_control.run_control_status_keywords", rc.RunControlStatusKeywords)...)
|
||||
errs = append(errs, validateNonEmptyStringList("agents.defaults.runtime_control.run_control_run_mention_keywords", rc.RunControlRunMentionKeywords)...)
|
||||
errs = append(errs, validateNonEmptyStringList("agents.defaults.runtime_control.run_control_minute_units", rc.RunControlMinuteUnits)...)
|
||||
errs = append(errs, validateNonEmptyStringList("agents.defaults.runtime_control.tool_parallel_safe_names", rc.ToolParallelSafeNames)...)
|
||||
if rc.ToolMaxParallelCalls <= 0 {
|
||||
errs = append(errs, fmt.Errorf("agents.defaults.runtime_control.tool_max_parallel_calls must be > 0"))
|
||||
}
|
||||
if cfg.Agents.Defaults.ContextCompaction.Enabled {
|
||||
cc := cfg.Agents.Defaults.ContextCompaction
|
||||
if cc.Mode != "" {
|
||||
@@ -199,3 +251,16 @@ func providerConfigByName(cfg *Config, name string) (ProviderConfig, bool) {
|
||||
pc, ok := cfg.Providers.Proxies[name]
|
||||
return pc, ok
|
||||
}
|
||||
|
||||
func validateNonEmptyStringList(path string, values []string) []error {
|
||||
if len(values) == 0 {
|
||||
return nil
|
||||
}
|
||||
var errs []error
|
||||
for i, value := range values {
|
||||
if strings.TrimSpace(value) == "" {
|
||||
errs = append(errs, fmt.Errorf("%s[%d] must not be empty", path, i))
|
||||
}
|
||||
}
|
||||
return errs
|
||||
}
|
||||
|
||||
@@ -9,6 +9,20 @@ type Tool interface {
|
||||
Execute(ctx context.Context, args map[string]interface{}) (string, error)
|
||||
}
|
||||
|
||||
// ParallelSafeTool is an optional capability interface.
|
||||
// If implemented by a tool, AgentLoop should trust this declaration
|
||||
// over name-based whitelist when deciding parallel execution safety.
|
||||
type ParallelSafeTool interface {
|
||||
ParallelSafe() bool
|
||||
}
|
||||
|
||||
// ResourceScopedTool is an optional capability interface.
|
||||
// If implemented by a tool, AgentLoop can avoid running calls that touch
|
||||
// the same resource keys in parallel.
|
||||
type ResourceScopedTool interface {
|
||||
ResourceKeys(args map[string]interface{}) []string
|
||||
}
|
||||
|
||||
func ToolToSchema(tool Tool) map[string]interface{} {
|
||||
return map[string]interface{}{
|
||||
"type": "function",
|
||||
|
||||
Reference in New Issue
Block a user