track autonomy retry windows and dedupe metrics in task/status views

This commit is contained in:
DBT
2026-02-24 01:36:12 +00:00
parent 6ceb37ef55
commit 4667f7afcf
3 changed files with 68 additions and 27 deletions

View File

@@ -7,6 +7,7 @@ import (
"path/filepath" "path/filepath"
"sort" "sort"
"strings" "strings"
"time"
"clawgo/pkg/config" "clawgo/pkg/config"
"clawgo/pkg/providers" "clawgo/pkg/providers"
@@ -128,8 +129,11 @@ func statusCmd() {
fmt.Printf(" - %s\n", key) fmt.Printf(" - %s\n", key)
} }
} }
if summary, err := collectAutonomyTaskSummary(filepath.Join(workspace, "memory", "tasks.json")); err == nil { if summary, nextRetry, dedupeHits, err := collectAutonomyTaskSummary(filepath.Join(workspace, "memory", "tasks.json")); err == nil {
fmt.Printf("Autonomy Tasks: todo=%d doing=%d blocked=%d done=%d\n", summary["todo"], summary["doing"], summary["blocked"], summary["done"]) fmt.Printf("Autonomy Tasks: todo=%d doing=%d blocked=%d done=%d dedupe_hits=%d\n", summary["todo"], summary["doing"], summary["blocked"], summary["done"], dedupeHits)
if nextRetry != "" {
fmt.Printf("Autonomy Next Retry: %s\n", nextRetry)
}
} }
} }
} }
@@ -244,28 +248,42 @@ func collectTriggerErrorCounts(path string) (map[string]int, error) {
return counts, nil return counts, nil
} }
// collectAutonomyTaskSummary reads the autonomy task store at path and
// aggregates it for the status view.
//
// It returns, in order:
//   - the number of tasks per status, keyed by "todo", "doing", "blocked" and
//     "done" (statuses are trimmed and lower-cased before matching; tasks with
//     any other status are not counted),
//   - the earliest retry_after timestamp found on any task, formatted as
//     RFC 3339, or "" when no task carries a parseable one,
//   - the sum of dedupe_hits over all tasks,
//   - an error when the file cannot be read or is not a JSON array of tasks.
//
// A missing file is not an error: it yields all-zero counts, "" and 0.
func collectAutonomyTaskSummary(path string) (map[string]int, string, int, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		if os.IsNotExist(err) {
			return map[string]int{"todo": 0, "doing": 0, "blocked": 0, "done": 0}, "", 0, nil
		}
		return nil, "", 0, err
	}

	// Only the fields needed for the summary are decoded; everything else in
	// the task records is ignored.
	var items []struct {
		Status     string `json:"status"`
		RetryAfter string `json:"retry_after"`
		DedupeHits int    `json:"dedupe_hits"`
	}
	if err := json.Unmarshal(data, &items); err != nil {
		return nil, "", 0, err
	}

	summary := map[string]int{"todo": 0, "doing": 0, "blocked": 0, "done": 0}
	nextRetry := ""
	nextRetryAt := time.Time{}
	totalDedupe := 0
	for _, it := range items {
		s := strings.ToLower(strings.TrimSpace(it.Status))
		if _, ok := summary[s]; ok {
			summary[s]++
		}
		totalDedupe += it.DedupeHits

		// Parse the trimmed value: the emptiness check is done on the trimmed
		// string, so parsing the raw one would silently drop a timestamp that
		// merely has surrounding whitespace.
		raw := strings.TrimSpace(it.RetryAfter)
		if raw == "" {
			continue
		}
		t, err := time.Parse(time.RFC3339, raw)
		if err != nil {
			// Best effort: an unparseable retry window must not hide the
			// rest of the summary.
			continue
		}
		// NOTE(review): retry_after is considered regardless of task status;
		// confirm the writer clears stale windows on tasks that are no
		// longer blocked.
		if nextRetryAt.IsZero() || t.Before(nextRetryAt) {
			nextRetryAt = t
			nextRetry = t.Format(time.RFC3339)
		}
	}
	return summary, nextRetry, totalDedupe, nil
}
func collectRecentSubagentSessions(sessionsDir string, limit int) ([]string, error) { func collectRecentSubagentSessions(sessionsDir string, limit int) ([]string, error) {

View File

@@ -39,7 +39,9 @@ type taskState struct {
Status string // idle|running|waiting|blocked|completed Status string // idle|running|waiting|blocked|completed
LastRunAt time.Time LastRunAt time.Time
LastAutonomyAt time.Time LastAutonomyAt time.Time
RetryAfter time.Time
ConsecutiveStall int ConsecutiveStall int
DedupeHits int
} }
type Engine struct { type Engine struct {
@@ -125,17 +127,24 @@ func (e *Engine) tick() {
st, ok := e.state[t.ID] st, ok := e.state[t.ID]
if !ok { if !ok {
status := "idle" status := "idle"
retryAfter := time.Time{}
if old, ok := storedMap[t.ID]; ok { if old, ok := storedMap[t.ID]; ok {
if old.Status == "blocked" { if old.Status == "blocked" {
status = "blocked" status = "blocked"
} }
if strings.TrimSpace(old.RetryAfter) != "" {
if rt, err := time.Parse(time.RFC3339, old.RetryAfter); err == nil {
retryAfter = rt
}
}
} }
e.state[t.ID] = &taskState{ID: t.ID, Content: t.Content, Priority: t.Priority, DueAt: t.DueAt, Status: status} e.state[t.ID] = &taskState{ID: t.ID, Content: t.Content, Priority: t.Priority, DueAt: t.DueAt, Status: status, RetryAfter: retryAfter, DedupeHits: t.DedupeHits}
continue continue
} }
st.Content = t.Content st.Content = t.Content
st.Priority = t.Priority st.Priority = t.Priority
st.DueAt = t.DueAt st.DueAt = t.DueAt
st.DedupeHits = t.DedupeHits
if st.Status == "completed" { if st.Status == "completed" {
st.Status = "idle" st.Status = "idle"
} }
@@ -178,6 +187,9 @@ func (e *Engine) tick() {
continue continue
} }
if st.Status == "blocked" { if st.Status == "blocked" {
if !st.RetryAfter.IsZero() && now.Before(st.RetryAfter) {
continue
}
if now.Sub(st.LastRunAt) >= blockedRetryBackoff(st.ConsecutiveStall, e.opts.MinRunIntervalSec) { if now.Sub(st.LastRunAt) >= blockedRetryBackoff(st.ConsecutiveStall, e.opts.MinRunIntervalSec) {
st.Status = "idle" st.Status = "idle"
} else { } else {
@@ -191,6 +203,7 @@ func (e *Engine) tick() {
st.ConsecutiveStall++ st.ConsecutiveStall++
if st.ConsecutiveStall > e.opts.MaxConsecutiveStalls { if st.ConsecutiveStall > e.opts.MaxConsecutiveStalls {
st.Status = "blocked" st.Status = "blocked"
st.RetryAfter = now.Add(blockedRetryBackoff(st.ConsecutiveStall, e.opts.MinRunIntervalSec))
e.sendFailureNotification(st, "max consecutive stalls reached") e.sendFailureNotification(st, "max consecutive stalls reached")
continue continue
} }
@@ -207,10 +220,11 @@ func (e *Engine) tick() {
} }
// todoItem is one entry produced by Engine.scanTodos.
//
// ID, Content, Priority and DueAt are copied onto the engine's per-task state
// on every tick. DueAt is kept as a string; its format is not established
// here. DedupeHits is incremented by scanTodos each time another scanned item
// is folded into this entry (presumably a duplicate with the same ID —
// confirm against the full scanTodos body).
type todoItem struct {
	ID         string
	Content    string
	Priority   string
	DueAt      string
	DedupeHits int
}
func (e *Engine) scanTodos() []todoItem { func (e *Engine) scanTodos() []todoItem {
@@ -229,6 +243,7 @@ func (e *Engine) scanTodos() []todoItem {
if cur.DueAt == "" && it.DueAt != "" { if cur.DueAt == "" && it.DueAt != "" {
cur.DueAt = it.DueAt cur.DueAt = it.DueAt
} }
cur.DedupeHits++
merged[it.ID] = cur merged[it.ID] = cur
return return
} }
@@ -411,14 +426,20 @@ func (e *Engine) persistStateLocked() {
default: default:
status = "todo" status = "todo"
} }
retryAfter := ""
if !st.RetryAfter.IsZero() {
retryAfter = st.RetryAfter.UTC().Format(time.RFC3339)
}
items = append(items, TaskItem{ items = append(items, TaskItem{
ID: st.ID, ID: st.ID,
Content: st.Content, Content: st.Content,
Priority: st.Priority, Priority: st.Priority,
DueAt: st.DueAt, DueAt: st.DueAt,
Status: status, Status: status,
Source: "memory_todo", RetryAfter: retryAfter,
UpdatedAt: nowRFC3339(), Source: "memory_todo",
DedupeHits: st.DedupeHits,
UpdatedAt: nowRFC3339(),
}) })
} }
_ = e.taskStore.Save(items) _ = e.taskStore.Save(items)

View File

@@ -10,13 +10,15 @@ import (
) )
// TaskItem is the persisted JSON form of one autonomy task.
//
// RetryAfter is written as an RFC 3339 UTC timestamp while a retry window is
// set and omitted otherwise; readers parse it with time.RFC3339. DedupeHits
// carries the task's dedupe counter and is omitted when zero. DueAt is
// likewise omitted when empty. UpdatedAt is presumably an RFC 3339 timestamp
// (it is filled from nowRFC3339 — confirm).
type TaskItem struct {
	ID         string `json:"id"`
	Content    string `json:"content"`
	Priority   string `json:"priority"`
	DueAt      string `json:"due_at,omitempty"`
	Status     string `json:"status"` // todo|doing|blocked|done
	RetryAfter string `json:"retry_after,omitempty"`
	Source     string `json:"source"`
	DedupeHits int    `json:"dedupe_hits,omitempty"`
	UpdatedAt  string `json:"updated_at"`
}
type TaskStore struct { type TaskStore struct {