ekg-memory integration: record repeated-error incidents to memory and use memory-linked signatures for earlier escalation

This commit is contained in:
DBT
2026-03-01 06:21:04 +00:00
parent c46c0b9d2d
commit cc48c028ca
4 changed files with 70 additions and 2 deletions

View File

@@ -340,6 +340,7 @@ func (e *Engine) tick() {
st.BlockReason = "repeated_error_signature"
st.RetryAfter = now.Add(5 * time.Minute)
e.enqueueAutoRepairTaskLocked(st, errSig)
e.appendMemoryIncidentLocked(st, errSig, advice.Reason)
e.sendFailureNotification(st, "repeated error signature detected; escalate")
continue
}
@@ -753,6 +754,34 @@ func (e *Engine) enqueueAutoRepairTaskLocked(st *taskState, errSig string) {
e.writeReflectLog("infer", st, "generated auto-repair task due to repeated error signature")
}
// appendMemoryIncidentLocked records a repeated-error incident for st in the
// workspace memory files.
//
// One "- [EKG_INCIDENT] errsig=... task=... reason=... time=..." line is
// appended to memory/<UTC date>.md and to MEMORY.md under the trimmed
// e.opts.Workspace. A file that already contains an incident entry for exactly
// the same normalized signature is left untouched. An empty normalized
// signature is recorded as "unknown_error_signature".
//
// The write is best-effort: the function returns nothing and any I/O failure
// simply skips the affected file.
//
// NOTE(review): the "Locked" suffix suggests the caller is expected to hold the
// engine lock; this function itself takes no lock — confirm against callers.
func (e *Engine) appendMemoryIncidentLocked(st *taskState, errSig string, reasons []string) {
	if st == nil {
		return
	}
	// Use the trimmed workspace for the paths as well as for the emptiness
	// check, so entries land where the ekg engine (which trims its workspace)
	// looks for them.
	workspace := strings.TrimSpace(e.opts.Workspace)
	if workspace == "" {
		return
	}
	errSig = ekg.NormalizeErrorSignature(errSig)
	if errSig == "" {
		errSig = "unknown_error_signature"
	}

	// Read the clock once so the entry timestamp and the day file name cannot
	// disagree when the call straddles UTC midnight.
	now := time.Now().UTC()

	// Include the " task=" field separator in the marker so that a signature
	// which is a prefix of another (e.g. "timeout" vs "timeout_db") is not
	// mistaken for an already-recorded incident.
	marker := "[EKG_INCIDENT] errsig=" + errSig + " task="
	line := fmt.Sprintf("- [EKG_INCIDENT] errsig=%s task=%s reason=%s time=%s\n",
		errSig, shortTask(st.Content), strings.Join(reasons, ";"), now.Format(time.RFC3339))

	appendIfMissing := func(path string) {
		if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
			return
		}
		existing, err := os.ReadFile(path)
		if err != nil && !os.IsNotExist(err) {
			// The file exists but cannot be read, so duplicates cannot be
			// ruled out; skip rather than append blindly.
			return
		}
		if strings.Contains(string(existing), marker) {
			return
		}
		f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
		if err != nil {
			return
		}
		defer f.Close()

		entry := line
		// Keep the entry on its own line even when the existing content does
		// not end with a newline (e.g. a hand-edited MEMORY.md).
		if n := len(existing); n > 0 && existing[n-1] != '\n' {
			entry = "\n" + entry
		}
		// Best-effort write: there is no caller to report a failure to.
		_, _ = f.WriteString(entry)
	}

	appendIfMissing(filepath.Join(workspace, "memory", now.Format("2006-01-02")+".md"))
	appendIfMissing(filepath.Join(workspace, "MEMORY.md"))
}
func (e *Engine) sendFailureNotification(st *taskState, reason string) {
e.writeReflectLog("blocked", st, reason)
e.writeTriggerAudit("blocked", st, reason)

View File

@@ -38,14 +38,16 @@ type Advice struct {
}
// Engine holds the configuration for the EKG advice engine: where its files
// live and the thresholds it applies.
type Engine struct {
// workspace is the whitespace-trimmed workspace root given to New; the
// memory incident lookup resolves MEMORY.md and memory/<date>.md under it.
workspace string
// path is the events file, <workspace>/memory/ekg-events.jsonl.
path string
// recentLines is initialised to 2000 by New.
// NOTE(review): presumably the number of trailing event lines considered — confirm.
recentLines int
// consecutiveErrorThreshold is initialised to 3 by New; GetAdvice compares
// the consecutive-error count against it when deciding to escalate.
consecutiveErrorThreshold int
}
// New returns an Engine rooted at workspace.
//
// The workspace string is trimmed of surrounding whitespace and stored; the
// events file is placed at <workspace>/memory/ekg-events.jsonl. The engine
// starts with recentLines set to 2000 and a consecutive-error threshold of 3.
func New(workspace string) *Engine {
	ws := strings.TrimSpace(workspace)
	return &Engine{
		workspace:                 ws,
		path:                      filepath.Join(ws, "memory", "ekg-events.jsonl"),
		recentLines:               2000,
		consecutiveErrorThreshold: 3,
	}
}
func (e *Engine) SetConsecutiveErrorThreshold(v int) {
@@ -127,6 +129,14 @@ func (e *Engine) GetAdvice(ctx SignalContext) Advice {
adv.Reason = append(adv.Reason, "same task and error signature exceeded threshold")
return adv
}
// Memory-linked fast path: if this errsig was documented as an incident, escalate one step earlier.
if consecutive >= e.consecutiveErrorThreshold-1 && e.hasMemoryIncident(errSig) {
adv.ShouldEscalate = true
adv.RetryBackoffSec = 300
adv.Reason = append(adv.Reason, "memory_linked_repeated_error_signature")
adv.Reason = append(adv.Reason, "same errsig already recorded in memory incident")
return adv
}
continue
}
// Same signature but success/suppressed encountered: reset chain.
@@ -225,6 +235,33 @@ func (e *Engine) RankProvidersForError(candidates []string, errSig string) []str
return ordered
}
// hasMemoryIncident reports whether errSig has been recorded as an
// "[EKG_INCIDENT]" entry in the workspace memory files.
//
// It checks MEMORY.md plus the memory/<date>.md files for today and yesterday
// (UTC). Matching is case-insensitive and is done per line: a line counts only
// when it carries the incident tag and an "errsig=<signature>" field whose
// value ends at whitespace or at the end of the line. It returns false for a
// nil engine, an empty workspace, or a signature that normalizes to "".
// Unreadable or empty files are skipped.
func (e *Engine) hasMemoryIncident(errSig string) bool {
	if e == nil || strings.TrimSpace(e.workspace) == "" {
		return false
	}
	errSig = NormalizeErrorSignature(errSig)
	if errSig == "" {
		return false
	}

	const incidentTag = "[ekg_incident]"
	sigField := "errsig=" + strings.ToLower(errSig)

	// lineMatches requires the signature field to end at a word boundary, so
	// "timeout" does not match an entry recorded for "timeout_db".
	lineMatches := func(line string) bool {
		if !strings.Contains(line, incidentTag) {
			return false
		}
		rest := line
		for {
			i := strings.Index(rest, sigField)
			if i < 0 {
				return false
			}
			rest = rest[i+len(sigField):]
			if rest == "" || rest[0] == ' ' || rest[0] == '\t' || rest[0] == '\r' {
				return true
			}
		}
	}

	// Read the clock once so "today" and "yesterday" are derived from the same
	// instant.
	now := time.Now().UTC()
	candidates := []string{
		filepath.Join(e.workspace, "MEMORY.md"),
		filepath.Join(e.workspace, "memory", now.Format("2006-01-02")+".md"),
		filepath.Join(e.workspace, "memory", now.AddDate(0, 0, -1).Format("2006-01-02")+".md"),
	}
	for _, p := range candidates {
		b, err := os.ReadFile(p)
		if err != nil || len(b) == 0 {
			continue
		}
		// Tag and signature must sit on the same line; matching them
		// independently across the whole file would let any incident plus an
		// unrelated mention of the signature trigger early escalation.
		for _, line := range strings.Split(strings.ToLower(string(b)), "\n") {
			if lineMatches(line) {
				return true
			}
		}
	}
	return false
}
func NormalizeErrorSignature(s string) string {
s = strings.TrimSpace(strings.ToLower(s))
if s == "" {