feat: implement channel healthcheck and sentinel auto-healing

This commit is contained in:
root
2026-02-14 16:20:14 +00:00
parent f56005246d
commit 1e4bf34fac
5 changed files with 73 additions and 4 deletions

View File

@@ -1,12 +1,14 @@
package sentinel
import (
"context"
"fmt"
"os"
"path/filepath"
"sync"
"time"
"clawgo/pkg/channels"
"clawgo/pkg/config"
"clawgo/pkg/lifecycle"
"clawgo/pkg/logger"
@@ -23,6 +25,7 @@ type Service struct {
runner *lifecycle.LoopRunner
mu sync.RWMutex
lastAlerts map[string]time.Time
mgr *channels.Manager
}
func NewService(cfgPath, workspace string, intervalSec int, autoHeal bool, onAlert AlertFunc) *Service {
@@ -40,6 +43,10 @@ func NewService(cfgPath, workspace string, intervalSec int, autoHeal bool, onAle
}
}
// SetManager stores the channel manager that checkChannels queries for
// health results. While no manager has been stored, checkChannels reports
// no issues.
func (s *Service) SetManager(manager *channels.Manager) {
	s.mgr = manager
}
func (s *Service) Start() {
if !s.runner.Start(s.loop) {
return
@@ -76,6 +83,7 @@ func (s *Service) runChecks() {
issues := s.checkConfig()
issues = append(issues, s.checkMemory()...)
issues = append(issues, s.checkLogs()...)
issues = append(issues, s.checkChannels()...)
if len(issues) == 0 {
return
@@ -86,6 +94,34 @@ func (s *Service) runChecks() {
}
}
// checkChannels asks the channel manager for the health of its channels and
// returns one issue message for every channel that reported a non-nil error.
//
// It returns nil when no manager has been set. The health query runs under a
// 10-second timeout. When autoHeal is enabled, each failing channel is
// restarted in its own goroutine; the restart is not awaited, and its outcome
// is only logged, never added to the returned issues.
func (s *Service) checkChannels() []string {
	if s.mgr == nil {
		return nil
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// heal restarts a single channel and logs whether the restart worked.
	// The restart gets its own background context, so it is not bound by the
	// health-check timeout above.
	heal := func(channel string) {
		logger.InfoCF("sentinel", "Attempting auto-heal for channel", map[string]interface{}{"channel": channel})
		rErr := s.mgr.RestartChannel(context.Background(), channel)
		if rErr == nil {
			logger.InfoCF("sentinel", "Auto-heal successful", map[string]interface{}{"channel": channel})
			return
		}
		logger.ErrorCF("sentinel", "Auto-heal restart failed", map[string]interface{}{"channel": channel, "error": rErr.Error()})
	}

	var issues []string
	for name, err := range s.mgr.CheckHealth(ctx) {
		if err == nil {
			continue
		}
		issues = append(issues, fmt.Sprintf("sentinel: channel %s health check failed: %v", name, err))
		if s.autoHeal {
			go heal(name)
		}
	}
	return issues
}
func (s *Service) checkConfig() []string {
_, err := os.Stat(s.cfgPath)
if err != nil {