feat: enhance sentinel and add button support for telegram

This commit is contained in:
root
2026-02-14 17:00:21 +00:00
parent 1ac0fb123b
commit 4ed24b57e3
4 changed files with 63 additions and 4 deletions

View File

@@ -25,7 +25,8 @@ type Service struct {
runner *lifecycle.LoopRunner
mu sync.RWMutex
lastAlerts map[string]time.Time
mgr *channels.Manager
mgr *channels.Manager
healingChannels map[string]bool
}
func NewService(cfgPath, workspace string, intervalSec int, autoHeal bool, onAlert AlertFunc) *Service {
@@ -40,6 +41,7 @@ func NewService(cfgPath, workspace string, intervalSec int, autoHeal bool, onAle
onAlert: onAlert,
runner: lifecycle.NewLoopRunner(),
lastAlerts: map[string]time.Time{},
healingChannels: map[string]bool{},
}
}
@@ -108,8 +110,22 @@ func (s *Service) checkChannels() []string {
msg := fmt.Sprintf("sentinel: channel %s health check failed: %v", name, err)
issues = append(issues, msg)
if s.autoHeal {
s.mu.Lock()
if s.healingChannels[name] {
s.mu.Unlock()
continue
}
s.healingChannels[name] = true
s.mu.Unlock()
go func(n string) {
defer func() {
s.mu.Lock()
delete(s.healingChannels, n)
s.mu.Unlock()
}()
logger.InfoCF("sentinel", "Attempting auto-heal for channel", map[string]interface{}{"channel": n})
// Use a fresh context for the restart so that it is not canceled by the sentinel loop.
if rErr := s.mgr.RestartChannel(context.Background(), n); rErr != nil {
logger.ErrorCF("sentinel", "Auto-heal restart failed", map[string]interface{}{"channel": n, "error": rErr.Error()})
} else {