diff --git a/clawgo_new b/clawgo_new new file mode 100755 index 0000000..157826d Binary files /dev/null and b/clawgo_new differ diff --git a/pkg/channels/base.go b/pkg/channels/base.go index 54a4d6d..2e83e99 100644 --- a/pkg/channels/base.go +++ b/pkg/channels/base.go @@ -17,6 +17,7 @@ type Channel interface { Send(ctx context.Context, msg bus.OutboundMessage) error IsRunning() bool IsAllowed(senderID string) bool + HealthCheck(ctx context.Context) error } type BaseChannel struct { diff --git a/pkg/channels/manager.go b/pkg/channels/manager.go index f602f92..81ba7e0 100644 --- a/pkg/channels/manager.go +++ b/pkg/channels/manager.go @@ -217,6 +217,31 @@ func (m *Manager) StopAll(ctx context.Context) error { return nil } +func (m *Manager) CheckHealth(ctx context.Context) map[string]error { + m.mu.RLock() + defer m.mu.RUnlock() + + results := make(map[string]error) + for name, channel := range m.channels { + results[name] = channel.HealthCheck(ctx) + } + return results +} + +func (m *Manager) RestartChannel(ctx context.Context, name string) error { + m.mu.Lock() + defer m.mu.Unlock() + + channel, ok := m.channels[name] + if !ok { + return fmt.Errorf("channel %s not found", name) + } + + logger.InfoCF("channels", "Restarting channel", map[string]interface{}{"channel": name}) + _ = channel.Stop(ctx) + return channel.Start(ctx) +} + func (m *Manager) dispatchOutbound(ctx context.Context) { logger.InfoC("channels", "Outbound dispatcher started") @@ -271,10 +296,7 @@ func (m *Manager) GetStatus() map[string]interface{} { status := make(map[string]interface{}) for name, channel := range m.channels { - status[name] = map[string]interface{}{ - "enabled": true, - "running": channel.IsRunning(), - } + status[name] = map[string]interface{}{} } return status } diff --git a/pkg/channels/telegram.go b/pkg/channels/telegram.go index f0e4cb1..4e0b187 100644 --- a/pkg/channels/telegram.go +++ b/pkg/channels/telegram.go @@ -74,6 +74,16 @@ func (c *TelegramChannel) SetTranscriber(transcriber *voice.GroqTranscriber) { c.transcriber = transcriber } +func (c *TelegramChannel) HealthCheck(ctx context.Context) error { + if !c.IsRunning() { + return fmt.Errorf("telegram bot not running") + } + hCtx, cancel := withTelegramAPITimeout(ctx) + defer cancel() + _, err := c.bot.GetMe(hCtx) + return err +} + func (c *TelegramChannel) Start(ctx context.Context) error { if c.IsRunning() { return nil diff --git a/pkg/sentinel/service.go b/pkg/sentinel/service.go index 77cb04a..e438e04 100644 --- a/pkg/sentinel/service.go +++ b/pkg/sentinel/service.go @@ -1,12 +1,14 @@ package sentinel import ( + "context" "fmt" "os" "path/filepath" "sync" "time" + "clawgo/pkg/channels" "clawgo/pkg/config" "clawgo/pkg/lifecycle" "clawgo/pkg/logger" @@ -23,6 +25,7 @@ type Service struct { runner *lifecycle.LoopRunner mu sync.RWMutex lastAlerts map[string]time.Time + mgr *channels.Manager } func NewService(cfgPath, workspace string, intervalSec int, autoHeal bool, onAlert AlertFunc) *Service { @@ -40,6 +43,10 @@ func NewService(cfgPath, workspace string, intervalSec int, autoHeal bool, onAle } } +func (s *Service) SetManager(mgr *channels.Manager) { + s.mgr = mgr +} + func (s *Service) Start() { if !s.runner.Start(s.loop) { return @@ -76,6 +83,7 @@ func (s *Service) runChecks() { issues := s.checkConfig() issues = append(issues, s.checkMemory()...) issues = append(issues, s.checkLogs()...) + issues = append(issues, s.checkChannels()...) if len(issues) == 0 { return @@ -86,6 +94,34 @@ func (s *Service) runChecks() { } } +func (s *Service) checkChannels() []string { + if s.mgr == nil { + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + health := s.mgr.CheckHealth(ctx) + var issues []string + for name, err := range health { + if err != nil { + msg := fmt.Sprintf("sentinel: channel %s health check failed: %v", name, err) + issues = append(issues, msg) + if s.autoHeal { + go func(n string) { + logger.InfoCF("sentinel", "Attempting auto-heal for channel", map[string]interface{}{"channel": n}) + if rErr := s.mgr.RestartChannel(context.Background(), n); rErr != nil { + logger.ErrorCF("sentinel", "Auto-heal restart failed", map[string]interface{}{"channel": n, "error": rErr.Error()}) + } else { + logger.InfoCF("sentinel", "Auto-heal successful", map[string]interface{}{"channel": n}) + } + }(name) + } + } + } + return issues +} + func (s *Service) checkConfig() []string { _, err := os.Stat(s.cfgPath) if err != nil {