mirror of
https://github.com/YspCoder/clawgo.git
synced 2026-05-19 16:27:30 +08:00
optimize channel orchestration with errgroup and rate limiter
This commit is contained in:
@@ -21,11 +21,13 @@
|
|||||||
|
|
||||||
1. **Actor 化关键路径(process)**
|
1. **Actor 化关键路径(process)**
|
||||||
- process 元数据持久化改为异步队列(`persistQ`)串行落盘。
|
- process 元数据持久化改为异步队列(`persistQ`)串行落盘。
|
||||||
|
- channel 启停编排使用 `errgroup.WithContext` 并发+统一取消。
|
||||||
2. **Typed Events 事件总线**
|
2. **Typed Events 事件总线**
|
||||||
- 新增 `pkg/events/typed_bus.go` 泛型事件总线。
|
- 新增 `pkg/events/typed_bus.go` 泛型事件总线。
|
||||||
- process 生命周期事件(start/exit/kill)可发布订阅。
|
- process 生命周期事件(start/exit/kill)可发布订阅。
|
||||||
3. **日志批量刷盘**
|
3. **日志批量刷盘**
|
||||||
- process 日志由 `logWriter` 批量 flush(时间片 + 大小阈值),减少高频 I/O。
|
- process 日志由 `logWriter` 批量 flush(时间片 + 大小阈值),减少高频 I/O。
|
||||||
|
- outbound 分发增加 `rate.Limiter`(令牌桶)平滑突发流量。
|
||||||
4. **Context 分层取消传播**
|
4. **Context 分层取消传播**
|
||||||
- 后台进程改为 `exec.CommandContext`,通过父 `ctx` 统一取消。
|
- 后台进程改为 `exec.CommandContext`,通过父 `ctx` 统一取消。
|
||||||
5. **原子配置快照**
|
5. **原子配置快照**
|
||||||
|
|||||||
@@ -21,11 +21,13 @@ A recent architecture pass leveraged core Go strengths:
|
|||||||
|
|
||||||
1. **Actor-style process path**
|
1. **Actor-style process path**
|
||||||
- Process metadata persistence is serialized via async queue (`persistQ`).
|
- Process metadata persistence is serialized via async queue (`persistQ`).
|
||||||
|
- Channel start/stop orchestration uses `errgroup.WithContext` for concurrent + unified cancellation.
|
||||||
2. **Typed Events bus**
|
2. **Typed Events bus**
|
||||||
- Added generic typed pub/sub bus (`pkg/events/typed_bus.go`).
|
- Added generic typed pub/sub bus (`pkg/events/typed_bus.go`).
|
||||||
- Process lifecycle events (start/exit/kill) are now publishable.
|
- Process lifecycle events (start/exit/kill) are now publishable.
|
||||||
3. **Batched log flushing**
|
3. **Batched log flushing**
|
||||||
- Process logs are flushed by `logWriter` with time/size thresholds to reduce I/O churn.
|
- Process logs are flushed by `logWriter` with time/size thresholds to reduce I/O churn.
|
||||||
|
- Outbound dispatch adds a token-bucket `rate.Limiter` for burst smoothing.
|
||||||
4. **Context hierarchy + cancellation propagation**
|
4. **Context hierarchy + cancellation propagation**
|
||||||
- Background exec now uses `exec.CommandContext` with parent `ctx` propagation.
|
- Background exec now uses `exec.CommandContext` with parent `ctx` propagation.
|
||||||
5. **Atomic runtime config snapshot**
|
5. **Atomic runtime config snapshot**
|
||||||
|
|||||||
3
go.mod
3
go.mod
@@ -12,6 +12,8 @@ require (
|
|||||||
github.com/open-dingtalk/dingtalk-stream-sdk-go v0.9.1
|
github.com/open-dingtalk/dingtalk-stream-sdk-go v0.9.1
|
||||||
github.com/tencent-connect/botgo v0.2.1
|
github.com/tencent-connect/botgo v0.2.1
|
||||||
golang.org/x/oauth2 v0.35.0
|
golang.org/x/oauth2 v0.35.0
|
||||||
|
golang.org/x/sync v0.19.0
|
||||||
|
golang.org/x/time v0.12.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
@@ -36,6 +38,5 @@ require (
|
|||||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
|
||||||
golang.org/x/crypto v0.48.0 // indirect
|
golang.org/x/crypto v0.48.0 // indirect
|
||||||
golang.org/x/net v0.50.0 // indirect
|
golang.org/x/net v0.50.0 // indirect
|
||||||
golang.org/x/sync v0.19.0 // indirect
|
|
||||||
golang.org/x/sys v0.41.0 // indirect
|
golang.org/x/sys v0.41.0 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -15,15 +15,18 @@ import (
|
|||||||
"clawgo/pkg/bus"
|
"clawgo/pkg/bus"
|
||||||
"clawgo/pkg/config"
|
"clawgo/pkg/config"
|
||||||
"clawgo/pkg/logger"
|
"clawgo/pkg/logger"
|
||||||
|
"golang.org/x/sync/errgroup"
|
||||||
|
"golang.org/x/time/rate"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Manager struct {
|
type Manager struct {
|
||||||
channels map[string]Channel
|
channels map[string]Channel
|
||||||
bus *bus.MessageBus
|
bus *bus.MessageBus
|
||||||
config *config.Config
|
config *config.Config
|
||||||
dispatchTask *asyncTask
|
dispatchTask *asyncTask
|
||||||
dispatchSem chan struct{}
|
dispatchSem chan struct{}
|
||||||
mu sync.RWMutex
|
outboundLimit *rate.Limiter
|
||||||
|
mu sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
type asyncTask struct {
|
type asyncTask struct {
|
||||||
@@ -36,7 +39,8 @@ func NewManager(cfg *config.Config, messageBus *bus.MessageBus) (*Manager, error
|
|||||||
bus: messageBus,
|
bus: messageBus,
|
||||||
config: cfg,
|
config: cfg,
|
||||||
// Limit concurrent outbound sends to avoid unbounded goroutine growth.
|
// Limit concurrent outbound sends to avoid unbounded goroutine growth.
|
||||||
dispatchSem: make(chan struct{}, 32),
|
dispatchSem: make(chan struct{}, 32),
|
||||||
|
outboundLimit: rate.NewLimiter(rate.Limit(40), 80),
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := m.initChannels(); err != nil {
|
if err := m.initChannels(); err != nil {
|
||||||
@@ -161,59 +165,73 @@ func (m *Manager) initChannels() error {
|
|||||||
|
|
||||||
func (m *Manager) StartAll(ctx context.Context) error {
|
func (m *Manager) StartAll(ctx context.Context) error {
|
||||||
m.mu.Lock()
|
m.mu.Lock()
|
||||||
defer m.mu.Unlock()
|
|
||||||
|
|
||||||
if len(m.channels) == 0 {
|
if len(m.channels) == 0 {
|
||||||
|
m.mu.Unlock()
|
||||||
logger.WarnC("channels", "No channels enabled")
|
logger.WarnC("channels", "No channels enabled")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
channelsSnapshot := make(map[string]Channel, len(m.channels))
|
||||||
logger.InfoC("channels", "Starting all channels")
|
for k, v := range m.channels {
|
||||||
|
channelsSnapshot[k] = v
|
||||||
|
}
|
||||||
dispatchCtx, cancel := context.WithCancel(ctx)
|
dispatchCtx, cancel := context.WithCancel(ctx)
|
||||||
m.dispatchTask = &asyncTask{cancel: cancel}
|
m.dispatchTask = &asyncTask{cancel: cancel}
|
||||||
|
m.mu.Unlock()
|
||||||
|
|
||||||
|
logger.InfoC("channels", "Starting all channels")
|
||||||
go m.dispatchOutbound(dispatchCtx)
|
go m.dispatchOutbound(dispatchCtx)
|
||||||
|
|
||||||
for name, channel := range m.channels {
|
g, gctx := errgroup.WithContext(ctx)
|
||||||
logger.InfoCF("channels", "Starting channel", map[string]interface{}{
|
for name, channel := range channelsSnapshot {
|
||||||
logger.FieldChannel: name,
|
name := name
|
||||||
|
channel := channel
|
||||||
|
g.Go(func() error {
|
||||||
|
logger.InfoCF("channels", "Starting channel", map[string]interface{}{logger.FieldChannel: name})
|
||||||
|
if err := channel.Start(gctx); err != nil {
|
||||||
|
logger.ErrorCF("channels", "Failed to start channel", map[string]interface{}{logger.FieldChannel: name, logger.FieldError: err.Error()})
|
||||||
|
return fmt.Errorf("%s: %w", name, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
})
|
})
|
||||||
if err := channel.Start(ctx); err != nil {
|
|
||||||
logger.ErrorCF("channels", "Failed to start channel", map[string]interface{}{
|
|
||||||
logger.FieldChannel: name,
|
|
||||||
logger.FieldError: err.Error(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if err := g.Wait(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
logger.InfoC("channels", "All channels started")
|
logger.InfoC("channels", "All channels started")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) StopAll(ctx context.Context) error {
|
func (m *Manager) StopAll(ctx context.Context) error {
|
||||||
m.mu.Lock()
|
m.mu.Lock()
|
||||||
defer m.mu.Unlock()
|
channelsSnapshot := make(map[string]Channel, len(m.channels))
|
||||||
|
for k, v := range m.channels {
|
||||||
|
channelsSnapshot[k] = v
|
||||||
|
}
|
||||||
|
task := m.dispatchTask
|
||||||
|
m.dispatchTask = nil
|
||||||
|
m.mu.Unlock()
|
||||||
|
|
||||||
logger.InfoC("channels", "Stopping all channels")
|
logger.InfoC("channels", "Stopping all channels")
|
||||||
|
if task != nil {
|
||||||
if m.dispatchTask != nil {
|
task.cancel()
|
||||||
m.dispatchTask.cancel()
|
|
||||||
m.dispatchTask = nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for name, channel := range m.channels {
|
g, gctx := errgroup.WithContext(ctx)
|
||||||
logger.InfoCF("channels", "Stopping channel", map[string]interface{}{
|
for name, channel := range channelsSnapshot {
|
||||||
logger.FieldChannel: name,
|
name := name
|
||||||
|
channel := channel
|
||||||
|
g.Go(func() error {
|
||||||
|
logger.InfoCF("channels", "Stopping channel", map[string]interface{}{logger.FieldChannel: name})
|
||||||
|
if err := channel.Stop(gctx); err != nil {
|
||||||
|
logger.ErrorCF("channels", "Error stopping channel", map[string]interface{}{logger.FieldChannel: name, logger.FieldError: err.Error()})
|
||||||
|
return fmt.Errorf("%s: %w", name, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
})
|
})
|
||||||
if err := channel.Stop(ctx); err != nil {
|
|
||||||
logger.ErrorCF("channels", "Error stopping channel", map[string]interface{}{
|
|
||||||
logger.FieldChannel: name,
|
|
||||||
logger.FieldError: err.Error(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if err := g.Wait(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
logger.InfoC("channels", "All channels stopped")
|
logger.InfoC("channels", "All channels stopped")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -283,6 +301,12 @@ func (m *Manager) dispatchOutbound(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if m.outboundLimit != nil {
|
||||||
|
if err := m.outboundLimit.Wait(ctx); err != nil {
|
||||||
|
logger.WarnCF("channels", "Outbound rate limiter canceled", map[string]interface{}{logger.FieldError: err.Error()})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
// Bound fan-out concurrency to prevent goroutine explosion under burst traffic.
|
// Bound fan-out concurrency to prevent goroutine explosion under burst traffic.
|
||||||
m.dispatchSem <- struct{}{}
|
m.dispatchSem <- struct{}{}
|
||||||
go func(c Channel, outbound bus.OutboundMessage) {
|
go func(c Channel, outbound bus.OutboundMessage) {
|
||||||
|
|||||||
Reference in New Issue
Block a user