feat: harden jsonl runtime reliability

This commit is contained in:
lpf
2026-04-13 13:41:01 +08:00
parent 36890c7ce0
commit fac235db80
34 changed files with 4370 additions and 757 deletions

View File

@@ -1,7 +1,6 @@
package tools
import (
"bufio"
"encoding/json"
"fmt"
"os"
@@ -10,6 +9,9 @@ import (
"strconv"
"strings"
"sync"
"time"
"github.com/YspCoder/clawgo/pkg/jsonlog"
)
type AgentThread struct {
@@ -36,15 +38,39 @@ type AgentMessage struct {
CreatedAt int64 `json:"created_at"`
}
type mailboxThreadsMetaFile struct {
Version int `json:"version"`
LastOffset int64 `json:"last_offset,omitempty"`
ThreadSeq int `json:"thread_seq,omitempty"`
UpdatedAt int64 `json:"updated_at,omitempty"`
Threads []AgentThread `json:"threads,omitempty"`
}
type mailboxMessagesMetaFile struct {
Version int `json:"version"`
LastOffset int64 `json:"last_offset,omitempty"`
MsgSeq int `json:"msg_seq,omitempty"`
UpdatedAt int64 `json:"updated_at,omitempty"`
Messages []AgentMessage `json:"messages,omitempty"`
ThreadMessages map[string][]string `json:"thread_messages,omitempty"`
QueuedByAgent map[string][]string `json:"queued_by_agent,omitempty"`
QueuedByThreadAgent map[string][]string `json:"queued_by_thread_agent,omitempty"`
}
type AgentMailboxStore struct {
dir string
threadsPath string
msgsPath string
mu sync.RWMutex
threads map[string]*AgentThread
messages map[string]*AgentMessage
msgSeq int
threadSeq int
dir string
threadsPath string
msgsPath string
threadsMetaPath string
msgsMetaPath string
mu sync.RWMutex
threads map[string]*AgentThread
messages map[string]*AgentMessage
threadMessages map[string][]string
queuedByAgent map[string][]string
queuedByThreadAgent map[string][]string
msgSeq int
threadSeq int
}
func NewAgentMailboxStore(workspace string) *AgentMailboxStore {
@@ -54,13 +80,18 @@ func NewAgentMailboxStore(workspace string) *AgentMailboxStore {
}
dir := filepath.Join(workspace, "agents", "runtime")
s := &AgentMailboxStore{
dir: dir,
threadsPath: filepath.Join(dir, "threads.jsonl"),
msgsPath: filepath.Join(dir, "agent_messages.jsonl"),
threads: map[string]*AgentThread{},
messages: map[string]*AgentMessage{},
dir: dir,
threadsPath: filepath.Join(dir, "threads.jsonl"),
msgsPath: filepath.Join(dir, "agent_messages.jsonl"),
threadsMetaPath: filepath.Join(dir, "threads.meta.json"),
msgsMetaPath: filepath.Join(dir, "agent_messages.meta.json"),
threads: map[string]*AgentThread{},
messages: map[string]*AgentMessage{},
threadMessages: map[string][]string{},
queuedByAgent: map[string][]string{},
queuedByThreadAgent: map[string][]string{},
}
_ = os.MkdirAll(dir, 0755)
_ = os.MkdirAll(dir, 0o755)
_ = s.load()
return s
}
@@ -68,75 +99,91 @@ func NewAgentMailboxStore(workspace string) *AgentMailboxStore {
func (s *AgentMailboxStore) load() error {
s.mu.Lock()
defer s.mu.Unlock()
s.threads = map[string]*AgentThread{}
s.messages = map[string]*AgentMessage{}
s.resetLocked()
if err := s.loadThreadsLocked(); err != nil {
return err
}
return s.scanMessagesLocked()
return s.loadMessagesLocked()
}
func (s *AgentMailboxStore) resetLocked() {
s.threads = map[string]*AgentThread{}
s.messages = map[string]*AgentMessage{}
s.threadMessages = map[string][]string{}
s.queuedByAgent = map[string][]string{}
s.queuedByThreadAgent = map[string][]string{}
s.msgSeq = 0
s.threadSeq = 0
}
func (s *AgentMailboxStore) loadThreadsLocked() error {
f, err := os.Open(s.threadsPath)
size, err := jsonlog.FileSize(s.threadsPath)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
defer f.Close()
scanner := bufio.NewScanner(f)
buf := make([]byte, 0, 64*1024)
scanner.Buffer(buf, 2*1024*1024)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
continue
var meta mailboxThreadsMetaFile
if err := jsonlog.ReadJSON(s.threadsMetaPath, &meta); err == nil && meta.Version > 0 && meta.LastOffset == size {
for _, thread := range meta.Threads {
cp := thread
cp.Participants = append([]string(nil), thread.Participants...)
s.threads[cp.ThreadID] = &cp
}
s.threadSeq = meta.ThreadSeq
return nil
}
if size == 0 {
return s.persistThreadsMetaLocked(size)
}
if err := jsonlog.Scan(s.threadsPath, func(line []byte) error {
var thread AgentThread
if err := json.Unmarshal([]byte(line), &thread); err != nil {
continue
if err := json.Unmarshal(line, &thread); err != nil || strings.TrimSpace(thread.ThreadID) == "" {
return nil
}
cp := thread
cp.Participants = append([]string(nil), thread.Participants...)
s.threads[thread.ThreadID] = &cp
if n := parseThreadSequence(thread.ThreadID); n > s.threadSeq {
s.threadSeq = n
}
}
return scanner.Err()
}
func (s *AgentMailboxStore) scanMessagesLocked() error {
f, err := os.Open(s.msgsPath)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return nil
}); err != nil {
return err
}
defer f.Close()
scanner := bufio.NewScanner(f)
buf := make([]byte, 0, 64*1024)
scanner.Buffer(buf, 2*1024*1024)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
continue
}
var msg AgentMessage
if err := json.Unmarshal([]byte(line), &msg); err != nil {
continue
}
if n := parseMessageSequence(msg.MessageID); n > s.msgSeq {
s.msgSeq = n
}
cp := msg
s.messages[msg.MessageID] = &cp
if thread := s.threads[msg.ThreadID]; thread != nil && msg.CreatedAt > thread.UpdatedAt {
thread.UpdatedAt = msg.CreatedAt
}
return s.persistThreadsMetaLocked(size)
}
func (s *AgentMailboxStore) loadMessagesLocked() error {
size, err := jsonlog.FileSize(s.msgsPath)
if err != nil {
return err
}
return scanner.Err()
var meta mailboxMessagesMetaFile
if err := jsonlog.ReadJSON(s.msgsMetaPath, &meta); err == nil && meta.Version > 0 && meta.LastOffset == size {
for _, msg := range meta.Messages {
cp := msg
s.messages[msg.MessageID] = &cp
}
s.threadMessages = cloneStringSliceMap(meta.ThreadMessages)
s.queuedByAgent = cloneStringSliceMap(meta.QueuedByAgent)
s.queuedByThreadAgent = cloneStringSliceMap(meta.QueuedByThreadAgent)
s.msgSeq = meta.MsgSeq
s.reconcileThreadsFromMessagesLocked()
return nil
}
if size == 0 {
return s.persistMessagesMetaLocked(size)
}
if err := jsonlog.Scan(s.msgsPath, func(line []byte) error {
var msg AgentMessage
if err := json.Unmarshal(line, &msg); err != nil || strings.TrimSpace(msg.MessageID) == "" {
return nil
}
s.indexMessageLocked(msg)
return nil
}); err != nil {
return err
}
return s.persistMessagesMetaLocked(size)
}
func (s *AgentMailboxStore) EnsureThread(thread AgentThread) (AgentThread, error) {
@@ -145,7 +192,7 @@ func (s *AgentMailboxStore) EnsureThread(thread AgentThread) (AgentThread, error
}
s.mu.Lock()
defer s.mu.Unlock()
if err := os.MkdirAll(s.dir, 0755); err != nil {
if err := os.MkdirAll(s.dir, 0o755); err != nil {
return AgentThread{}, err
}
if strings.TrimSpace(thread.ThreadID) == "" {
@@ -165,20 +212,19 @@ func (s *AgentMailboxStore) EnsureThread(thread AgentThread) (AgentThread, error
if thread.UpdatedAt <= 0 {
thread.UpdatedAt = thread.CreatedAt
}
data, err := json.Marshal(thread)
offset, err := jsonlog.AppendLine(s.threadsPath, thread)
if err != nil {
return AgentThread{}, err
}
f, err := os.OpenFile(s.threadsPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
if err != nil {
return AgentThread{}, err
}
defer f.Close()
if _, err := f.Write(append(data, '\n')); err != nil {
return AgentThread{}, err
}
cp := thread
cp.Participants = append([]string(nil), thread.Participants...)
s.threads[thread.ThreadID] = &cp
if n := parseThreadSequence(thread.ThreadID); n > s.threadSeq {
s.threadSeq = n
}
if err := s.persistThreadsMetaLocked(offset); err != nil {
return AgentThread{}, err
}
return thread, nil
}
@@ -188,7 +234,7 @@ func (s *AgentMailboxStore) AppendMessage(msg AgentMessage) (AgentMessage, error
}
s.mu.Lock()
defer s.mu.Unlock()
if err := os.MkdirAll(s.dir, 0755); err != nil {
if err := os.MkdirAll(s.dir, 0o755); err != nil {
return AgentMessage{}, err
}
if strings.TrimSpace(msg.MessageID) == "" {
@@ -198,26 +244,17 @@ func (s *AgentMailboxStore) AppendMessage(msg AgentMessage) (AgentMessage, error
if strings.TrimSpace(msg.Status) == "" {
msg.Status = "queued"
}
data, err := json.Marshal(msg)
offset, err := jsonlog.AppendLine(s.msgsPath, msg)
if err != nil {
return AgentMessage{}, err
}
f, err := os.OpenFile(s.msgsPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
if err != nil {
s.indexMessageLocked(msg)
if err := s.persistMessagesMetaLocked(offset); err != nil {
return AgentMessage{}, err
}
defer f.Close()
if _, err := f.Write(append(data, '\n')); err != nil {
if err := s.persistThreadsMetaLocked(0); err != nil {
return AgentMessage{}, err
}
if thread := s.threads[msg.ThreadID]; thread != nil {
thread.UpdatedAt = msg.CreatedAt
participants := append([]string(nil), thread.Participants...)
participants = append(participants, msg.FromAgent, msg.ToAgent)
thread.Participants = normalizeStringList(participants)
}
cp := msg
s.messages[msg.MessageID] = &cp
return msg, nil
}
@@ -240,30 +277,21 @@ func (s *AgentMailboxStore) MessagesByThread(threadID string, limit int) ([]Agen
if s == nil {
return nil, nil
}
return s.currentMessages(func(msg AgentMessage) bool {
return msg.ThreadID == strings.TrimSpace(threadID)
}, limit), nil
return s.currentIndexedMessages(s.threadMessages[strings.TrimSpace(threadID)], limit), nil
}
func (s *AgentMailboxStore) Inbox(agentID string, limit int) ([]AgentMessage, error) {
if s == nil {
return nil, nil
}
agentID = strings.TrimSpace(agentID)
return s.currentMessages(func(msg AgentMessage) bool {
return msg.ToAgent == agentID && strings.EqualFold(strings.TrimSpace(msg.Status), "queued")
}, limit), nil
return s.currentIndexedMessages(s.queuedByAgent[strings.TrimSpace(agentID)], limit), nil
}
func (s *AgentMailboxStore) ThreadInbox(threadID, agentID string, limit int) ([]AgentMessage, error) {
if s == nil {
return nil, nil
}
threadID = strings.TrimSpace(threadID)
agentID = strings.TrimSpace(agentID)
return s.currentMessages(func(msg AgentMessage) bool {
return msg.ThreadID == threadID && msg.ToAgent == agentID && strings.EqualFold(strings.TrimSpace(msg.Status), "queued")
}, limit), nil
return s.currentIndexedMessages(s.queuedByThreadAgent[mailboxThreadAgentKey(threadID, agentID)], limit), nil
}
func (s *AgentMailboxStore) Message(messageID string) (*AgentMessage, bool) {
@@ -305,16 +333,16 @@ func (s *AgentMailboxStore) UpdateMessageStatus(messageID, status string, at int
return &msg, nil
}
func (s *AgentMailboxStore) currentMessages(match func(AgentMessage) bool, limit int) []AgentMessage {
func (s *AgentMailboxStore) currentIndexedMessages(ids []string, limit int) []AgentMessage {
s.mu.RLock()
defer s.mu.RUnlock()
var out []AgentMessage
for _, item := range s.messages {
msg := *item
if match != nil && !match(msg) {
out := make([]AgentMessage, 0, len(ids))
for _, id := range ids {
msg := s.messages[id]
if msg == nil {
continue
}
out = append(out, msg)
out = append(out, *msg)
}
sort.Slice(out, func(i, j int) bool {
if out[i].CreatedAt != out[j].CreatedAt {
@@ -328,6 +356,136 @@ func (s *AgentMailboxStore) currentMessages(match func(AgentMessage) bool, limit
return out
}
func (s *AgentMailboxStore) indexMessageLocked(msg AgentMessage) {
msg.MessageID = strings.TrimSpace(msg.MessageID)
if msg.MessageID == "" {
return
}
if n := parseMessageSequence(msg.MessageID); n > s.msgSeq {
s.msgSeq = n
}
if existing := s.messages[msg.MessageID]; existing != nil {
s.removeQueuedIndexesLocked(*existing)
}
cp := msg
s.messages[msg.MessageID] = &cp
s.threadMessages[msg.ThreadID] = appendUniqueString(s.threadMessages[msg.ThreadID], msg.MessageID)
if strings.EqualFold(strings.TrimSpace(msg.Status), "queued") {
s.queuedByAgent[msg.ToAgent] = appendUniqueString(s.queuedByAgent[msg.ToAgent], msg.MessageID)
s.queuedByThreadAgent[mailboxThreadAgentKey(msg.ThreadID, msg.ToAgent)] = appendUniqueString(s.queuedByThreadAgent[mailboxThreadAgentKey(msg.ThreadID, msg.ToAgent)], msg.MessageID)
}
thread := s.ensureThreadStateLocked(msg.ThreadID)
if thread != nil {
if msg.CreatedAt > thread.UpdatedAt {
thread.UpdatedAt = msg.CreatedAt
}
participants := append([]string(nil), thread.Participants...)
participants = append(participants, msg.FromAgent, msg.ToAgent)
thread.Participants = normalizeStringList(participants)
}
}
func (s *AgentMailboxStore) ensureThreadStateLocked(threadID string) *AgentThread {
threadID = strings.TrimSpace(threadID)
if threadID == "" {
return nil
}
thread := s.threads[threadID]
if thread != nil {
return thread
}
thread = &AgentThread{
ThreadID: threadID,
Status: "open",
CreatedAt: 1,
UpdatedAt: 1,
Participants: nil,
}
s.threads[threadID] = thread
if n := parseThreadSequence(threadID); n > s.threadSeq {
s.threadSeq = n
}
return thread
}
func (s *AgentMailboxStore) removeQueuedIndexesLocked(msg AgentMessage) {
if !strings.EqualFold(strings.TrimSpace(msg.Status), "queued") {
return
}
s.queuedByAgent[msg.ToAgent] = removeStringValue(s.queuedByAgent[msg.ToAgent], msg.MessageID)
s.queuedByThreadAgent[mailboxThreadAgentKey(msg.ThreadID, msg.ToAgent)] = removeStringValue(s.queuedByThreadAgent[mailboxThreadAgentKey(msg.ThreadID, msg.ToAgent)], msg.MessageID)
}
func (s *AgentMailboxStore) reconcileThreadsFromMessagesLocked() {
for _, msg := range s.messages {
s.ensureThreadStateLocked(msg.ThreadID)
}
for _, msg := range s.messages {
thread := s.threads[msg.ThreadID]
if thread == nil {
continue
}
if msg.CreatedAt > thread.UpdatedAt {
thread.UpdatedAt = msg.CreatedAt
}
participants := append([]string(nil), thread.Participants...)
participants = append(participants, msg.FromAgent, msg.ToAgent)
thread.Participants = normalizeStringList(participants)
}
}
func (s *AgentMailboxStore) persistThreadsMetaLocked(offset int64) error {
if offset <= 0 {
size, err := jsonlog.FileSize(s.threadsPath)
if err != nil {
return err
}
offset = size
}
meta := mailboxThreadsMetaFile{
Version: 1,
LastOffset: offset,
ThreadSeq: s.threadSeq,
UpdatedAt: time.Now().UnixMilli(),
Threads: make([]AgentThread, 0, len(s.threads)),
}
for _, thread := range s.threads {
cp := *thread
cp.Participants = append([]string(nil), thread.Participants...)
meta.Threads = append(meta.Threads, cp)
}
sort.Slice(meta.Threads, func(i, j int) bool {
if meta.Threads[i].UpdatedAt != meta.Threads[j].UpdatedAt {
return meta.Threads[i].UpdatedAt > meta.Threads[j].UpdatedAt
}
return meta.Threads[i].ThreadID < meta.Threads[j].ThreadID
})
return jsonlog.WriteJSON(s.threadsMetaPath, meta)
}
func (s *AgentMailboxStore) persistMessagesMetaLocked(offset int64) error {
meta := mailboxMessagesMetaFile{
Version: 1,
LastOffset: offset,
MsgSeq: s.msgSeq,
UpdatedAt: time.Now().UnixMilli(),
Messages: make([]AgentMessage, 0, len(s.messages)),
ThreadMessages: cloneStringSliceMap(s.threadMessages),
QueuedByAgent: cloneStringSliceMap(s.queuedByAgent),
QueuedByThreadAgent: cloneStringSliceMap(s.queuedByThreadAgent),
}
for _, msg := range s.messages {
meta.Messages = append(meta.Messages, *msg)
}
sort.Slice(meta.Messages, func(i, j int) bool {
if meta.Messages[i].CreatedAt != meta.Messages[j].CreatedAt {
return meta.Messages[i].CreatedAt < meta.Messages[j].CreatedAt
}
return meta.Messages[i].MessageID < meta.Messages[j].MessageID
})
return jsonlog.WriteJSON(s.msgsMetaPath, meta)
}
func parseThreadSequence(threadID string) int {
threadID = strings.TrimSpace(threadID)
if !strings.HasPrefix(threadID, "thread-") {
@@ -386,3 +544,45 @@ func parseMessageSequence(messageID string) int {
n, _ := strconv.Atoi(strings.TrimPrefix(messageID, "msg-"))
return n
}
func mailboxThreadAgentKey(threadID, agentID string) string {
return strings.TrimSpace(threadID) + "::" + strings.TrimSpace(agentID)
}
func cloneStringSliceMap(src map[string][]string) map[string][]string {
if len(src) == 0 {
return map[string][]string{}
}
out := make(map[string][]string, len(src))
for key, values := range src {
out[key] = append([]string(nil), values...)
}
return out
}
func appendUniqueString(values []string, value string) []string {
value = strings.TrimSpace(value)
if value == "" {
return values
}
for _, existing := range values {
if existing == value {
return values
}
}
return append(values, value)
}
func removeStringValue(values []string, value string) []string {
if len(values) == 0 {
return values
}
out := values[:0]
for _, existing := range values {
if existing == value {
continue
}
out = append(out, existing)
}
return append([]string(nil), out...)
}