feat: implement browser automation tool and refine Go-native concurrency patterns

This commit is contained in:
DBT
2026-02-12 08:14:52 +00:00
parent 5e9813e3f2
commit 924b254284
4 changed files with 94 additions and 0 deletions

Binary file not shown.

View File

@@ -84,6 +84,9 @@ func NewAgentLoop(cfg *config.Config, msgBus *bus.MessageBus, provider providers
// Register parallel execution tool (leveraging Go's concurrency)
toolsRegistry.Register(tools.NewParallelTool(toolsRegistry))
// Register browser tool (integrated Chromium support)
toolsRegistry.Register(tools.NewBrowserTool())
// Register camera tool
toolsRegistry.Register(tools.NewCameraTool(workspace))
// Register system info tool

0
pkg/browser/browser.go Normal file
View File

91
pkg/tools/browser.go Normal file
View File

@@ -0,0 +1,91 @@
package tools
import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"strings"
	"time"
)
// BrowserTool drives a headless Chromium process through its command-line
// interface to capture screenshots or dump the rendered DOM of a page.
type BrowserTool struct {
	// chromePath is the Chromium executable to launch. When empty,
	// "chromium-browser" is used and resolved through PATH.
	chromePath string
	// timeout bounds a single Execute call. Zero or a negative value leaves
	// the caller's context as the only bound.
	timeout time.Duration
}

// NewBrowserTool returns a BrowserTool that launches the default Chromium
// binary and limits every call to 30 seconds.
func NewBrowserTool() *BrowserTool {
	return &BrowserTool{
		timeout: 30 * time.Second,
	}
}

// Name returns the identifier under which the tool is registered.
func (t *BrowserTool) Name() string {
	return "browser"
}

// Description returns a short human-readable summary of the tool.
func (t *BrowserTool) Description() string {
	return "Control a headless browser to capture screenshots or fetch dynamic content using Chromium."
}

// Parameters returns the JSON-schema-style description of the arguments
// accepted by Execute: a required "action" ("screenshot" or "content") and a
// required "url".
func (t *BrowserTool) Parameters() map[string]interface{} {
	return map[string]interface{}{
		"type": "object",
		"properties": map[string]interface{}{
			"action": map[string]interface{}{
				"type": "string",
				"enum": []string{"screenshot", "content"},
			},
			"url": map[string]interface{}{
				"type": "string",
			},
		},
		"required": []string{"action", "url"},
	}
}

// Execute runs the requested browser action against args["url"].
//
// It returns an error when the action is unknown, when the URL is missing,
// empty, or starts with '-', or when the Chromium process fails. The call is
// additionally bounded by the tool's timeout when that is positive.
func (t *BrowserTool) Execute(ctx context.Context, args map[string]interface{}) (string, error) {
	action, _ := args["action"].(string)

	var run func(context.Context, string) (string, error)
	switch action {
	case "screenshot":
		run = t.takeScreenshot
	case "content":
		run = t.fetchDynamicContent
	default:
		return "", fmt.Errorf("unknown browser action: %s", action)
	}

	// A missing or non-string "url" yields the empty string, which checkURL rejects.
	rawURL, _ := args["url"].(string)
	target, err := t.checkURL(rawURL)
	if err != nil {
		return "", err
	}

	if t.timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, t.timeout)
		defer cancel()
	}
	return run(ctx, target)
}

// binary returns the Chromium executable to launch, falling back to
// "chromium-browser" when no explicit path is configured.
func (t *BrowserTool) binary() string {
	if t.chromePath != "" {
		return t.chromePath
	}
	return "chromium-browser"
}

// checkURL validates a caller-supplied URL before it is placed on the
// Chromium command line and returns it with surrounding whitespace removed.
func (t *BrowserTool) checkURL(raw string) (string, error) {
	// Trim first so surrounding whitespace cannot disguise a leading dash.
	target := strings.TrimSpace(raw)
	if target == "" {
		return "", fmt.Errorf("browser url is required (got %q)", raw)
	}
	// An argument starting with '-' would be read by Chromium as a
	// command-line switch instead of a page to load.
	if strings.HasPrefix(target, "-") {
		return "", fmt.Errorf("invalid browser url %q: must not start with '-'", target)
	}
	return target, nil
}

// takeScreenshot renders url in headless Chromium and writes a PNG into the
// system temporary directory, returning a message that names the file.
func (t *BrowserTool) takeScreenshot(ctx context.Context, url string) (string, error) {
	// Simple CLI-based implementation: run Chromium with --headless.
	// Reserve a unique output file up front instead of guessing a free name.
	out, err := os.CreateTemp("", "screenshot_*.png")
	if err != nil {
		return "", fmt.Errorf("failed to take screenshot: %w", err)
	}
	outputPath := out.Name()
	if err := out.Close(); err != nil {
		_ = os.Remove(outputPath) // best-effort cleanup of the placeholder
		return "", fmt.Errorf("failed to take screenshot: %w", err)
	}

	bin := t.binary()
	cmd := exec.CommandContext(ctx, bin,
		"--headless",
		"--disable-gpu",
		"--no-sandbox",
		"--screenshot="+outputPath,
		url)
	if err := cmd.Run(); err != nil {
		_ = os.Remove(outputPath) // best-effort cleanup of the placeholder
		return "", fmt.Errorf("failed to take screenshot: %w (ensure %s is installed)", err, bin)
	}

	// The placeholder already exists, so confirm the browser actually wrote
	// image data before reporting success.
	info, err := os.Stat(outputPath)
	if err != nil {
		return "", fmt.Errorf("failed to take screenshot: %w", err)
	}
	if info.Size() == 0 {
		_ = os.Remove(outputPath) // best-effort cleanup of the placeholder
		return "", fmt.Errorf("failed to take screenshot: %s produced no output for %q", bin, url)
	}
	return fmt.Sprintf("Screenshot saved to: %s", outputPath), nil
}

// fetchDynamicContent loads url in headless Chromium and returns the
// serialized DOM that the browser prints to standard output.
func (t *BrowserTool) fetchDynamicContent(ctx context.Context, url string) (string, error) {
	// Simple implementation: rely on Chromium's --dump-dom switch.
	cmd := exec.CommandContext(ctx, t.binary(),
		"--headless",
		"--disable-gpu",
		"--no-sandbox",
		"--dump-dom",
		url)
	output, err := cmd.Output()
	if err != nil {
		return "", fmt.Errorf("failed to fetch content: %w", err)
	}
	return string(output), nil
}