diff --git a/flake.nix b/flake.nix index d0978fe..e52490a 100644 --- a/flake.nix +++ b/flake.nix @@ -41,6 +41,10 @@ # https://devenv.sh/reference/options/ languages.go.enable = true; languages.javascript.enable = true; + + packages = with pkgs; [ + opencv + ]; } ]; }; diff --git a/go.mod b/go.mod index 6d7a035..ef98a3f 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.24.2 require ( github.com/anthropics/anthropic-sdk-go v1.3.0 github.com/invopop/jsonschema v0.13.0 + gocv.io/x/gocv v0.41.0 ) require ( diff --git a/go.sum b/go.sum index 6de236b..9b5ca94 100644 --- a/go.sum +++ b/go.sum @@ -27,6 +27,8 @@ github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= +gocv.io/x/gocv v0.41.0 h1:KM+zRXUP28b6dHfhy+4JxDODbCNQNtLg8kio+YE7TqA= +gocv.io/x/gocv v0.41.0/go.mod h1:zYdWMj29WAEznM3Y8NsU3A0TRq/wR/cy75jeUypThqU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/main.go b/main.go index c0c1425..6ab0cdd 100644 --- a/main.go +++ b/main.go @@ -5,6 +5,7 @@ import ( "context" "encoding/base64" "encoding/json" + "errors" "fmt" "os" "path" @@ -13,6 +14,8 @@ import ( "github.com/anthropics/anthropic-sdk-go" "github.com/invopop/jsonschema" + + "gocv.io/x/gocv" ) func main() { @@ -30,6 +33,7 @@ func main() { ListFilesDefinition, EditFileDefinition, Base64EncodeFileDefinition, + WebcamDefinition, } agent := NewAgent(&client, getUserMessage, tools) @@ -375,3 +379,50 @@ func Base64EncodeFile(input json.RawMessage) (string, error) { return encoded, nil } + +var WebcamDefinition = ToolDefinition{ + Name: "webcam", + Description: `Take a picture using the computer's webcam. + + This way you can see what the user sees and provide a description of what + you see. + `, + InputSchema: WebcamDefinitionInputSchema, + Function: Webcam, +} + +type WebcamDefinitionInput struct{} + +var WebcamDefinitionInputSchema = GenerateSchema[WebcamDefinitionInput]() + +func Webcam(input json.RawMessage) (string, error) { + webcam, err := gocv.OpenVideoCapture(0) + if err != nil { + return "", err + } + defer webcam.Close() + + if !webcam.IsOpened() { + return "", errors.New("Unable to open video capture device") + } + + img := gocv.NewMat() + defer img.Close() + + if ok := webcam.Read(&img); !ok { + return "", errors.New("Cannot read from video capture device") + } + + if img.Empty() { + return "", errors.New("Capture image is empty") + } + + jpegData, err := gocv.IMEncode(".jpg", img) + if err != nil { + return "", err + } + + encoded := base64.StdEncoding.EncodeToString(jpegData.GetBytes()) + + return encoded, nil +}