Added webcam tool

This commit is contained in:
alejandro-angulo 2025-06-07 15:46:19 -07:00
parent b2e6d24fed
commit ed240e2f40
Signed by: alejandro-angulo
GPG key ID: 75579581C74554B6
4 changed files with 58 additions and 0 deletions

View file

@ -41,6 +41,10 @@
# https://devenv.sh/reference/options/
languages.go.enable = true;
languages.javascript.enable = true;
packages = with pkgs; [
opencv
];
}
];
};

1
go.mod
View file

@ -5,6 +5,7 @@ go 1.24.2
require (
github.com/anthropics/anthropic-sdk-go v1.3.0
github.com/invopop/jsonschema v0.13.0
gocv.io/x/gocv v0.41.0
)
require (

2
go.sum
View file

@ -27,6 +27,8 @@ github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
gocv.io/x/gocv v0.41.0 h1:KM+zRXUP28b6dHfhy+4JxDODbCNQNtLg8kio+YE7TqA=
gocv.io/x/gocv v0.41.0/go.mod h1:zYdWMj29WAEznM3Y8NsU3A0TRq/wR/cy75jeUypThqU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

51
main.go
View file

@ -5,6 +5,7 @@ import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"os"
"path"
@ -13,6 +14,8 @@ import (
"github.com/anthropics/anthropic-sdk-go"
"github.com/invopop/jsonschema"
"gocv.io/x/gocv"
)
func main() {
@ -30,6 +33,7 @@ func main() {
ListFilesDefinition,
EditFileDefinition,
Base64EncodeFileDefinition,
WebcamDefinition,
}
agent := NewAgent(&client, getUserMessage, tools)
@ -375,3 +379,50 @@ func Base64EncodeFile(input json.RawMessage) (string, error) {
return encoded, nil
}
// WebcamDefinition describes the "webcam" tool: its name, the description
// text shown for the tool, its input schema (WebcamDefinitionInputSchema), and
// the function that implements it (Webcam).
var WebcamDefinition = ToolDefinition{
Name: "webcam",
Description: `Take a picture using the computer's webcam.
This way you can see what the user sees and provide a description of what
you see.
`,
InputSchema: WebcamDefinitionInputSchema,
Function: Webcam,
}
// WebcamDefinitionInput is the input type for the webcam tool. It has no
// fields: the tool takes no parameters.
type WebcamDefinitionInput struct{}
// WebcamDefinitionInputSchema is the input schema for the webcam tool, produced
// by GenerateSchema from the (empty) WebcamDefinitionInput type.
var WebcamDefinitionInputSchema = GenerateSchema[WebcamDefinitionInput]()
// Webcam captures a single frame from video capture device 0, encodes it as
// JPEG, and returns the encoded image as a standard base64 string.
//
// The input argument is ignored; the tool takes no parameters.
//
// An error is returned when the device cannot be opened or read, when the
// captured frame is empty, or when JPEG encoding fails.
func Webcam(input json.RawMessage) (string, error) {
	webcam, err := gocv.OpenVideoCapture(0)
	if err != nil {
		return "", fmt.Errorf("opening video capture device: %w", err)
	}
	defer webcam.Close()

	if !webcam.IsOpened() {
		return "", errors.New("Unable to open video capture device")
	}

	img := gocv.NewMat()
	defer img.Close()

	if ok := webcam.Read(&img); !ok {
		return "", errors.New("Cannot read from video capture device")
	}
	if img.Empty() {
		return "", errors.New("Capture image is empty")
	}

	jpegData, err := gocv.IMEncode(".jpg", img)
	if err != nil {
		return "", fmt.Errorf("encoding frame as JPEG: %w", err)
	}
	// The encoded buffer lives in native memory and is not reclaimed by the Go
	// garbage collector, so it must be closed explicitly. The deferred Close
	// runs after the base64 string below has been built, so the bytes are
	// still valid while they are read.
	defer jpegData.Close()

	return base64.StdEncoding.EncodeToString(jpegData.GetBytes()), nil
}