support audio content type (#250) · mark3labs/mcp-go@cb632f2 · GitHub
[go: up one dir, main page]

Skip to content

Commit cb632f2

Browse files
authored
support audio content type (#250)
1 parent 9d6b793 commit cb632f2

File tree

5 files changed

+75
-8
lines changed

5 files changed

+75
-8
lines changed

client/inprocess_test.go

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ func TestInProcessMCPClient(t *testing.T) {
3636
Type: "text",
3737
Text: "Input parameter: " + request.Params.Arguments["parameter-1"].(string),
3838
},
39+
mcp.AudioContent{
40+
Type: "audio",
41+
Data: "base64-encoded-audio-data",
42+
MIMEType: "audio/wav",
43+
},
3944
},
4045
}, nil
4146
})
@@ -77,6 +82,14 @@ func TestInProcessMCPClient(t *testing.T) {
7782
Text: "Test prompt with arg1: " + request.Params.Arguments["arg1"],
7883
},
7984
},
85+
{
86+
Role: mcp.RoleUser,
87+
Content: mcp.AudioContent{
88+
Type: "audio",
89+
Data: "base64-encoded-audio-data",
90+
MIMEType: "audio/wav",
91+
},
92+
},
8093
},
8194
}, nil
8295
},
@@ -192,8 +205,8 @@ func TestInProcessMCPClient(t *testing.T) {
192205
t.Fatalf("CallTool failed: %v", err)
193206
}
194207

195-
if len(result.Content) != 1 {
196-
t.Errorf("Expected 1 content item, got %d", len(result.Content))
208+
if len(result.Content) != 2 {
209+
t.Errorf("Expected 2 content item, got %d", len(result.Content))
197210
}
198211
})
199212

@@ -359,14 +372,17 @@ func TestInProcessMCPClient(t *testing.T) {
359372

360373
request := mcp.GetPromptRequest{}
361374
request.Params.Name = "test-prompt"
375+
request.Params.Arguments = map[string]string{
376+
"arg1": "arg1 value",
377+
}
362378

363379
result, err := client.GetPrompt(context.Background(), request)
364380
if err != nil {
365381
t.Errorf("GetPrompt failed: %v", err)
366382
}
367383

368-
if len(result.Messages) != 1 {
369-
t.Errorf("Expected 1 message, got %d", len(result.Messages))
384+
if len(result.Messages) != 2 {
385+
t.Errorf("Expected 2 message, got %d", len(result.Messages))
370386
}
371387
})
372388

mcp/prompts.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ const (
7878
// resources from the MCP server.
7979
type PromptMessage struct {
8080
Role Role `json:"role"`
81-
Content Content `json:"content"` // Can be TextContent, ImageContent, or EmbeddedResource
81+
Content Content `json:"content"` // Can be TextContent, ImageContent, AudioContent or EmbeddedResource
8282
}
8383

8484
// PromptListChangedNotification is an optional notification from the server

mcp/tools.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ type ListToolsResult struct {
3333
// should be reported as an MCP error response.
3434
type CallToolResult struct {
3535
Result
36-
Content []Content `json:"content"` // Can be TextContent, ImageContent, or EmbeddedResource
36+
Content []Content `json:"content"` // Can be TextContent, ImageContent, AudioContent, or EmbeddedResource
3737
// Whether the tool call ended in an error.
3838
//
3939
// If not set, this is assumed to be false (the call was successful).

mcp/types.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ type CreateMessageResult struct {
656656
// SamplingMessage describes a message issued to or received from an LLM API.
657657
type SamplingMessage struct {
658658
Role Role `json:"role"`
659-
Content interface{} `json:"content"` // Can be TextContent or ImageContent
659+
Content interface{} `json:"content"` // Can be TextContent, ImageContent or AudioContent
660660
}
661661

662662
type Annotations struct {
@@ -709,6 +709,19 @@ type ImageContent struct {
709709

710710
func (ImageContent) isContent() {}
711711

712+
// AudioContent represents the contents of audio, embedded into a prompt or tool call result.
713+
// It must have Type set to "audio".
714+
type AudioContent struct {
715+
Annotated
716+
Type string `json:"type"` // Must be "audio"
717+
// The base64-encoded audio data.
718+
Data string `json:"data"`
719+
// The MIME type of the audio. Different providers may support different audio types.
720+
MIMEType string `json:"mimeType"`
721+
}
722+
723+
func (AudioContent) isContent() {}
724+
712725
// EmbeddedResource represents the contents of a resource, embedded into a prompt or tool call result.
713726
//
714727
// It is up to the client how best to render embedded resources for the

mcp/utils.go

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,11 @@ func AsImageContent(content interface{}) (*ImageContent, bool) {
7878
return asType[ImageContent](content)
7979
}
8080

81+
// AsAudioContent attempts to cast the given interface to AudioContent
82+
func AsAudioContent(content interface{}) (*AudioContent, bool) {
83+
return asType[AudioContent](content)
84+
}
85+
8186
// AsEmbeddedResource attempts to cast the given interface to EmbeddedResource
8287
func AsEmbeddedResource(content interface{}) (*EmbeddedResource, bool) {
8388
return asType[EmbeddedResource](content)
@@ -208,7 +213,15 @@ func NewImageContent(data, mimeType string) ImageContent {
208213
}
209214
}
210215

211-
// NewEmbeddedResource
216+
// Helper function to create a new AudioContent
217+
func NewAudioContent(data, mimeType string) AudioContent {
218+
return AudioContent{
219+
Type: "audio",
220+
Data: data,
221+
MIMEType: mimeType,
222+
}
223+
}
224+
212225
// Helper function to create a new EmbeddedResource
213226
func NewEmbeddedResource(resource ResourceContents) EmbeddedResource {
214227
return EmbeddedResource{
@@ -246,6 +259,23 @@ func NewToolResultImage(text, imageData, mimeType string) *CallToolResult {
246259
}
247260
}
248261

262+
// NewToolResultAudio creates a new CallToolResult with both text and audio content
263+
func NewToolResultAudio(text, imageData, mimeType string) *CallToolResult {
264+
return &CallToolResult{
265+
Content: []Content{
266+
TextContent{
267+
Type: "text",
268+
Text: text,
269+
},
270+
AudioContent{
271+
Type: "audio",
272+
Data: imageData,
273+
MIMEType: mimeType,
274+
},
275+
},
276+
}
277+
}
278+
249279
// NewToolResultResource creates a new CallToolResult with an embedded resource
250280
func NewToolResultResource(
251281
text string,
@@ -423,6 +453,14 @@ func ParseContent(contentMap map[string]any) (Content, error) {
423453
}
424454
return NewImageContent(data, mimeType), nil
425455

456+
case "audio":
457+
data := ExtractString(contentMap, "data")
458+
mimeType := ExtractString(contentMap, "mimeType")
459+
if data == "" || mimeType == "" {
460+
return nil, fmt.Errorf("audio data or mimeType is missing")
461+
}
462+
return NewAudioContent(data, mimeType), nil
463+
426464
case "resource":
427465
resourceMap := ExtractMap(contentMap, "resource")
428466
if resourceMap == nil {

0 commit comments

Comments
 (0)
0