8000 server: fix streaming crashes by ochafik · Pull Request #13786 · ggml-org/llama.cpp · GitHub
[go: up one dir, main page]

Skip to content

server: fix streaming crashes #13786

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
allow all parsers to parse non-tool-call content.
  • Loading branch information
ochafik committed May 26, 2025
commit 6f8c7aa0368367bd504b9538f24934f995a0fef7
1 change: 1 addition & 0 deletions common/chat-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class common_chat_msg_parser {
const std::string & healing_marker() const { return healing_marker_; }
const bool & is_partial() const { return is_partial_; }
const common_chat_msg & result() const { return result_; }
const common_chat_syntax & syntax() const { return syntax_; }

void move_to(size_t pos) {
if (pos > input_.size()) {
Expand Down
40 changes: 35 additions & 5 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,10 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
return data;
}
static void common_chat_parse_generic(common_chat_msg_parser & builder) {
if (!builder.syntax().parse_tool_calls) {
builder.add_content(builder.consume_rest());
return;
}
static const std::vector<std::vector<std::string>> content_paths = {
{"response"},
};
Expand Down Expand Up @@ -892,6 +896,11 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
return data;
}
// Parse Mistral-Nemo output: tool calls are emitted as a JSON array
// prefixed by the literal "[TOOL_CALLS]" marker.
static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
    if (builder.syntax().parse_tool_calls) {
        // Marker regex is built once; regex_escape guards the [ ] metacharacters.
        static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
        parse_prefixed_json_tool_call_array(builder, prefix);
        return;
    }
    // Tool-call parsing disabled: treat the whole remainder as plain content.
    builder.add_content(builder.consume_rest());
}
Expand Down Expand Up @@ -1104,6 +1113,11 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te
return data;
}
static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
if (!builder.syntax().parse_tool_calls) {
builder.add_content(builder.consume_rest());
return;
}

static const common_regex function_regex(
"\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
static const common_regex close_regex("\\}\\s*");
Expand Down Expand Up @@ -1225,6 +1239,10 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
}
static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
builder.try_parse_reasoning("<think>", "</think>");
if (!builder.syntax().parse_tool_calls) {
builder.add_content(builder.consume_rest());
return;
}

static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
static const common_regex tool_calls_end("<|tool▁calls▁end|>");
Expand Down Expand Up @@ -1286,6 +1304,10 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
return data;
}
// Parse FireFunction-v2 output: tool calls are a JSON array prefixed by
// " functools[" (the trailing '[' is stripped so the array itself parses).
static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
    if (builder.syntax().parse_tool_calls) {
        // Built once on first use; regex_escape neutralizes the '[' metacharacter.
        static const common_regex prefix(regex_escape(" functools["));
        parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
        return;
    }
    // Tool-call parsing disabled: everything left is plain content.
    builder.add_content(builder.consume_rest());
}
Expand Down Expand Up @@ -1427,6 +1449,10 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
return data;
}
static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
if (!builder.syntax().parse_tool_calls) {
builder.add_content(builder.consume_rest());
return;
}
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));

Expand Down Expand Up @@ -1554,6 +1580,10 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
}
static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
builder.try_parse_reasoning("<think>", "</think>");
if (!builder.syntax().parse_tool_calls) {
builder.add_content(builder.consume_rest());
return;
}

static const common_regex open_regex(
"(?:"
Expand Down Expand Up @@ -1809,10 +1839,10 @@ static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
builder.add_content(builder.consume_rest());
}

static void common_chat_parse(common_chat_msg_parser & builder, common_chat_format format) {
LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(format).c_str(), builder.input().c_str());
static void common_chat_parse(common_chat_msg_parser & builder) {
LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format).c_str(), builder.input().c_str());

switch (format) {
switch (builder.syntax().format) {
case COMMON_CHAT_FORMAT_CONTENT_ONLY:
common_chat_parse_content_only(builder);
break;
Expand Down Expand Up @@ -1847,15 +1877,15 @@ static void common_chat_parse(common_chat_msg_parser & builder, common_chat_form
common_chat_parse_command_r7b(builder);
break;
default:
throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
}
builder.finish();
}

common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
common_chat_msg_parser builder(input, is_partial, syntax);
try {
common_chat_parse(builder, syntax.format);
common_chat_parse(builder);
} catch (const common_chat_msg_partial_exception & ex) {
LOG_DBG("Partial parse: %s\n", ex.what());
if (!is_partial) {
Expand Down
1 change: 1 addition & 0 deletions common/chat.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ struct common_chat_syntax {
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
bool reasoning_in_content = false;
bool thinking_forced_open = false;
bool parse_tool_calls = true;
};

// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
Expand Down
72 changes: 46 additions & 26 deletions tests/test-chat.cpp
(table cell markup removed — broken HTML residue from the diff view for tests/test-chat.cpp, line 625)
Original file line number Diff line number Diff line change
Expand Up @@ -401,9 +401,12 @@ static common_chat_msg simple_assist_msg(const std::string & content, const std:
}
return msg;
}
const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?");
const common_chat_msg message_assist_empty = simple_assist_msg("");
const common_chat_msg message_assist_thoughts_unparsed_deepseek = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?");
const common_chat_msg message_assist_empty = simple_assist_msg("");
const common_chat_msg message_assist_thoughts_unparsed_deepseek = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
const common_chat_msg message_assist_thoughts_unparsed_md = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
const common_chat_msg message_assist_thoughts_unparsed_md_partial = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");

const common_chat_msg message_assist_thoughts_unparsed_r7b = simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
const common_chat_msg message_assist_thoughts_unopened_unparsed = simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
Expand Down Expand Up @@ -591,8 +594,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));
assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
common_chat_parse(
Expand All @@ -619,8 +620,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));
assert_msg_equals(message_assist_thoughts_call_idx,
common_chat_parse(
Expand All @@ -632,8 +631,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));
assert_msg_equals(message_assist_thoughts_no_content,
common_chat_parse(
Expand All @@ -644,8 +641,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));

test_templates(tmpls.get(), end_tokens, message_assist_call_idx, tools,
Expand Down Expand Up @@ -675,6 +670,18 @@ static void test_template_output_parsers() {

// Generic tool calls doesn't generate / parse content-only messages symmetrically.

assert_equals(
simple_assist_msg("{ \"tool_call\" : { \"name\" : \"t"),
common_chat_parse(
"{ \"tool_call\" : { \"name\" : \"t",
/* is_partial= */ true,
{
/* .format = */ COMMON_CHAT_FORMAT_GENERIC,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ true,
/* .parse_tool_calls = */ false,
}));
assert_equals(
message_assist_empty,
common_chat_parse(
Expand Down Expand Up @@ -776,8 +783,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));
assert_msg_equals(
simple_assist_msg("Let's call something\n"),
Expand All @@ -788,8 +793,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));
assert_msg_equals(message_assist_call_thoughts,
common_chat_parse(
Expand Down Expand Up @@ -979,7 +982,34 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
}));
assert_msg_equals(message_assist_thoughts,
common_chat_parse(
"<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
/* is_partial= */ true,
{
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
assert_msg_equals(message_assist_thoughts_unparsed_md,
common_chat_parse(
"<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
/* is_partial= */ false,
{
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ true,
/* .thinking_forced_open = */ false,
/* .parse_tool_calls = */ false,
}));
assert_msg_equals(message_assist_thoughts_unparsed_md_partial,
common_chat_parse(
"<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
/* is_partial= */ true,
{
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ true,
/* .thinking_forced_open = */ false,
}));
assert_msg_equals(message_assist_thoughts_unopened_unparsed,
Expand All @@ -989,8 +1019,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));
assert_msg_equals(message_assist_thoughts,
common_chat_parse(
Expand Down Expand Up @@ -1187,8 +1215,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));
assert_msg_equals(message_assist_thoughts_unopened_unparsed,
common_chat_parse(
Expand All @@ -1197,8 +1223,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));
assert_msg_equals(message_assist_thoughts,
common_chat_parse(
Expand Down Expand Up @@ -1252,8 +1276,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));
assert_msg_equals(message_assist_thoughts,
common_chat_parse(
Expand Down Expand Up @@ -1295,8 +1317,6 @@ static void test_template_output_parsers() {
{
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}));
test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
"<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
Expand Down
1 change: 1 addition & 0 deletions tools/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ struct server_task {
params.oaicompat_chat_syntax.reasoning_format = params_base.reasoning_format;
params.oaicompat_chat_syntax.reasoning_in_content = params.stream;
params.oaicompat_chat_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
params.oaicompat_chat_syntax.parse_tool_calls = json_value(data, "parse_tool_calls", false);
}

{
Expand Down
7 changes: 5 additions & 2 deletions tools/server/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,8 +732,11 @@ static json oaicompat_chat_params_parse(
inputs.use_jinja = opt.use_jinja;
inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
inputs.reasoning_format = opt.reasoning_format;
if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && body.contains("grammar")) {
throw std::runtime_error("Cannot use custom grammar constraints with tools.");
if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
if (body.contains("grammar")) {
throw std::runtime_error("Cannot use custom grammar constraints with tools.");
}
llama_params["parse_tool_calls"] = true;
}

// if the assistant message appears at the end of list, we do not add end-of-turn token
Expand Down
163B
0