@@ -133,6 +133,7 @@ struct templates_params {
133
133
bool stream;
134
134
std::string grammar;
135
135
bool add_generation_prompt = true ;
136
+ bool enable_thinking = true ;
136
137
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
137
138
};
138
139
@@ -573,7 +574,7 @@ common_chat_templates_ptr common_chat_templates_init(
573
574
return tmpls;
574
575
}
575
576
576
- std::string common_chat_format_name (common_chat_format format) {
577
+ const char * common_chat_format_name (common_chat_format format) {
577
578
switch (format) {
578
579
case COMMON_CHAT_FORMAT_CONTENT_ONLY: return " Content-only" ;
579
580
case COMMON_CHAT_FORMAT_GENERIC: return " Generic" ;
@@ -591,6 +592,15 @@ std::string common_chat_format_name(common_chat_format format) {
591
592
}
592
593
}
593
594
595
// Maps a common_reasoning_format value to its name, returned as a pointer to a string literal.
// Only COMMON_REASONING_FORMAT_NONE and COMMON_REASONING_FORMAT_DEEPSEEK are handled;
// every other value falls into `default` and throws std::runtime_error.
// NOTE(review): this text is a rendered diff (`+` marks added lines, bare numbers are line numbers);
// the space after each opening quote in the literals looks like an extraction artifact - confirm against the real file.
+ const char * common_reasoning_format_name (common_reasoning_format format) {
596
+ switch (format) {
597
+ case COMMON_REASONING_FORMAT_NONE: return " none" ;
598
+ case COMMON_REASONING_FORMAT_DEEPSEEK: return " deepseek" ;
599
+ default :
600
+ throw std::runtime_error (" Unknown reasoning format" );
601
+ }
602
+ }
603
+
594
604
static std::string wrap_code_as_arguments (common_chat_msg_parser & builder, const std::string & code) {
595
605
std::string arguments;
596
606
if (builder.is_partial ()) {
@@ -918,7 +928,13 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
918
928
data.prompt = apply (tmpl, adjusted_messages, inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt , {});
919
929
data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
920
930
if (string_ends_with (data.prompt , " <|START_THINKING|>" )) {
921
- data.thinking_forced_open = true ;
931
+ if (!inputs.enable_thinking ) {
932
+ data.prompt += " <|END_THINKING|>" ;
933
+ } else {
934
+ data.thinking_forced_open = true ;
935
+ }
936
+ } else if (!inputs.enable_thinking && string_ends_with (data.prompt , " <|CHATBOT_TOKEN|>" )) {
937
+ data.prompt += " <|START_THINKING|><|END_THINKING|>" ;
922
938
}
923
939
924
940
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -1186,7 +1202,11 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
1186
1202
data.prompt = prompt;
1187
1203
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
1188
1204
if (string_ends_with (data.prompt , " <think>\n " )) {
1189
- data.thinking_forced_open = true ;
1205
+ if (!inputs.enable_thinking ) {
1206
+ data.prompt += " </think>" ;
1207
+ } else {
1208
+ data.thinking_forced_open = true ;
1209
+ }
1190
1210
}
1191
1211
1192
1212
if (inputs.tools .is_array () && !inputs.tools .empty ()) {
@@ -1460,104 +1480,114 @@ static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser
1460
1480
// Builds the common_chat_params (prompt, format, and tool-call grammar) for the
// COMMON_CHAT_FORMAT_HERMES_2_PRO format from a chat template and the request inputs.
//
// NOTE(review): this span is a rendered diff, not plain source. Lines starting with `-` are the
// pre-change body, lines starting with `+` are the post-change body, unprefixed code lines are
// unchanged context, and bare numbers are old/new line numbers.
//
// Post-change behaviour visible in the `+` lines:
//  - inputs.enable_thinking is handed to apply() through an `additional_context` json object.
//  - When the rendered prompt ends with the opening think tag: if thinking is disabled the closing
//    think tag is appended to the prompt, otherwise data.thinking_forced_open is set.
//  - grammar_lazy, grammar, grammar_triggers and preserved_tokens are only populated when
//    inputs.tools is not null; before the change they were populated unconditionally.
static common_chat_params common_chat_params_init_hermes_2_pro (const common_chat_template & tmpl, const struct templates_params & inputs) {
1461
1481
common_chat_params data;
1462
1482
1463
- data.prompt = apply (tmpl, inputs.messages , inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt );
1483
+ json additional_context = {
1484
+ {" enable_thinking" , inputs.enable_thinking },
1485
+ };
1486
+
1487
+ data.prompt = apply (tmpl, inputs.messages , inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt , additional_context);
1464
1488
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
1465
1489
if (string_ends_with (data.prompt , " <think>\n " )) {
1466
- data.thinking_forced_open = true ;
1490
+ if (!inputs.enable_thinking ) {
1491
+ data.prompt += " </think>" ;
1492
+ } else {
1493
+ data.thinking_forced_open = true ;
1494
+ }
1467
1495
}
1468
1496
1469
- // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1470
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1471
- data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1472
- std::vector<std::string> tool_rules;
1473
- std::vector<std::string> tool_call_alts;
1474
- std::vector<std::string> escaped_names;
1475
- foreach_function (inputs.tools , [&](const json & tool) {
1476
- const auto & function = tool.at (" function" );
1477
- std::string name = function.at (" name" );
1478
- auto parameters = function.at (" parameters" );
1479
- builder.resolve_refs (parameters);
1480
- tool_rules.push_back (builder.add_schema (name + " -call" , {
1481
- {" type" , " object" },
1482
- {" properties" , json {
1483
- {" name" , json {{" const" , name}}},
1484
- {" arguments" , parameters},
1485
- }},
1486
- {" required" , json::array ({" name" , " arguments" })},
1487
- }));
1488
- tool_call_alts.push_back (builder.add_rule (
1489
- name + " -function-tag" ,
1490
- " \" <function\" ( \" =" + name + " \" | \" name=\\\" " + name + " \\\"\" ) \" >\" space " +
1491
- builder.add_schema (name + " -args" , parameters) + " "
1492
- " \" </function>\" space" ));
1497
// Post-change guard: everything grammar-related below is skipped when inputs.tools is null.
+ if (!inputs.tools .is_null ()) {
1498
+ // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1499
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1500
+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1501
+ std::vector<std::string> tool_rules;
1502
+ std::vector<std::string> tool_call_alts;
1503
+ std::vector<std::string> escaped_names;
1504
+ foreach_function (inputs.tools , [&](const json & tool) {
1505
+ const auto & function = tool.at (" function" );
1506
+ std::string name = function.at (" name" );
1507
+ auto parameters = function.at (" parameters" );
1508
+ builder.resolve_refs (parameters);
1509
+ tool_rules.push_back (builder.add_schema (name + " -call" , {
1510
+ {" type" , " object" },
1511
+ {" properties" , json {
1512
+ {" name" , json {{" const" , name}}},
1513
+ {" arguments" , parameters},
1514
+ }},
1515
+ {" required" , json::array ({" name" , " arguments" })},
1516
+ }));
1517
+ tool_call_alts.push_back (builder.add_rule (
1518
+ name + " -function-tag" ,
1519
+ " \" <function\" ( \" =" + name + " \" | \" name=\\\" " + name + " \\\"\" ) \" >\" space " +
1520
+ builder.add_schema (name + " -args" , parameters) + " "
1521
+ " \" </function>\" space" ));
1493
1522
1494
- data.grammar_triggers .push_back ({
1495
- COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1496
- " <function=" + name + " >" ,
1523
+ data.grammar_triggers .push_back ({
1524
+ COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1525
+ " <function=" + name + " >" ,
1526
+ });
1527
+ auto escaped_name = regex_escape (name);
1528
+ data.grammar_triggers .push_back ({
1529
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1530
+ " <function\\ s+name\\ s*=\\ s*\" " + escaped_name + " \" " ,
1531
+ });
1532
+ escaped_names.push_back (escaped_name);
1497
1533
});
1498
- auto escaped_name = regex_escape (name);
1534
+ auto any_tool_call = builder.add_rule (" any_tool_call" , " ( " + string_join (tool_rules, " | " ) + " ) space" );
1535
+ std::vector<std::string> alt_tags {
1536
+ any_tool_call,
1537
+ " \" <tool_call>\" space " + any_tool_call + " \" </tool_call>\" " ,
1538
+ // The rest is just to accommodate common "good bad" outputs.
1539
+ " \" <function_call>\" space " + any_tool_call + " \" </function_call>\" " ,
1540
+ " \" <response>\" space " + any_tool_call + " \" </response>\" " ,
1541
+ " \" <tools>\" space " + any_tool_call + " \" </tools>\" " ,
1542
+ " \" <json>\" space " + any_tool_call + " \" </json>\" " ,
1543
+ " \" <xml>\" space " + any_tool_call + " \" </xml>\" " ,
1544
+ " \" <JSON>\" space " + any_tool_call + " \" </JSON>\" " ,
1545
+ };
1546
+ auto wrappable_tool_call = builder.add_rule (" wrappable_tool_call" , " ( " + string_join (alt_tags, " | " ) + " ) space" );
1547
+ tool_call_alts.push_back (wrappable_tool_call);
1548
+ tool_call_alts.push_back (
1549
+ " ( \" ```\\ n\" | \" ```json\\ n\" | \" ```xml\\ n\" ) space " + wrappable_tool_call + " space \" ```\" space " );
1550
+ auto tool_call = builder.add_rule (" tool_call" , string_join (tool_call_alts, " | " ));
1551
// Root rule: an optional closing think tag when thinking_forced_open is set, followed by
// a single tool_call, or one-or-more tool_call when inputs.parallel_tool_calls is set.
+ builder.add_rule (" root" ,
1552
+ std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1553
+ (inputs.parallel_tool_calls ? " (" + tool_call + " )+" : tool_call));
1554
+ // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
1499
1555
data.grammar_triggers .push_back ({
1500
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1501
- " <function\\ s+name\\ s*=\\ s*\" " + escaped_name + " \" " ,
1556
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1557
+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1558
+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1559
+ std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) + (
1560
+ " (\\ s*"
1561
+ " (?:<tool_call>"
1562
+ " |<function"
1563
+ " |(?:```(?:json|xml)?\n\\ s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
1564
+ " \\ s*\\ {\\ s*\" name\"\\ s*:\\ s*\" (?:" + string_join (escaped_names, " |" ) + " )\" "
1565
+ " )"
1566
+ " )[\\ s\\ S]*"
1567
+ ),
1502
1568
});
1503
- escaped_names.push_back (escaped_name);
1504
- });
1505
- auto any_tool_call = builder.add_rule (" any_tool_call" , " ( " + string_join (tool_rules, " | " ) + " ) space" );
1506
- std::vector<std::string> alt_tags {
1507
- any_tool_call,
1508
- " \" <tool_call>\" space " + any_tool_call + " \" </tool_call>\" " ,
1509
- // The rest is just to accommodate common "good bad" outputs.
1510
- " \" <function_call>\" space " + any_tool_call + " \" </function_call>\" " ,
1511
- " \" <response>\" space " + any_tool_call + " \" </response>\" " ,
1512
- " \" <tools>\" space " + any_tool_call + " \" </tools>\" " ,
1513
- " \" <json>\" space " + any_tool_call + " \" </json>\" " ,
1514
- " \" <xml>\" space " + any_tool_call + " \" </xml>\" " ,
1515
- " \" <JSON>\" space " + any_tool_call + " \" </JSON>\" " ,
1516
- };
1517
- auto wrappable_tool_call = builder.add_rule (" wrappable_tool_call" , " ( " + string_join (alt_tags, " | " ) + " ) space" );
1518
- tool_call_alts.push_back (wrappable_tool_call);
1519
- tool_call_alts.push_back (
1520
- " ( \" ```\\ n\" | \" ```json\\ n\" | \" ```xml\\ n\" ) space " + wrappable_tool_call + " space \" ```\" space " );
1521
- auto tool_call = builder.add_rule (" tool_call" , string_join (tool_call_alts, " | " ));
1522
- builder.add_rule (" root" ,
1523
- std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1524
- (inputs.parallel_tool_calls ? " (" + tool_call + " )+" : tool_call));
1525
- // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
1526
- data.grammar_triggers .push_back ({
1527
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1528
- // If thinking_forced_open, then we capture the </think> tag in the grammar,
1529
- // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1530
- std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) + (
1531
- " (\\ s*"
1532
- " (?:<tool_call>"
1533
- " |<function"
1534
- " |(?:```(?:json|xml)?\n\\ s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
1535
- " \\ s*\\ {\\ s*\" name\"\\ s*:\\ s*\" (?:" + string_join (escaped_names, " |" ) + " )\" "
1536
- " )"
1537
- " )[\\ s\\ S]*"
1538
- ),
1569
+ data.preserved_tokens = {
1570
+ " <think>" ,
1571
+ " </think>" ,
1572
+ " <tool_call>" ,
1573
+ " </tool_call>" ,
1574
+ " <function" ,
1575
+ " <tools>" ,
1576
+ " </tools>" ,
1577
+ " <response>" ,
1578
+ " </response>" ,
1579
+ " <function_call>" ,
1580
+ " </function_call>" ,
1581
+ " <json>" ,
1582
+ " </json>" ,
1583
+ " <JSON>" ,
1584
+ " </JSON>" ,
1585
+ " ```" ,
1586
+ " ```json" ,
1587
+ " ```xml" ,
1588
+ };
1539
1589
});
1540
- data.preserved_tokens = {
1541
- " <think>" ,
1542
- " </think>" ,
1543
- " <tool_call>" ,
1544
- " </tool_call>" ,
1545
- " <function" ,
1546
- " <tools>" ,
1547
- " </tools>" ,
1548
- " <response>" ,
1549
- " </response>" ,
1550
- " <function_call>" ,
1551
- " </function_call>" ,
1552
- " <json>" ,
1553
- " </json>" ,
1554
- " <JSON>" ,
1555
- " </JSON>" ,
1556
- " ```" ,
1557
- " ```json" ,
1558
- " ```xml" ,
1559
- };
1560
- });
1590
+ }
1561
1591
1562
1592
return data;
1563
1593
}
@@ -1669,6 +1699,7 @@ static common_chat_params common_chat_templates_apply_jinja(
1669
1699
params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages , /* concat_text= */ !tmpl.original_caps ().requires_typed_content );
1670
1700
params.add_generation_prompt = inputs.add_generation_prompt ;
1671
1701
params.tool_choice = inputs.tool_choice ;
1702
+ params.enable_thinking = inputs.enable_thinking ;
1672
1703
params.grammar = inputs.grammar ;
1673
1704
params.now = inputs.now ;
1674
1705
if (!inputs.json_schema .empty ()) {
@@ -1702,7 +1733,7 @@ static common_chat_params common_chat_templates_apply_jinja(
1702
1733
}
1703
1734
1704
1735
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
1705
- if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null () && params. tools . is_array () && params. json_schema . is_null () ) {
1736
+ if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null ()) {
1706
1737
return common_chat_params_init_hermes_2_pro (tmpl, params);
1707
1738
}
1708
1739
@@ -1821,7 +1852,7 @@ static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
1821
1852
}
1822
1853
1823
1854
static void common_chat_parse (common_chat_msg_parser & builder, common_chat_format format) {
1824
- LOG_DBG (" Parsing input with format %s: %s\n " , common_chat_format_name (format). c_str () , builder.input ().c_str ());
1855
+ LOG_DBG (" Parsing input with format %s: %s\n " , common_chat_format_name (format), builder.input ().c_str ());
1825
1856
1826
1857
switch (format) {
1827
1858
case COMMON_CHAT_FORMAT_CONTENT_ONLY:
@@ -1858,7 +1889,7 @@ static void common_chat_parse(common_chat_msg_parser & builder, common_chat_form
1858
1889
common_chat_parse_command_r7b (builder);
1859
1890
break ;
1860
1891
default :
1861
- throw std::runtime_error (" Unsupported format: " + common_chat_format_name (format));
1892
+ throw std::runtime_error (std::string ( " Unsupported format: " ) + common_chat_format_name (format));
1862
1893
}
1863
1894
builder.finish ();
1864
1895
}
0 commit comments