@@ -133,6 +133,7 @@ struct templates_params {
133
133
bool stream;
134
134
std::string grammar;
135
135
bool add_generation_prompt = true ;
136
+ bool enable_thinking = true ;
136
137
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
137
138
};
138
139
@@ -573,7 +574,7 @@ common_chat_templates_ptr common_chat_templates_init(
573
574
return tmpls;
574
575
}
575
576
576
- std::string common_chat_format_name (common_chat_format format) {
577
+ const char * common_chat_format_name (common_chat_format format) {
577
578
switch (format) {
578
579
case COMMON_CHAT_FORMAT_CONTENT_ONLY: return " Content-only" ;
579
580
case COMMON_CHAT_FORMAT_GENERIC: return " Generic" ;
@@ -591,6 +592,15 @@ std::string common_chat_format_name(common_chat_format format) {
591
592
}
592
593
}
593
594
595
+ const char * common_reasoning_format_name (common_reasoning_format format) {
596
+ switch (format) {
597
+ case COMMON_REASONING_FORMAT_NONE: return " none" ;
598
+ case COMMON_REASONING_FORMAT_DEEPSEEK: return " deepseek" ;
599
+ default :
600
+ throw std::runtime_error (" Unknown reasoning format" );
601
+ }
602
+ }
603
+
594
604
static std::string wrap_code_as_arguments (common_chat_msg_parser & builder, const std::string & code) {
595
605
std::string arguments;
596
606
if (builder.is_partial ()) {
@@ -924,7 +934,13 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
924
934
data.prompt = apply (tmpl, adjusted_messages, inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt , {});
925
935
data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
926
936
if (string_ends_with (data.prompt , " <|START_THINKING|>" )) {
927
- data.thinking_forced_open = true ;
937
+ if (!inputs.enable_thinking ) {
938
+ data.prompt += " <|END_THINKING|>" ;
939
+ } else {
940
+ data.thinking_forced_open = true ;
941
+ }
942
+ } else if (!inputs.enable_thinking && string_ends_with (data.prompt , " <|CHATBOT_TOKEN|>" )) {
943
+ data.prompt += " <|START_THINKING|><|END_THINKING|>" ;
928
944
}
929
945
930
946
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -1192,7 +1208,11 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
1192
1208
data.prompt = prompt;
1193
1209
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
1194
1210
if (string_ends_with (data.prompt , " <think>\n " )) {
1195
- data.thinking_forced_open = true ;
1211
+ if (!inputs.enable_thinking ) {
1212
+ data.prompt += " </think>" ;
1213
+ } else {
1214
+ data.thinking_forced_open = true ;
1215
+ }
1196
1216
}
1197
1217
1198
1218
if (inputs.tools .is_array () && !inputs.tools .empty ()) {
@@ -1477,104 +1497,114 @@ static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser
1477
1497
static common_chat_params common_chat_params_init_hermes_2_pro (const common_chat_template & tmpl, const struct templates_params & inputs) {
1478
1498
common_chat_params data;
1479
1499
1480
- data.prompt = apply (tmpl, inputs.messages , inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt );
1500
+ json additional_context = {
1501
+ {" enable_thinking" , inputs.enable_thinking },
1502
+ };
1503
+
1504
+ data.prompt = apply (tmpl, inputs.messages , inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt , additional_context);
1481
1505
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
1482
1506
if (string_ends_with (data.prompt , " <think>\n " )) {
1483
- data.thinking_forced_open = true ;
1507
+ if (!inputs.enable_thinking ) {
1508
+ data.prompt += " </think>" ;
1509
+ } else {
1510
+ data.thinking_forced_open = true ;
1511
+ }
1484
1512
}
1485
1513
1486
- // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1487
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1488
- data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1489
- std::vector<std::string> tool_rules;
1490
- std::vector<std::string> tool_call_alts;
1491
- std::vector<std::string> escaped_names;
1492
- foreach_function (inputs.tools , [&](const json & tool) {
1493
- const auto & function = tool.at (" function" );
1494
- std::string name = function.at (" name" );
1495
- auto parameters = function.at (" parameters" );
1496
- builder.resolve_refs (parameters);
1497
- tool_rules.push_back (builder.add_schema (name + " -call" , {
1498
- {" type" , " object" },
1499
- {" properties" , json {
1500
- {" name" , json {{" const" , name}}},
1501
- {" arguments" , parameters},
1502
- }},
1503
- {" required" , json::array ({" name" , " arguments" })},
1504
- }));
1505
- tool_call_alts.push_back (builder.add_rule (
1506
- name + " -function-tag" ,
1507
- " \" <function\" ( \" =" + name + " \" | \" name=\\\" " + name + " \\\"\" ) \" >\" space " +
1508
- builder.add_schema (name + " -args" , parameters) + " "
1509
- " \" </function>\" space" ));
1514
+ if (!inputs.tools .is_null ()) {
1515
+ // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1516
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1517
+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1518
+ std::vector<std::string> tool_rules;
1519
+ std::vector<std::string> tool_call_alts;
1520
+ std::vector<std::string> escaped_names;
1521
+ foreach_function (inputs.tools , [&](const json & tool) {
1522
+ const auto & function = tool.at (" function" );
1523
+ std::string name = function.at (" name" );
1524
+ auto parameters = function.at (" parameters" );
1525
+ builder.resolve_refs (parameters);
1526
+ tool_rules.push_back (builder.add_schema (name + " -call" , {
1527
+ {" type" , " object" },
1528
+ {" properties" , json {
1529
+ {" name" , json {{" const" , name}}},
1530
+ {" arguments" , parameters},
1531
+ }},
1532
+ {" required" , json::array ({" name" , " arguments" })},
1533
+ }));
1534
+ tool_call_alts.push_back (builder.add_rule (
1535
+ name + " -function-tag" ,
1536
+ " \" <function\" ( \" =" + name + " \" | \" name=\\\" " + name + " \\\"\" ) \" >\" space " +
1537
+ builder.add_schema (name + " -args" , parameters) + " "
1538
+ " \" </function>\" space" ));
1510
1539
1511
- data.grammar_triggers .push_back ({
1512
- COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1513
- " <function=" + name + " >" ,
1540
+ data.grammar_triggers .push_back ({
1541
+ COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1542
+ " <function=" + name + " >" ,
1543
+ });
1544
+ auto escaped_name = regex_escape (name);
1545
+ data.grammar_triggers .push_back ({
1546
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1547
+ " <function\\ s+name\\ s*=\\ s*\" " + escaped_name + " \" " ,
1548
+ });
1549
+ escaped_names.push_back (escaped_name);
1514
1550
});
1515
- auto escaped_name = regex_escape (name);
1551
+ auto any_tool_call = builder.add_rule (" any_tool_call" , " ( " + string_join (tool_rules, " | " ) + " ) space" );
1552
+ std::vector<std::string> alt_tags {
1553
+ any_tool_call,
1554
+ " \" <tool_call>\" space " + any_tool_call + " \" </tool_call>\" " ,
1555
+ // The rest is just to accommodate common "good bad" outputs.
1556
+ " \" <function_call>\" space " + any_tool_call + " \" </function_call>\" " ,
1557
+ " \" <response>\" space " + any_tool_call + " \" </response>\" " ,
1558
+ " \" <tools>\" space " + any_tool_call + " \" </tools>\" " ,
1559
+ " \" <json>\" space " + any_tool_call + " \" </json>\" " ,
1560
+ " \" <xml>\" space " + any_tool_call + " \" </xml>\" " ,
1561
+ " \" <JSON>\" space " + any_tool_call + " \" </JSON>\" " ,
1562
+ };
1563
+ auto wrappable_tool_call = builder.add_rule (" wrappable_tool_call" , " ( " + string_join (alt_tags, " | " ) + " ) space" );
1564
+ tool_call_alts.push_back (wrappable_tool_call);
1565
+ tool_call_alts.push_back (
1566
+ " ( \" ```\\ n\" | \" ```json\\ n\" | \" ```xml\\ n\" ) space " + wrappable_tool_call + " space \" ```\" space " );
1567
+ auto tool_call = builder.add_rule (" tool_call" , string_join (tool_call_alts, " | " ));
1568
+ builder.add_rule (" root" ,
1569
+ std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1570
+ (inputs.parallel_tool_calls ? " (" + tool_call + " )+" : tool_call));
1571
+ // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
1516
1572
data.grammar_triggers .push_back ({
1517
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1518
- " <function\\ s+name\\ s*=\\ s*\" " + escaped_name + " \" " ,
1573
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1574
+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1575
+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1576
+ std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) + (
1577
+ " (\\ s*"
1578
+ " (?:<tool_call>"
1579
+ " |<function"
1580
+ " |(?:```(?:json|xml)?\n\\ s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
1581
+ " \\ s*\\ {\\ s*\" name\"\\ s*:\\ s*\" (?:" + string_join (escaped_names, " |" ) + " )\" "
1582
+ " )"
1583
+ " )[\\ s\\ S]*"
1584
+ ),
1519
1585
});
1520
- escaped_names.push_back (escaped_name);
1521
- });
1522
- auto any_tool_call = builder.add_rule (" any_tool_call" , " ( " + string_join (tool_rules, " | " ) + " ) space" );
1523
- std::vector<std::string> alt_tags {
1524
- any_tool_call,
1525
- " \" <tool_call>\" space " + any_tool_call + " \" </tool_call>\" " ,
1526
- // The rest is just to accommodate common "good bad" outputs.
1527
- " \" <function_call>\" space " + any_tool_call + " \" </function_call>\" " ,
1528
- " \" <response>\" space " + any_tool_call + " \" </response>\" " ,
1529
- " \" <tools>\" space " + any_tool_call + " \" </tools>\" " ,
1530
- " \" <json>\" space " + any_tool_call + " \" </json>\" " ,
1531
- " \" <xml>\" space " + any_tool_call + " \" </xml>\" " ,
1532
- " \" <JSON>\" space " + any_tool_call + " \" </JSON>\" " ,
1533
- };
1534
- auto wrappable_tool_call = builder.add_rule (" wrappable_tool_call" , " ( " + string_join (alt_tags, " | " ) + " ) space" );
1535
- tool_call_alts.push_back (wrappable_tool_call);
1536
- tool_call_alts.push_back (
1537
- " ( \" ```\\ n\" | \" ```json\\ n\" | \" ```xml\\ n\" ) space " + wrappable_tool_call + " space \" ```\" space " );
1538
- auto tool_call = builder.add_rule (" tool_call" , string_join (tool_call_alts, " | " ));
1539
- builder.add_rule (" root" ,
1540
- std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1541
- (inputs.parallel_tool_calls ? " (" + tool_call + " )+" : tool_call));
1542
- // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
1543
- data.grammar_triggers .push_back ({
1544
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1545
- // If thinking_forced_open, then we capture the </think> tag in the grammar,
1546
- // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1547
- std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) + (
1548
- " (\\ s*"
1549
- " (?:<tool_call>"
1550
- " |<function"
1551
- " |(?:```(?:json|xml)?\n\\ s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
1552
- " \\ s*\\ {\\ s*\" name\"\\ s*:\\ s*\" (?:" + string_join (escaped_names, " |" ) + " )\" "
1553
- " )"
1554
- " )[\\ s\\ S]*"
1555
- ),
1586
+ data.preserved_tokens = {
1587
+ " <think>" ,
1588
+ " </think>" ,
1589
+ " <tool_call>" ,
1590
+ " </tool_call>" ,
1591
+ " <function" ,
1592
+ " <tools>" ,
1593
+ " </tools>" ,
1594
+ " <response>" ,
1595
+ " </response>" ,
1596
+ " <function_call>" ,
1597
+ " </function_call>" ,
1598
+ " <json>" ,
1599
+ " </json>" ,
1600
+ " <JSON>" ,
1601
+ " </JSON>" ,
1602
+ " ```" ,
1603
+ " ```json" ,
1604
+ " ```xml" ,
1605
+ };
1556
1606
});
1557
- data.preserved_tokens = {
1558
- " <think>" ,
1559
- " </think>" ,
1560
- " <tool_call>" ,
1561
- " </tool_call>" ,
1562
- " <function" ,
1563
- " <tools>" ,
1564
- " </tools>" ,
1565
- " <response>" ,
1566
- " </response>" ,
1567
- " <function_call>" ,
1568
- " </function_call>" ,
1569
- " <json>" ,
1570
- " </json>" ,
1571
- " <JSON>" ,
1572
- " </JSON>" ,
1573
- " ```" ,
1574
- " ```json" ,
1575
- " ```xml" ,
1576
- };
1577
- });
1607
+ }
1578
1608
1579
1609
return data;
1580
1610
}
@@ -1688,6 +1718,7 @@ static common_chat_params common_chat_templates_apply_jinja(
1688
1718
params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages , /* concat_text= */ !tmpl.original_caps ().requires_typed_content );
1689
1719
params.add_generation_prompt = inputs.add_generation_prompt ;
1690
1720
params.tool_choice = inputs.tool_choice ;
1721
+ params.enable_thinking = inputs.enable_thinking ;
1691
1722
params.grammar = inputs.grammar ;
1692
1723
params.now = inputs.now ;
1693
1724
if (!inputs.json_schema .empty ()) {
@@ -1721,7 +1752,7 @@ static common_chat_params common_chat_templates_apply_jinja(
1721
1752
}
1722
1753
1723
1754
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
1724
- if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null () && params. tools . is_array () && params. json_schema . is_null () ) {
1755
+ if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null ()) {
1725
1756
return common_chat_params_init_hermes_2_pro (tmpl, params);
1726
1757
}
1727
1758
@@ -1840,7 +1871,7 @@ static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
1840
1871
}
1841
1872
1842
1873
static void common_chat_parse (common_chat_msg_parser & builder) {
1843
- LOG_DBG (" Parsing input with format %s: %s\n " , common_chat_format_name (builder.syntax ().format ). c_str () , builder.input ().c_str ());
1874
+ LOG_DBG (" Parsing input with format %s: %s\n " , common_chat_format_name (builder.syntax ().format ), builder.input ().c_str ());
1844
1875
1845
1876
switch (builder.syntax ().format ) {
1846
1877
case COMMON_CHAT_FORMAT_CONTENT_ONLY:
0 commit comments