8000 `server`: add `--reasoning-budget 0` to disable thinking (incl. qwen3 w/ enable_thinking:false) by ochafik · Pull Request #13771 · ggml-org/llama.cpp · GitHub
[go: up one dir, main page]

Skip to content

server: add --reasoning-budget 0 to disable thinking (incl. qwen3 w/ enable_thinking:false) #13771

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
May 25, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
server: fix/test add_generation_prompt
  • Loading branch information
ochafik committed May 25, 2025
commit 8a25f79620d9bd3b4e5594cd167a06fbd2573d32
25 changes: 25 additions & 0 deletions tools/server/tests/unit/test_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,28 @@ def test_date_inside_prompt(template_name: str, format: str, tools: list[dict]):

today_str = datetime.date.today().strftime(format)
assert today_str in prompt, f"Expected today's date ({today_str}) in content ({prompt})"


@pytest.mark.parametrize("add_generation_prompt", [False, True])
@pytest.mark.parametrize("template_name,expected_generation_prompt", [
("meta-llama-Llama-3.3-70B-Instruct", "<|start_header_id|>assistant<|end_header_id|>"),
])
def test_add_generation_prompt(template_name: str, expected_generation_prompt: str, add_generation_prompt: bool):
global server
server.jinja = True
server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
server.start(timeout_seconds=TIMEOUT_SERVER_START)

res = server.make_request("POST", "/apply-template", data={
"messages": [
{"role": "user", "content": "What is today?"},
],
"add_generation_prompt": add_generation_prompt,
})
assert res.status_code == 200
prompt = res.body["prompt"]

if add_generation_prompt:
assert expected_generation_prompt in prompt, f"Expected generation prompt ({expected_generation_prompt}) in content ({prompt})"
else:
assert expected_generation_prompt not in prompt, f"Did not expect generation prompt ({expected_generation_prompt}) in content ({prompt})"
1 change: 1 addition & 0 deletions tools/server/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,7 @@ static json oaicompat_chat_params_parse(
inputs.grammar = grammar;
inputs.use_jinja = opt.use_jinja;
inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true);
inputs.reasoning_format = opt.reasoning_format;
if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && body.contains("grammar")) {
throw std::runtime_error("Cannot use custom grammar constraints with tools.");
Expand Down
Loading
0