3
3
4
4
server : ServerProcess
5
5
6
+ TIMEOUT_SERVER_START = 15 * 60
7
+ TIMEOUT_HTTP_REQUEST = 60
8
+
6
9
@pytest .fixture (autouse = True )
7
10
def create_server ():
8
11
global server
@@ -107,8 +110,8 @@ def test_completion_with_required_tool_tiny_fast(template_name: str, tool: dict,
107
110
108
111
@pytest .mark .slow
109
112
@pytest .mark .parametrize ("template_name,tool,argument_key" , [
110
- ("meta-llama-Llama-3.1-8B-Instruct" , TEST_TOOL , "success" ),
111
- ("meta-llama-Llama-3.1-8B-Instruct" , PYTHON_TOOL , "code" ),
113
+ ("meta-llama-Llama-3.1-8B-Instruct" , TEST_TOOL , "success" ),
114
+ ("meta-llama-Llama-3.1-8B-Instruct" , PYTHON_TOOL , "code" ),
112
115
("meetkai-functionary-medium-v3.1" , TEST_TOOL , "success" ),
113
116
("meetkai-functionary-medium-v3.1" , PYTHON_TOOL , "code" ),
114
117
("meetkai-functionary-medium-v3.2" , TEST_TOOL , "success" ),
@@ -131,44 +134,43 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
131
134
132
135
133
136
@pytest .mark .slow
134
- @pytest .mark .parametrize ("tool,argument_key,hf_repo,hf_file, template_override" , [
135
- (TEST_TOOL , "success" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF" , "Meta-Llama-3.1-8B-Instruct- Q4_K_M.gguf " , None ),
136
- (PYTHON_TOOL , "code" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF" , "Meta-Llama-3.1-8B-Instruct- Q4_K_M.gguf " , None ),
137
- (TEST_TOOL , "success" , "bartowski/gemma-2-2b-it-GGUF" , "gemma-2-2b-it- Q4_K_M.gguf" , None ),
138
- (PYTHON_TOOL , "code" , "bartowski/gemma-2-2b-it-GGUF" , "gemma-2-2b-it- Q4_K_M.gguf" , None ),
139
- (TEST_TOOL , "success" , "bartowski/Phi-3.5-mini-instruct-GGUF" , "Phi-3.5-mini-instruct- Q4_K_M.gguf" , None ),
140
- (PYTHON_TOOL , "code" , "bartowski/Phi-3.5-mini-instruct-GGUF" , "Phi-3.5-mini-instruct- Q4_K_M.gguf" , None ),
141
- (TEST_TOOL , "success" , "bartowski/Qwen2.5-7B-Instruct-GGUF" , "Qwen2.5-7B-Instruct- Q4_K_M.gguf" , None ),
142
- (PYTHON_TOOL , "code" , "bartowski/Qwen2.5-7B-Instruct-GGUF" , "Qwen2.5-7B-Instruct- Q4_K_M.gguf" , None ),
143
- (TEST_TOOL , "success" , "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF" , "Hermes-2-Pro-Llama-3-8B- Q4_K_M.gguf " , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
144
- (PYTHON_TOOL , "code" , "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF" , "Hermes-2-Pro-Llama-3-8B- Q4_K_M.gguf " , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
145
- (TEST_TOOL , "success" , "NousResearch/Hermes-3-Llama-3.1-8B-GGUF" , "Hermes-3-Llama-3.1-8B. Q4_K_M.gguf" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
146
- (PYTHON_TOOL , "code" , "NousResearch/Hermes-3-Llama-3.1-8B-GGUF" , "Hermes-3-Llama-3.1-8B. Q4_K_M.gguf" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
147
- (TEST_TOOL , "success" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF" , "Mistral-Nemo-Instruct-2407- Q4_K_M.gguf " , None ),
148
- (PYTHON_TOOL , "code" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF" , "Mistral-Nemo-Instruct-2407- Q4_K_M.gguf " , None ),
149
- (TEST_TOOL , "success" , "bartowski/functionary-small-v3.2-GGUF" , "functionary-small-v3.2- Q8_0.gguf" , ("meetkai/functionary-medium-v3.2" , None )),
150
- (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF" , "functionary-small-v3.2- Q8_0.gguf" , ("meetkai/functionary-medium-v3.2" , None )),
151
- (TEST_TOOL , "success" , "bartowski/Llama-3.2-3B-Instruct-GGUF" , "Llama-3.2-3B-Instruct- Q4_K_M.gguf" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
152
- (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-3B-Instruct-GGUF" , "Llama-3.2-3B-Instruct- Q4_K_M.gguf" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
153
- (TEST_TOOL , "success" , "bartowski/Llama-3.2-1B-Instruct-GGUF" , "Llama-3.2-1B-Instruct- Q4_K_M.gguf" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
154
- (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-1B-Instruct-GGUF" , "Llama-3.2-1B-Instruct- Q4_K_M.gguf" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
137
+ @pytest .mark .parametrize ("tool,argument_key,hf_repo,template_override" , [
138
+ (TEST_TOOL , "success" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF: Q4_K_M" , None ),
139
+ (PYTHON_TOOL , "code" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF: Q4_K_M" , None ),
140
+ (TEST_TOOL , "success" , "bartowski/gemma-2-2b-it-GGUF: Q4_K_M" , None ),
141
+ (PYTHON_TOOL , "code" , "bartowski/gemma-2-2b-it-GGUF: Q4_K_M" , None ),
142
+ (TEST_TOOL , "success" , "bartowski/Phi-3.5-mini-instruct-GGUF: Q4_K_M" , None ),
143
+ (PYTHON_TOOL , "code" , "bartowski/Phi-3.5-mini-instruct-GGUF: Q4_K_M" , None ),
144
+ (TEST_TOOL , "success" , "bartowski/Qwen2.5-7B-Instruct-GGUF: Q4_K_M" , None ),
145
+ (PYTHON_TOOL , "code" , "bartowski/Qwen2.5-7B-Instruct-GGUF: Q4_K_M" , None ),
146
+ (TEST_TOOL , "success" , "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF: Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
147
+ (PYTHON_TOOL , "code" , "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF: Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
148
+ (TEST_TOOL , "success" , "NousResearch/Hermes-3-Llama-3.1-8B-GGUF: Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
149
+ (PYTHON_TOOL , "code" , "NousResearch/Hermes-3-Llama-3.1-8B-GGUF: Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
150
+ (TEST_TOOL , "success" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF: Q4_K_M" , None ),
151
+ (PYTHON_TOOL , "code" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF: Q4_K_M" , None ),
152
+ (TEST_TOOL , "success" , "bartowski/functionary-small-v3.2-GGUF: Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
153
+ (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF: Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
154
+ (TEST_TOOL , "success" , "bartowski/Llama-3.2-3B-Instruct-GGUF: Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
155
+ (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-3B-Instruct-GGUF: Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
156
+ (TEST_TOOL , "success" , "bartowski/Llama-3.2-1B-Instruct-GGUF: Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
157
+ (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-1B-Instruct-GGUF: Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
155
158
# TODO: fix these
156
- # (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B- Q4_K_M.gguf ", None),
157
- # (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B- Q4_K_M.gguf ", None),
159
+ # (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF: Q4_K_M", None),
160
+ # (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF: Q4_K_M", None),
158
161
])
159
- def test_completion_with_required_tool_real_model (tool : dict , argument_key : str | None , hf_repo : str , hf_file : str , template_override : Tuple [str , str | None ] | None ):
162
+ def test_completion_with_required_tool_real_model (tool : dict , argument_key : str | None , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
160
163
n_predict = 512
161
164
server .n_slots = 1
162
165
server .jinja = True
163
166
server .n_ctx = 8192
164
167
server .n_predict = n_predict
165
168
server .model_hf_repo = hf_repo
166
- server .model_hf_file = hf_file
167
169
if template_override :
168
170
(template_hf_repo , template_variant ) = template_override
169
171
server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
170
172
assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
171
- server .start ()
173
+ server .start (timeout_seconds = TIMEOUT_SERVER_START )
172
174
res = server .make_request ("POST" , "/chat/completions" , data = {
173
175
"max_tokens" : n_predict ,
174
176
"messages" : [
@@ -181,7 +183,7 @@ def test_completion_with_required_tool_real_model(tool: dict, argument_key: str
181
183
"temperature" : 0.0 ,
182
184
"top_k" : 1 ,
183
185
"top_p" : 1.0 ,
184
- })
186
+ }, timeout = TIMEOUT_HTTP_REQUEST )
185
187
assert res .status_code == 200 , f"Expected status code 200, got { res .status_code } "
186
188
choice = res .body ["choices" ][0 ]
187
189
tool_calls = choice ["message" ].get ("tool_calls" )
@@ -201,7 +203,7 @@ def do_test_completion_without_tool_call(template_name: str, n_predict: int, too
201
203
server .jinja = True
202
204
server .n_predict = n_predict
203
205
server .chat_template_file = f'../../../models/templates/{ template_name } .jinja'
204
- server .start ()
206
+ server .start (timeout_seconds = TIMEOUT_SERVER_START )
205
207
res = server .make_request ("POST" , "/chat/completions" , data = {
206
208
"max_tokens" : n_predict ,
207
209
"messages" : [
@@ -213,7 +215,7 @@ def do_test_completion_without_tool_call(template_name: str, n_predict: int, too
213
215
"temperature" : 0.0 ,
214
216
"top_k" : 1 ,
215
217
"top_p" : 1.0 ,
216
- })
218
+ }, timeout = TIMEOUT_HTTP_REQUEST )
217
219
assert res .status_code == 200 , f"Expected status code 200, got { res .status_code } "
218
220
choice = res .body ["choices" ][0 ]
219
221
assert choice ["message" ].get ("tool_calls" ) is None , f'Expected no tool call in { choice ["message" ]} '
@@ -245,39 +247,38 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
245
247
246
248
247
249
@pytest .mark .slow
248
- @pytest .mark .parametrize ("hf_repo,hf_file, template_override" , [
249
- ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF" , "Meta-Llama-3.1-8B-Instruct- Q4_K_M.gguf " , None ),
250
- ("bartowski/gemma-2-2b-it-GGUF" , "gemma-2-2b-it- Q4_K_M.gguf" , None ),
251
- ("bartowski/Phi-3.5-mini-instruct-GGUF" , "Phi-3.5-mini-instruct- Q4_K_M.gguf" , None ),
252
- ("bartowski/Qwen2.5-7B-Instruct-GGUF" , "Qwen2.5-7B-Instruct- Q4_K_M.gguf" , None ),
253
- ("NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF" , "Hermes-2-Pro-Llama-3-8B- Q4_K_M.gguf " , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
254
- ("NousResearch/Hermes-3-Llama-3.1-8B-GGUF" , "Hermes-3-Llama-3.1-8B. Q4_K_M.gguf" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
255
- ("bartowski/Mistral-Nemo-Instruct-2407-GGUF" , "Mistral-Nemo-Instruct-2407- Q4_K_M.gguf " , None ),
256
- ("bartowski/functionary-small-v3.2-GGUF" , "functionary-small-v3.2- Q8_0.gguf" , ("meetkai/functionary-medium-v3.2" , None )),
257
- ("bartowski/Llama-3.2-3B-Instruct-GGUF" , "Llama-3.2-3B-Instruct- Q4_K_M.gguf" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
258
- # ("bartowski/Llama-3.2-1B-Instruct-GGUF", "Llama-3.2-1B-Instruct- Q4_K_M.gguf ", ("meta-llama/Llama-3.2-3B-Instruct", None)),
259
- # ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B- Q4_K_M.gguf ", None),
250
+ @pytest .mark .parametrize ("hf_repo,template_override" , [
251
+ ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF: Q4_K_M" , None ),
252
+ ("bartowski/gemma-2-2b-it-GGUF: Q4_K_M" , None ),
253
+ ("bartowski/Phi-3.5-mini-instruct-GGUF: Q4_K_M" , None ),
254
+ ("bartowski/Qwen2.5-7B-Instruct-GGUF: Q4_K_M" , None ),
255
+ ("NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF: Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
256
+ ("NousResearch/Hermes-3-Llama-3.1-8B-GGUF: Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
257
+ ("bartowski/Mistral-Nemo-Instruct-2407-GGUF: Q4_K_M" , None ),
258
+ ("bartowski/functionary-small-v3.2-GGUF: Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
259
+ ("bartowski/Llama-3.2-3B-Instruct-GGUF: Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
260
+ # ("bartowski/Llama-3.2-1B-Instruct-GGUF: Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
261
+ # ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF: Q4_K_M", None),
260
262
])
261
- def test_weather_tool_call (hf_repo : str , hf_file : str , template_override : Tuple [str , str | None ] | None ):
263
+ def test_weather_tool_call (hf_repo : str , template_override : Tuple [str , str | None ] | None ):
262
264
global server
263
265
server .n_slots = 1
264
266
server .jinja = True
265
267
server .n_ctx = 8192
266
268
server .n_predict = 512
267
269
server .model_hf_repo = hf_repo
268
- server .model_hf_file = hf_file
269
270
if template_override :
270
271
(template_hf_repo , template_variant ) = template_override
271
272
server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
272
273
assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
273
- server .start (timeout_seconds = 15 * 60 )
274
+ server .start (timeout_seconds = TIMEOUT_SERVER_START )
274
275
res = server .make_request ("POST" , "/chat/completions" , data = {
275
276
"max_tokens" : 256 ,
276
277
"messages" : [
277
278
{"role" : "user" , "content" : "What is the weather in Istanbul?" },
278
279
],
279
280
"tools" : [WEATHER_TOOL ],
280
- })
281
+ }, timeout = TIMEOUT_HTTP_REQUEST )
281
282
assert res .status_code == 200 , f"Expected status code 200, got { res .status_code } "
282
283
choice = res .body ["choices" ][0 ]
283
284
tool_calls = choice ["message" ].get ("tool_calls" )
@@ -292,32 +293,31 @@ def test_weather_tool_call(hf_repo: str, hf_file: str, template_override: Tuple[
292
293
293
294
294
295
@pytest .mark .slow
295
- @pytest .mark .parametrize ("expected_arguments_override,hf_repo,hf_file, template_override" , [
296
- (None , "bartowski/gemma-2-2b-it-GGUF" , "gemma-2-2b-it- Q4_K_M.gguf" , None ),
297
- (None , "bartowski/Phi-3.5-mini-instruct-GGUF" , "Phi-3.5-mini-instruct- Q4_K_M.gguf" , None ),
298
- (None , "bartowski/functionary-small-v3.2-GGUF" , "functionary-small-v3.2- Q8_0.gguf" , ("meetkai-functionary-medium-v3.2" , None )),
299
- ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF" , "Meta-Llama-3.1-8B-Instruct- Q4_K_M.gguf " , None ),
300
- (None , "bartowski/Llama-3.2-1B-Instruct-GGUF" , "Llama-3.2-1B-Instruct- Q4_K_M.gguf" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
301
- ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF" , "Llama-3.2-3B-Instruct- Q4_K_M.gguf" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
302
- (None , "bartowski/Qwen2.5-7B-Instruct-GGUF" , "Qwen2.5-7B-Instruct- Q4_K_M.gguf" , None ),
303
- (None , "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF" , "Hermes-2-Pro-Llama-3-8B- Q4_K_M.gguf" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
304
- (None , "NousResearch/Hermes-3-Llama-3.1-8B-GGUF" , "Hermes-3-Llama-3.1-8B. Q4_K_M.gguf" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
305
- (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF" , "Mistral-Nemo-Instruct-2407- Q4_K_M.gguf " , None ),
306
- # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B- Q4_K_M.gguf ", None),
296
+ @pytest .mark .parametrize ("expected_arguments_override,hf_repo,template_override" , [
297
+ (None , "bartowski/gemma-2-2b-it-GGUF: Q4_K_M" , None ),
298
+ (None , "bartowski/Phi-3.5-mini-instruct-GGUF: Q4_K_M" , None ),
299
+ (None , "bartowski/functionary-small-v3.2-GGUF: Q8_0" , ("meetkai-functionary-medium-v3.2" , None )),
300
+ ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF: Q4_K_M" , None ),
301
+ (None , "bartowski/Llama-3.2-1B-Instruct-GGUF: Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
302
+ ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF: Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
303
+ (None , "bartowski/Qwen2.5-7B-Instruct-GGUF: Q4_K_M" , None ),
304
+ (None , "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF: Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
305
+ (None , "NousResearch/Hermes-3-Llama-3.1-8B-GGUF: Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
306
+ (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF: Q4_K_M" , None ),
307
+ # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF: Q4_K_M", None),
307
308
])
308
- def test_hello_world_tool_call (expected_arguments_override : str | None , hf_repo : str , hf_file : str , template_override : Tuple [str , str | None ] | None ):
309
+ def test_hello_world_tool_call (expected_arguments_override : str | None , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
309
310
global server
310
311
server .n_slots = 1
311
312
server .jinja = True
312
313
server .n_ctx = 8192
313
314
server .n_predict = 128
314
315
server .model_hf_repo = hf_repo
315
- server .model_hf_file = hf_file
316
316
if template_override :
317
317
(template_hf_repo , template_variant ) = template_override
318
318
server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
319
319
assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
320
- server .start (timeout_seconds = 15 * 60 )
320
+ server .start (timeout_seconds = TIMEOUT_SERVER_START )
321
321
res = server .make_request ("POST" , "/chat/completions" , data = {
322
322
"max_tokens" : 256 ,
323
323
"messages" : [
@@ -329,7 +329,7 @@ def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo:
329
329
"temperature" : 0.0 ,
330
330
"top_k" : 1 ,
331
331
"top_p" : 1.0 ,
332
- })
332
+ }, timeout = TIMEOUT_HTTP_REQUEST )
333
333
assert res .status_code == 200 , f"Expected status code 200, got { res .status_code } "
334
334
choice = res .body ["choices" ][0 ]
335
335
tool_calls = choice ["message" ].get ("tool_calls" )
0 commit comments