@@ -2322,7 +2322,7 @@ def generate_streaming(tools, functions, function_call, prompt):
2322
2322
prompt = prompt
2323
2323
stops = ["\n " , END_ASSISTANT_TOKEN ]
2324
2324
2325
- completion = create_completion (stop = stops )
2325
+ completion = create_completion (prompt = prompt , stop = stops , grammar = grammar )
2326
2326
completion_text = completion ["choices" ][0 ]["text" ]
2327
2327
completion_tokens += completion ["usage" ]["completion_tokens" ]
2328
2328
@@ -2349,7 +2349,7 @@ def generate_streaming(tools, functions, function_call, prompt):
2349
2349
completion_text .split (START_FUNCTION_CALL_TOKEN )[- 1 ][:- 1 ].strip ()
2350
2350
)
2351
2351
grammar = get_grammar (function_calls [- 1 ])
2352
- completion = create_completion (stop = END_FUNCTION_CALL_TOKEN )
2352
+ completion = create_completion (prompt = prompt , stop = END_FUNCTION_CALL_TOKEN , grammar = grammar )
2353
2353
completion_tokens += completion ["usage" ]["completion_tokens" ]
2354
2354
function_bodies .append (completion ["choices" ][0 ]["text" ].strip ())
2355
2355
# If the prompt involves a function call, just append generated parameters to function_bodies
@@ -2363,7 +2363,7 @@ def generate_streaming(tools, functions, function_call, prompt):
2363
2363
function_calls .append (function_call )
2364
2364
grammar = get_grammar (function_call )
2365
2365
stops = [STOP_TOKEN , FROM_TOKEN ]
2366
- completion = create_completion (stop = stops )
2366
+ completion = create_completion (prompt = prompt , stop = stops , grammar = grammar )
2367
2367
completion_text = completion ["choices" ][0 ]["text" ]
2368
2368
completion_tokens += completion ["usage" ]["completion_tokens" ]
2369
2369
function_bodies .append (completion_text .strip ())
@@ -2373,7 +2373,7 @@ def generate_streaming(tools, functions, function_call, prompt):
2373
2373
# Generate function name first
2374
2374
grammar = None
2375
2375
stops = CONTENT_TOKEN
2376
- completion = create_completion (stop = stops )
2376
+ completion = create_completion (prompt = prompt , stop = stops , grammar = grammar )
2377
2377
completion_text = completion ["choices" ][0 ]["text" ]
2378
2378
completion_tokens += completion ["usage" ]["completion_tokens" ]
2379
2379
function_name = completion_text .strip ()
@@ -2386,7 +2386,7 @@ def generate_streaming(tools, functions, function_call, prompt):
2386
2386
grammar = get_grammar (function_call )
2387
2387
# Generate content
2388
2388
stops = [RECIPIENT_TOKEN , STOP_TOKEN ]
2389
- completion = create_completion (stop = stops )
2389
+ completion = create_completion (prompt = prompt , stop = stops , grammar = grammar )
2390
2390
completion_text = completion ["choices" ][0 ]["text" ]
2391
2391
completion_tokens += completion ["usage" ]["completion_tokens" ]
2392
2392
if function_name == "all" :
@@ -2413,7 +2413,7 @@ def generate_streaming(tools, functions, function_call, prompt):
2413
2413
# Check whether the model wants to generate another turn
2414
2414
prompt += completion_text .strip ()
2415
2415
grammar = None
2416
- completion = create_completion (stop = stops )
2416
+ completion = create_completion (prompt = prompt , stop = stops , grammar = grammar )
2417
2417
completion_tokens += completion ["usage" ]["completion_tokens" ]
2418
2418
if "<|from|> assistant" in completion ["choices" ][0 ]["text" ] or "<|from|>assistant" in completion ["choices" ][0 ]["text" ]:
2419
2419
prompt += "\n <|from|>assistant\n <|recipient|>"
@@ -3564,4 +3564,4 @@ def chatml_function_calling(
3564
3564
},
3565
3565
}
3566
3566
3567
- raise ValueError ("Automatic streaming tool choice is not supported" )
3567
+ raise ValueError ("Automatic streaming tool choice is not supported" )
0 commit comments