@@ -326,7 +326,7 @@ async def create_completion(
326
326
def iterator () -> Iterator [llama_cpp .CreateCompletionStreamResponse ]:
327
327
yield first_response
328
328
yield from iterator_or_completion
329
- exit_stack .close ()
329
+ exit_stack .aclose ()
330
330
331
331
send_chan , recv_chan = anyio .create_memory_object_stream (10 )
332
332
return EventSourceResponse (
@@ -336,12 +336,13 @@ def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:
336
336
request = request ,
337
337
inner_send_chan = send_chan ,
338
338
iterator = iterator (),
339
- on_complete = exit_stack .close ,
339
+ on_complete = exit_stack .aclose ,
340
340
),
341
341
sep = "\n " ,
342
342
ping_message_factory = _ping_message_factory ,
343
343
)
344
344
else :
345
+ await exit_stack .aclose ()
345
346
return iterator_or_completion
346
347
347
348
@@ -517,7 +518,7 @@ async def create_chat_completion(
517
518
def iterator () -> Iterator [llama_cpp .ChatCompletionChunk ]:
518
519
yield first_response
519
520
yield from iterator_or_completion
520
- exit_stack .close ()
521
+ exit_stack .aclose ()
521
522
522
523
send_chan , recv_chan = anyio .create_memory_object_stream (10 )
523
524
return EventSourceResponse (
@@ -527,13 +528,13 @@ def iterator() -> Iterator[llama_cpp.ChatCompletionChunk]:
527
528
request = request ,
528
529
inner_send_chan = send_chan ,
529
530
iterator = iterator (),
530
- on_complete = exit_stack .close ,
531
+ on_complete = exit_stack .aclose ,
531
532
),
532
533
sep = "\n " ,
533
534
ping_message_factory = _ping_message_factory ,
534
535
)
535
536
else :
536
- exit_stack .close ()
537
+ await exit_stack .aclose ()
537
538
return iterator_or_completion
538
539
539
540
0 commit comments