@@ -312,10 +312,14 @@ async def create_completion(
312
312
else :
313
313
kwargs ["logits_processor" ].extend (_min_tokens_logits_processor )
314
314
315
- iterator_or_completion : Union [
316
- llama_cpp .CreateCompletionResponse ,
317
- Iterator [llama_cpp .CreateCompletionStreamResponse ],
318
- ] = await run_in_threadpool (llama , ** kwargs )
315
+ try :
316
+ iterator_or_completion : Union [
317
+ llama_cpp .CreateCompletionResponse ,
318
+ Iterator [llama_cpp .CreateCompletionStreamResponse ],
319
+ ] = await run_in_threadpool (llama , ** kwargs )
320
+ except Exception as err :
321
+ exit_stack .close ()
322
+ raise err
319
323
320
324
if isinstance (iterator_or_completion , Iterator ):
321
325
# EAFP: It's easier to ask for forgiveness than permission
@@ -504,9 +508,13 @@ async def create_chat_completion(
504
508
else :
505
509
kwargs ["logits_processor" ].extend (_min_tokens_logits_processor )
506
510
507
- iterator_or_completion : Union [
508
- llama_cpp .ChatCompletion , Iterator [llama_cpp .ChatCompletionChunk ]
509
- ] = await run_in_threadpool (llama .create_chat_completion , ** kwargs )
511
+ try :
512
+ iterator_or_completion : Union [
513
+ llama_cpp .ChatCompletion , Iterator [llama_cpp .ChatCompletionChunk ]
514
+ ] = await run_in_threadpool (llama .create_chat_completion , ** kwargs )
515
+ except Exception as err :
516
+ exit_stack .close ()
517
+ raise err
510
518
511
519
if isinstance (iterator_or_completion , Iterator ):
512
520
# EAFP: It's easier to ask for forgiveness than permission
0 commit comments