Merge branch 'main' into fix/server_llama_call_thread_starvation · gjpower/llama-cpp-python@9e0728b · GitHub
[go: up one dir, main page]

Skip to content

Commit 9e0728b

Browse files
authored
Merge branch 'main' into fix/server_llama_call_thread_starvation
2 parents ad35fc1 + 073b7e4 commit 9e0728b

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

llama_cpp/server/app.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -312,10 +312,14 @@ async def create_completion(
312312
else:
313313
kwargs["logits_processor"].extend(_min_tokens_logits_processor)
314314

315-
iterator_or_completion: Union[
316-
llama_cpp.CreateCompletionResponse,
317-
Iterator[llama_cpp.CreateCompletionStreamResponse],
318-
] = await run_in_threadpool(llama, **kwargs)
315+
try:
316+
iterator_or_completion: Union[
317+
llama_cpp.CreateCompletionResponse,
318+
Iterator[llama_cpp.CreateCompletionStreamResponse],
319+
] = await run_in_threadpool(llama, **kwargs)
320+
except Exception as err:
321+
exit_stack.close()
322+
raise err
319323

320324
if isinstance(iterator_or_completion, Iterator):
321325
# EAFP: It's easier to ask for forgiveness than permission
@@ -504,9 +508,13 @@ async def create_chat_completion(
504508
else:
505509
kwargs["logits_processor"].extend(_min_tokens_logits_processor)
506510

507-
iterator_or_completion: Union[
508-
llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
509-
] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
511+
try:
512+
iterator_or_completion: Union[
513+
llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
514+
] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
515+
except Exception as err:
516+
exit_stack.close()
517+
raise err
510518

511519
if isinstance(iterator_or_completion, Iterator):
512520
# EAFP: It's easier to ask for forgiveness than permission

llama_cpp/server/errors.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -134,8 +134,6 @@ def error_message_wrapper(
134134
] = None,
135135
) -> Tuple[int, ErrorResponse]:
136136
"""Wraps error message in OpenAI style error response"""
137-
print(f"Exception: {str(error)}", file=sys.stderr)
138-
traceback.print_exc(file=sys.stderr)
139137
if body is not None and isinstance(
140138
body,
141139
(
@@ -149,6 +147,10 @@ def error_message_wrapper(
149147
if match is not None:
150148
return callback(body, match)
151149

150+
# Only print the trace on unexpected exceptions
151+
print(f"Exception: {str(error)}", file=sys.stderr)
152+
traceback.print_exc(file=sys.stderr)
153+
152154
# Wrap other errors as internal server error
153155
return 500, ErrorResponse(
154156
message=str(error),

0 commit comments

Comments
 (0)
0