@@ -85,6 +85,10 @@ class Settings(BaseSettings):
     port: int = Field(
         default=8000, description="Listen port"
     )
+    interrupt_requests: bool = Field(
+        default=True,
+        description="Whether to interrupt requests when a new request is received.",
+    )


 router = APIRouter()
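The new field slots into the existing `Settings` model, which subclasses pydantic's `BaseSettings`, so it should also be settable through the environment (`INTERRUPT_REQUESTS`) or the server's auto-generated CLI flag. A minimal sketch of toggling it when building the app programmatically, assuming `create_app` accepts a `Settings` instance; the model path is a placeholder:

```python
import uvicorn

from llama_cpp.server.app import Settings, create_app

settings = Settings(
    model="models/ggml-model.bin",  # placeholder path
    interrupt_requests=False,       # let in-flight streams run to completion
)
app = create_app(settings=settings)

if __name__ == "__main__":
    uvicorn.run(app, host=settings.host, port=settings.port)
```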
@@ -379,7 +383,7 @@ async def event_publisher(inner_send_chan: MemoryObjectSendStream):
                     await inner_send_chan.send(dict(data=json.dumps(chunk)))
                     if await request.is_disconnected():
                         raise anyio.get_cancelled_exc_class()()
-                    if llama_outer_lock.locked():
+                    if settings.interrupt_requests and llama_outer_lock.locked():
                         await inner_send_chan.send(dict(data="[DONE]"))
                         raise anyio.get_cancelled_exc_class()()
                 await inner_send_chan.send(dict(data="[DONE]"))
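The guard works because of the double-lock scheme behind `get_llama`: the outer lock is held only while another request is waiting to acquire the model, so `llama_outer_lock.locked()` being true means a new request is queued behind the current stream. A standalone sketch of that pattern (illustrative, not the library's exact code):

```python
# The outer lock is held only while a request is *waiting* for the model,
# so an in-flight stream can poll outer.locked() to learn that another
# request is queued behind it.
from threading import Lock

outer = Lock()
inner = Lock()
llama = object()  # stand-in for the shared llama_cpp.Llama instance

def get_llama():
    outer.acquire()            # queue up behind any running request
    release_outer = True
    try:
        inner.acquire()        # wait for exclusive access to the model
        try:
            outer.release()    # no longer waiting; outer.locked() goes False
            release_outer = False
            yield llama        # the request streams while holding `inner`
        finally:
            inner.release()
    finally:
        if release_outer:
            outer.release()
```

With `interrupt_requests` disabled, the streaming request simply holds the inner lock until it finishes and the queued request waits its turn.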
@@ -486,6 +490,7 @@ async def create_chat_completion(
     request: Request,
     body: CreateChatCompletionRequest,
     llama: llama_cpp.Llama = Depends(get_llama),
+    settings: Settings = Depends(get_settings),
 ) -> Union[llama_cpp.ChatCompletion, EventSourceResponse]:
     exclude = {
         "n",
@@ -511,7 +516,7 @@ async def event_publisher(inner_send_chan: MemoryObjectSendStream):
                     await inner_send_chan.send(dict(data=json.dumps(chat_chunk)))
                     if await request.is_disconnected():
                         raise anyio.get_cancelled_exc_class()()
-                    if llama_outer_lock.locked():
+                    if settings.interrupt_requests and llama_outer_lock.locked():
                         await inner_send_chan.send(dict(data="[DONE]"))
                         raise anyio.get_cancelled_exc_class()()
                 await inner_send_chan.send(dict(data="[DONE]"))
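The same guard is applied to the chat-completions stream. A hypothetical client-side check of the behavior: with `interrupt_requests=True` (the default), a second request arriving mid-stream should cut the first stream short with `[DONE]`. The endpoint and payload follow the OpenAI-compatible API this server exposes, and the sleep just gives the first stream time to start.

```python
import threading
import time

import requests

def stream_completion(prompt: str) -> None:
    # Stream server-sent events from the completions endpoint.
    with requests.post(
        "http://localhost:8000/v1/completions",
        json={"prompt": prompt, "stream": True, "max_tokens": 256},
        stream=True,
    ) as resp:
        for line in resp.iter_lines():
            if line:
                print(line.decode())

# The first request starts streaming; the second arrives while it is
# running, so the first should end early with [DONE] when the new
# request takes the outer lock.
t = threading.Thread(target=stream_completion, args=("Write a long story.",))
t.start()
time.sleep(2)
stream_completion("Say hello.")
t.join()
```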