Adds timer to avoid saturating CPU usage · isabella232/llama-cpp-python@9c0820d · GitHub
[go: up one dir, main page]

Skip to content

Commit 9c0820d

Browse files
author
juanroesel
committed
Adds timer to avoid saturating CPU usage
1 parent b4083cf commit 9c0820d

File tree

2 files changed

+9
-10
lines changed

2 files changed

+9
-10
lines changed

llama_cpp/_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,11 +153,10 @@ async def monitor_task_queue(status_dict: Dict[str, Union[int, float]]):
153153
a shared status dictionary with the number of tasks that have not
154154
started and the number of tasks that are currently running.
155155
It schedules a new instance of itself as an asyncio task to continuously monitor the task queue.
156-
NOTE: There will always be 4 tasks running in the task queue:
156+
NOTE: There will always be 3 tasks running in the task queue:
157157
- LifespanOn.main: Main application coroutine
158158
- Server.serve: Server coroutine
159159
- monitor_task_queue: Task queue monitoring coroutine
160-
- RequestReponseCycle.run_asgi: ASGI single cycle coroutine
161160
Any upcoming requests will be added to the task queue in the form of
162161
another RequestResponseCycle.run_asgi coroutine.
163162
"""
@@ -175,6 +174,8 @@ async def monitor_task_queue(status_dict: Dict[str, Union[int, float]]):
175174
for task in all_tasks
176175
}
177176

177+
await asyncio.sleep(5) # adds a delay of 5 seconds to avoid overloading the CPU
178+
178179
asyncio.create_task(
179180
monitor_task_queue(status_dict)
180181
) # pass status_dict to the next task

llama_cpp/server/app.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -246,22 +246,20 @@ async def authenticate(
246246
response_model=HealthMetrics,
247247
summary="Server's health",
248248
)
249-
async def check_health(
250-
251-
):
252-
# 4 running tasks + new scheduled request
253-
if 0 <= task_queue_status.get("running_tasks_count", 0) <= 5:
249+
async def check_health():
250+
# 3 running tasks + new scheduled request
251+
if 0 <= task_queue_status.get("running_tasks_count", 0) <= 4:
254252
return JSONResponse(
255253
content={"status": "OK", "task_queue_status": task_queue_status}
256254
)
257-
# 1 - 6 scheduled requests
258-
elif 5 < task_queue_status.get("running_tasks_count", 0) <= 10:
255+
# 2 - 6 scheduled requests
256+
elif 4 < task_queue_status.get("running_tasks_count", 0) < 10:
259257
return JSONResponse(
260258
content={"status": "Warning", "task_queue_status": task_queue_status}
261259
)
262260
# 7+ scheduled requests
263261
# TODO: Evaluate if in this case we should manually stop the execution of certain tasks to clear the queue
264-
elif task_queue_status.get("running_tasks_count", 0) > 10:
262+
elif task_queue_status.get("running_tasks_count", 0) >= 10:
265263
return JSONResponse(
266264
content={"status": "Critical", "task_queue_status": task_queue_status}
267265
)

0 commit comments

Comments
 (0)
0