Allow user to set llama config from env vars · coderonion/llama-cpp-python@ffe34cf
Allow user to set llama config from env vars
1 parent: 05eb208 · commit: ffe34cf

1 file changed: +15 -6 lines changed

examples/fastapi_server.py

@@ -11,6 +11,7 @@
 Then visit http://localhost:8000/docs to see the interactive API docs.
 
 """
+import os
 import json
 from typing import List, Optional, Literal, Union, Iterator
 
@@ -24,6 +25,13 @@
 
 class Settings(BaseSettings):
     model: str
+    n_ctx: int = 2048
+    n_batch: int = 2048
+    n_threads: int = os.cpu_count() or 1
+    f16_kv: bool = True
+    use_mlock: bool = True
+    embedding: bool = True
+    last_n_tokens_size: int = 64
 
 
 app = FastAPI(
@@ -40,12 +48,13 @@ class Settings(BaseSettings):
 settings = Settings()
 llama = llama_cpp.Llama(
     settings.model,
-    f16_kv=True,
-    use_mlock=True,
-    embedding=True,
-    n_threads=6,
-    n_batch=2048,
-    n_ctx=2048,
+    f16_kv=settings.f16_kv,
+    use_mlock=settings.use_mlock,
+    embedding=settings.embedding,
+    n_threads=settings.n_threads,
+    n_batch=settings.n_batch,
+    n_ctx=settings.n_ctx,
+    last_n_tokens_size=settings.last_n_tokens_size,
 )
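For context, a minimal, self-contained sketch (not part of the commit) of the mechanism this change relies on: pydantic's BaseSettings matches field names to environment variables case-insensitively, so setting MODEL or N_CTX overrides the field defaults declared above. This assumes pydantic v1, where BaseSettings is importable from pydantic (in v2 it moved to the separate pydantic-settings package); the model path is hypothetical.

import os

from pydantic import BaseSettings


class Settings(BaseSettings):
    model: str          # required: must be supplied, e.g. via the MODEL env var
    n_ctx: int = 2048   # optional: default used unless N_CTX is set


# Env var names match field names case-insensitively by default.
os.environ["MODEL"] = "./models/ggml-model.bin"  # hypothetical path
os.environ["N_CTX"] = "4096"

settings = Settings()
assert settings.model == "./models/ggml-model.bin"
assert settings.n_ctx == 4096  # the "4096" string is coerced to int

With this change, the example server can therefore be configured at launch time, e.g. by exporting MODEL, N_CTX, or N_THREADS in the shell before starting examples/fastapi_server.py, instead of editing the hardcoded constructor arguments.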