Update server params · GPTprojects/llama-cpp-python@d9bce17 · GitHub
[go: up one dir, main page]

Skip to content

Commit d9bce17

Browse files
committed
Update server params
1 parent 3720c73 commit d9bce17

File tree

1 file changed

+9
-7
lines changed

1 file changed

+9
-7
lines changed

llama_cpp/server/app.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import numpy.typing as npt
2828

2929

30+
# Disable warning for model and model_alias settings
3031
BaseSettings.model_config['protected_namespaces'] = ()
3132

3233

@@ -58,14 +59,10 @@ class Settings(BaseSettings):
5859
description="Split layers across multiple GPUs in proportion.",
5960
)
6061
rope_freq_base: float = Field(
61-
default=10000, ge=1, description="RoPE base frequency"
62+
default=0.0, description="RoPE base frequency"
6263
)
6364
rope_freq_scale: float = Field(
64-
default=1.0, description="RoPE frequency scaling factor"
65-
)
66-
low_vram: bool = Field(
67-
default=False,
68-
description="Whether to use less VRAM. This will reduce performance.",
65+
default=0.0, description="RoPE frequency scaling factor"
6966
)
7067
mul_mat_q: bool = Field(
7168
default=True, description="if true, use experimental mul_mat_q kernels"
@@ -106,6 +103,10 @@ class Settings(BaseSettings):
106103
default=False,
107104
description="Enable NUMA support.",
108105
)
106+
chat_format: str = Field(
107+
default="llama-2",
108+
description="Chat format to use.",
109+
)
109110
cache: bool = Field(
110111
default=False,
111112
description="Use a cache to reduce processing times for evaluated prompts.",
@@ -349,7 +350,6 @@ def create_app(settings: Optional[Settings] = None):
349350
tensor_split=settings.tensor_split,
350351
rope_freq_base=settings.rope_freq_base,
351352
rope_freq_scale=settings.rope_freq_scale,
352-
low_vram=settings.low_vram,
353353
mul_mat_q=settings.mul_mat_q,
354354
f16_kv=settings.f16_kv,
355355
logits_all=settings.logits_all,
@@ -361,6 +361,8 @@ def create_app(settings: Optional[Settings] = None):
361361
last_n_tokens_size=settings.last_n_tokens_size,
362362
lora_base=settings.lora_base,
363363
lora_path=settings.lora_path,
364+
numa=settings.numa,
365+
chat_format=settings.chat_format,
364366
verbose=settings.verbose,
365367
)
366368
if settings.cache:

0 commit comments

Comments (0)