27
27
import numpy .typing as npt
28
28
29
29
30
+ # Clear pydantic's protected namespaces so the `model` and `model_alias` settings do not trigger a namespace-conflict warning
30
31
BaseSettings .model_config ['protected_namespaces' ] = ()
31
32
32
33
@@ -58,14 +59,10 @@ class Settings(BaseSettings):
58
59
description = "Split layers across multiple GPUs in proportion." ,
59
60
)
60
61
rope_freq_base : float = Field (
61
- default = 10000 , ge = 1 , description = "RoPE base frequency"
62
+ default = 0.0 , description = "RoPE base frequency"
62
63
)
63
64
rope_freq_scale : float = Field (
64
- default = 1.0 , description = "RoPE frequency scaling factor"
65
- )
66
- low_vram : bool = Field (
67
- default = False ,
68
- description = "Whether to use less VRAM. This will reduce performance." ,
65
+ default = 0.0 , description = "RoPE frequency scaling factor"
69
66
)
70
67
mul_mat_q : bool = Field (
71
68
default = True , description = "if true, use experimental mul_mat_q kernels"
@@ -106,6 +103,10 @@ class Settings(BaseSettings):
106
103
default = False ,
107
104
description = "Enable NUMA support." ,
108
105
)
106
+ chat_format : str = Field (
107
+ default = "llama-2" ,
108
+ description = "Chat format to use." ,
109
+ )
109
110
cache : bool = Field (
110
111
default = False ,
111
112
description = "Use a cache to reduce processing times for evaluated prompts." ,
@@ -349,7 +350,6 @@ def create_app(settings: Optional[Settings] = None):
349
350
tensor_split = settings .tensor_split ,
350
351
rope_freq_base = settings .rope_freq_base ,
351
352
rope_freq_scale = settings .rope_freq_scale ,
352
- low_vram = settings .low_vram ,
353
353
mul_mat_q = settings .mul_mat_q ,
354
354
f16_kv = settings .f16_kv ,
355
355
logits_all = settings .logits_all ,
@@ -361,6 +361,8 @@ def create_app(settings: Optional[Settings] = None):
361
361
last_n_tokens_size = settings .last_n_tokens_size ,
362
362
lora_base = settings .lora_base ,
363
363
lora_path = settings .lora_path ,
364
+ numa = settings .numa ,
365
+ chat_format = settings .chat_format ,
364
366
verbose = settings .verbose ,
365
367
)
366
368
if settings .cache :
0 commit comments