expose RoPE param to server start · MobinX/llama-cpp-python@5ed8bf1 · GitHub
[go: up one dir, main page]

Skip to content

Commit 5ed8bf1

Browse files
author
shutup
committed
expose RoPE param to server start
1 parent 6d8892f commit 5ed8bf1

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

llama_cpp/server/app.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ class Settings(BaseSettings):
3535
default=None,
3636
description="Split layers across multiple GPUs in proportion.",
3737
)
38+
rope_freq_base: float = Field(default=10000, ge=1, description="RoPE base frequency")
39+
rope_freq_scale: float = Field(default=1.0, description="RoPE frequency scaling factor")
3840
seed: int = Field(
3941
default=1337, description="Random seed. -1 for random."
4042
)
@@ -118,6 +120,8 @@ def create_app(settings: Optional[Settings] = None):
118120
model_path=settings.model,
119121
n_gpu_layers=settings.n_gpu_layers,
120122
tensor_split=settings.tensor_split,
123+
rope_freq_base=settings.rope_freq_base,
124+
rope_freq_scale=settings.rope_freq_scale,
121125
seed=settings.seed,
122126
f16_kv=settings.f16_kv,
123127
use_mlock=settings.use_mlock,

0 commit comments

Comments (0)
0