Add bindings for custom_rope · Keeyahto/llama-cpp-python@3f8f276 · GitHub

Commit 3f8f276

Add bindings for custom_rope
1 parent 8e0f625 commit 3f8f276

File tree

3 files changed: +9 -1 lines changed

llama_cpp/llama.py

Lines changed: 6 additions & 0 deletions
Lines changed: 6 additions & 0 deletions

@@ -205,6 +205,8 @@ def __init__(
         model_path: str,
         # NOTE: These parameters are likely to change in the future.
         n_ctx: int = 512,
+        rope_freq_base: float = 10000.0,
+        rope_freq_scale: float = 1.0,
         n_parts: int = -1,
         n_gpu_layers: int = 0,
         seed: int = 1337,
@@ -227,6 +229,8 @@ def __init__(
         Args:
             model_path: Path to the model.
             n_ctx: Maximum context size.
+            rope_freq_base: RoPE base frequency.
+            rope_freq_scale: RoPE frequency scale.
             n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
             seed: Random seed. -1 for random.
             f16_kv: Use half-precision for key/value cache.
@@ -253,6 +257,8 @@ def __init__(

         self.params = llama_cpp.llama_context_default_params()
         self.params.n_ctx = n_ctx
+        self.params.rope_freq_base = rope_freq_base
+        self.params.rope_freq_scale = rope_freq_scale
         self.params.n_gpu_layers = n_gpu_layers
         self.params.seed = seed
         self.params.f16_kv = f16_kv
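For context, a minimal usage sketch (not part of the commit) of the two new constructor arguments, here used for linear RoPE scaling to run a model beyond its native context length; the model path and the chosen values are hypothetical placeholders.

# Sketch only: model path and parameter values are hypothetical.
from llama_cpp import Llama

llm = Llama(
    model_path="./models/7B/ggml-model.bin",  # hypothetical path
    n_ctx=4096,               # request a 4096-token context window
    rope_freq_base=10000.0,   # keep the default RoPE base frequency
    rope_freq_scale=0.5,      # compress position frequencies 2x (linear scaling)
)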

llama_cpp/llama_cpp.py

Lines changed: 2 additions & 0 deletions
@@ -184,6 +184,8 @@ class llama_context_params(Structure):
     _fields_ = [
         ("seed", c_uint32),
         ("n_ctx", c_int32),
+        ("rope_freq_base", c_float),
+        ("rope_freq_scale", c_float),
         ("n_batch", c_int32),
         ("n_gpu_layers", c_int32),
         ("main_gpu", c_int32),

vendor/llama.cpp: submodule pointer updated (1 addition & 1 deletion)

0 commit comments