Merge branch 'main' into cuda-action-fix · pabl-o-ce/llama-cpp-python@1fbd9c8

Commit 1fbd9c8

Merge branch 'main' into cuda-action-fix
2 parents 5931ecb + 8988aaf commit 1fbd9c8

File tree

4 files changed: +39 -3 lines changed

.github/workflows/test.yaml

Lines changed: 4 additions & 2 deletions
@@ -113,7 +113,7 @@ jobs:
 
   build-macos:
     needs: download-model
-    runs-on: macos-latest
+    runs-on: macos-14
     strategy:
       matrix:
         python-version: ["3.9", "3.10", "3.11", "3.12"]
@@ -126,6 +126,7 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+          architecture: "arm64"
           cache: 'pip'
 
       - name: Restore model cache
@@ -156,7 +157,7 @@ jobs:
 
   build-macos-metal:
     needs: download-model
-    runs-on: macos-latest
+    runs-on: macos-14
     steps:
       - uses: actions/checkout@v4
         with:
@@ -166,6 +167,7 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: "3.9"
+          architecture: "arm64"
 
       - name: Restore model cache
         uses: actions/cache@v4

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -70,6 +70,8 @@ if (LLAMA_BUILD)
         set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
         set(GGML_FMA "Off" CACHE BOOL "gml: enable FMA" FORCE)
         set(GGML_F16C "Off" CACHE BOOL "gml: enable F16C" FORCE)
+
+        set(CMAKE_OSX_ARCHITECTURES "arm64" CACHE STRING "Build architecture for OS X" FORCE)
     endif()
 
     if (APPLE)

llama_cpp/llama_cpp.py

Lines changed: 32 additions & 0 deletions
@@ -221,6 +221,7 @@
 # LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
 # LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
 # LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
+# LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -249,6 +250,7 @@
 LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24
 LLAMA_VOCAB_PRE_TYPE_EXAONE = 25
 LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26
+LLAMA_VOCAB_PRE_TYPE_MINERVA = 27
 
 
 # // note: these values should be synchronized with ggml_rope
@@ -392,12 +394,14 @@
 # LLAMA_ROPE_SCALING_TYPE_NONE = 0,
 # LLAMA_ROPE_SCALING_TYPE_LINEAR = 1,
 # LLAMA_ROPE_SCALING_TYPE_YARN = 2,
+# LLAMA_ROPE_SCALING_TYPE_LONGROPE = 3,
 # LLAMA_ROPE_SCALING_TYPE_MAX_VALUE = LLAMA_ROPE_SCALING_TYPE_YARN,
 # };
 LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED = -1
 LLAMA_ROPE_SCALING_TYPE_NONE = 0
 LLAMA_ROPE_SCALING_TYPE_LINEAR = 1
 LLAMA_ROPE_SCALING_TYPE_YARN = 2
+LLAMA_ROPE_SCALING_TYPE_LONGROPE = 3
 LLAMA_ROPE_SCALING_TYPE_MAX_VALUE = LLAMA_ROPE_SCALING_TYPE_YARN
 
 # enum llama_pooling_type {
@@ -2933,6 +2937,34 @@ def llama_chat_apply_template(
     ...
 
 
+# // Get list of built-in chat templates
+# LLAMA_API int32_t llama_chat_builtin_templates(const char ** output, size_t len);
+@ctypes_function(
+    "llama_chat_builtin_templates",
+    [
+        ctypes.POINTER(ctypes.c_char_p),
+        ctypes.c_size_t,
+    ],
+    ctypes.c_int32,
+)
+def llama_chat_builtin_templates(
+    output: CtypesArray[bytes],
+    len: Union[ctypes.c_size_t, int],
+    /,
+) -> int:
+    """Get list of built-in chat templates.
+
+    Args:
+        output: Output buffer to store template names.
+        len: Length of the output buffer.
+
+    Returns:
+        Number of templates available.
+        Returns a negative number on error.
+    """
+    ...
+
+
 # //
 # // Sampling API
 # //
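Below is a minimal usage sketch for the newly bound llama_chat_builtin_templates; it is not part of this commit. It assumes the binding is re-exported at the package level as llama_cpp.llama_chat_builtin_templates and that the C function fills the supplied array with up to len template names; the buffer size of 32 is an arbitrary guess, not something taken from the diff.

import ctypes
import llama_cpp

MAX_TEMPLATES = 32  # assumed upper bound on the number of built-in templates

# Array of C string pointers for the library to fill with template names.
names = (ctypes.c_char_p * MAX_TEMPLATES)()

count = llama_cpp.llama_chat_builtin_templates(names, MAX_TEMPLATES)
if count < 0:
    raise RuntimeError("llama_chat_builtin_templates returned an error")

# Decode only the entries that were actually written.
templates = [names[i].decode("utf-8") for i in range(min(count, MAX_TEMPLATES))]
print(templates)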

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (git submodule pointer update)

0 commit comments

Comments
 (0)
0