Merge branch 'main' into cuda-action-fix · pabl-o-ce/llama-cpp-python@1fbd9c8

Commit 1fbd9c8

Merge branch 'main' into cuda-action-fix
2 parents 5931ecb + 8988aaf commit 1fbd9c8

File tree

4 files changed: +39 -3 lines changed

.github/workflows/test.yaml

Lines changed: 4 additions & 2 deletions
@@ -113,7 +113,7 @@ jobs:
 
   build-macos:
     needs: download-model
-    runs-on: macos-latest
+    runs-on: macos-14
     strategy:
       matrix:
         python-version: ["3.9", "3.10", "3.11", "3.12"]
@@ -126,6 +126,7 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+          architecture: "arm64"
           cache: 'pip'
 
       - name: Restore model cache
@@ -156,7 +157,7 @@ jobs:
 
   build-macos-metal:
     needs: download-model
-    runs-on: macos-latest
+    runs-on: macos-14
     steps:
       - uses: actions/checkout@v4
         with:
@@ -166,6 +167,7 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: "3.9"
+          architecture: "arm64"
 
       - name: Restore model cache
         uses: actions/cache@v4

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -70,6 +70,8 @@ if (LLAMA_BUILD)
         set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
         set(GGML_FMA "Off" CACHE BOOL "gml: enable FMA" FORCE)
         set(GGML_F16C "Off" CACHE BOOL "gml: enable F16C" FORCE)
+
+        set(CMAKE_OSX_ARCHITECTURES "arm64" CACHE STRING "Build architecture for OS X" FORCE)
     endif()
 
     if (APPLE)

llama_cpp/llama_cpp.py

Lines changed: 32 additions & 0 deletions
@@ -221,6 +221,7 @@
 # LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
 # LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
 # LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
+# LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -249,6 +250,7 @@
 LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24
 LLAMA_VOCAB_PRE_TYPE_EXAONE = 25
 LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26
+LLAMA_VOCAB_PRE_TYPE_MINERVA = 27
 
 
 # // note: these values should be synchronized with ggml_rope
@@ -392,12 +394,14 @@
 # LLAMA_ROPE_SCALING_TYPE_NONE = 0,
 # LLAMA_ROPE_SCALING_TYPE_LINEAR = 1,
 # LLAMA_ROPE_SCALING_TYPE_YARN = 2,
+# LLAMA_ROPE_SCALING_TYPE_LONGROPE = 3,
 # LLAMA_ROPE_SCALING_TYPE_MAX_VALUE = LLAMA_ROPE_SCALING_TYPE_YARN,
 # };
 LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED = -1
 LLAMA_ROPE_SCALING_TYPE_NONE = 0
 LLAMA_ROPE_SCALING_TYPE_LINEAR = 1
 LLAMA_ROPE_SCALING_TYPE_YARN = 2
+LLAMA_ROPE_SCALING_TYPE_LONGROPE = 3
 LLAMA_ROPE_SCALING_TYPE_MAX_VALUE = LLAMA_ROPE_SCALING_TYPE_YARN
 
 # enum llama_pooling_type {
@@ -2933,6 +2937,34 @@ def llama_chat_apply_template(
     ...
 
 
+# // Get list of built-in chat templates
+# LLAMA_API int32_t llama_chat_builtin_templates(const char ** output, size_t len);
+@ctypes_function(
+    "llama_chat_builtin_templates",
+    [
+        ctypes.POINTER(ctypes.c_char_p),
+        ctypes.c_size_t,
+    ],
+    ctypes.c_int32,
+)
+def llama_chat_builtin_templates(
+    output: CtypesArray[bytes],
+    len: Union[ctypes.c_size_t, int],
+    /,
+) -> int:
+    """Get list of built-in chat templates.
+
+    Args:
+        output: Output buffer to store template names.
+        len: Length of the output buffer.
+
+    Returns:
+        Number of templates available.
+        Returns a negative number on error.
+    """
+    ...
+
+
 # //
 # // Sampling API
 # //
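Below is a minimal usage sketch for the newly bound llama_chat_builtin_templates; it is not part of this commit. It assumes the binding is re-exported at the package level as llama_cpp.llama_chat_builtin_templates and that the C function fills the supplied array with up to len template names; the buffer size of 32 is an arbitrary guess, not something taken from the diff.

import ctypes
import llama_cpp

MAX_TEMPLATES = 32  # assumed upper bound on the number of built-in templates

# Array of C string pointers for the library to fill with template names.
names = (ctypes.c_char_p * MAX_TEMPLATES)()

count = llama_cpp.llama_chat_builtin_templates(names, MAX_TEMPLATES)
if count < 0:
    raise RuntimeError("llama_chat_builtin_templates returned an error")

# Decode only the entries that were actually written.
templates = [names[i].decode("utf-8") for i in range(min(count, MAX_TEMPLATES))]
print(templates)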

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (git submodule pointer update)

0 commit comments

Comments
 (0)
0