Merge branch 'main' into fix/server_llama_call_thread_starvation · gjpower/llama-cpp-python@ad35fc1 · GitHub

Commit ad35fc1

authored
Merge branch 'main' into fix/server_llama_call_thread_starvation
2 parents 7bf48e3 + 77a12a3 commit ad35fc1

File tree

10 files changed: +114 -84 lines changed

.github/workflows/build-and-release.yaml

Lines changed: 2 additions & 2 deletions

@@ -42,7 +42,7 @@ jobs:
         shell: cmd
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.21.1
+        uses: pypa/cibuildwheel@v2.22.0
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
@@ -69,7 +69,7 @@ jobs:
           platforms: linux/arm64
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.21.1
+        uses: pypa/cibuildwheel@v2.22.0
         env:
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/build-wheels-cuda.yaml

Lines changed: 2 additions & 2 deletions

@@ -22,7 +22,7 @@ jobs:
           $matrix = @{
               'os' = @('ubuntu-latest', 'windows-2019')
               'pyver' = @("3.9", "3.10", "3.11", "3.12")
-              'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.0")
+              'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.1")
               'releasetag' = @("basic")
           }
 
@@ -59,7 +59,7 @@ jobs:
           cache: 'pip'
 
       - name: Setup Mamba
-        uses: conda-incubator/setup-miniconda@v3.0.4
+        uses: conda-incubator/setup-miniconda@v3.1.0
        with:
           activate-environment: "build"
           python-version: ${{ matrix.pyver }}
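Note: the matrix above fans out as the cross product of os × pyver × cuda, so this change grows the wheel-build fan-out. A quick illustrative check of the job count (a sketch, not part of the workflow):

from itertools import product

os_list = ['ubuntu-latest', 'windows-2019']
pyvers = ["3.9", "3.10", "3.11", "3.12"]
cudas = ["12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.1"]

# 2 OSes x 4 Python versions x 6 CUDA versions = 48 wheel builds
print(len(list(product(os_list, pyvers, cudas))))  # 48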

.github/workflows/build-wheels-metal.yaml

Lines changed: 1 addition & 1 deletion

@@ -43,7 +43,7 @@ jobs:
         shell: cmd
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.21.1
+        uses: pypa/cibuildwheel@v2.22.0
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/generate-index-from-release.yaml

Lines changed: 0 additions & 1 deletion

@@ -44,7 +44,6 @@ jobs:
           ./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
           ./scripts/releases-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
           ./scripts/releases-to-pep-503.sh index/whl/cu124 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
-          ./scripts/releases-to-pep-503.sh index/whl/cu125 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu125$'
           ./scripts/releases-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
       - name: Upload artifact
         uses: actions/upload-pages-artifact@v3
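The deleted line drops the cu125 wheel index; each remaining command feeds release tags matching its pattern to the PEP 503 index script. An illustrative check of what the tag regex accepts (the example tags are hypothetical):

import re

cu124 = re.compile(r'^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$')
assert cu124.match("v0.3.2-cu124")      # leading "v" is optional
assert cu124.match("0.3.2-cu124")
assert not cu124.match("v0.3.2-cu125")  # cu125 index removed in this commit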

.github/workflows/test.yaml

Lines changed: 24 additions & 44 deletions

@@ -51,19 +51,11 @@ jobs:
           path: ~/.cache/huggingface/hub
           key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
       - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
         run: |
           python -m pip install --upgrade pip
           python -m pip install uv
           python -m uv pip install -e .[all] --verbose
         shell: bash
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
-        shell: cmd
       - name: Test with pytest
         run: |
           python -m pytest
@@ -90,30 +82,21 @@ jobs:
         with:
           path: ~/.cache/huggingface/hub
           key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
-
-      - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
-        shell: bash
 
       - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
         run: |
           python -m pip install --upgrade pip
           python -m pip install uv
           python -m uv pip install -e .[all] --verbose
-        shell: cmd
+        shell: cmd
 
       - name: Test with pytest
         run: |
           python -m pytest
 
   build-macos:
     needs: download-model
-    runs-on: macos-latest
+    runs-on: macos-13
     strategy:
       matrix:
         python-version: ["3.9", "3.10", "3.11", "3.12"]
@@ -128,35 +111,33 @@ jobs:
           python-version: ${{ matrix.python-version }}
           cache: 'pip'
 
+      - name: System Info
+        run: |
+          uname -a
+          sysctl -n machdep.cpu.brand_string
+          python3 -c "import platform; print(platform.machine(), platform.architecture())"
+
       - name: Restore model cache
         uses: actions/cache@v4
         with:
           path: ~/.cache/huggingface/hub
           key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
 
       - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
+          python3 -m pip install --upgrade pip
+          python3 -m pip install uv
+          python3 -m uv pip install -e .[all] --verbose
+          CMAKE_ARGS="-DLLAMA_METAL=off" python3 -m uv pip install .[all] --verbose
         shell: bash
 
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
-        shell: cmd
-
       - name: Test with pytest
         run: |
-          python -m pytest
+          python3 -m pytest
 
   build-macos-metal:
     needs: download-model
-    runs-on: macos-latest
+    runs-on: macos-13
     steps:
       - uses: actions/checkout@v4
         with:
@@ -167,25 +148,24 @@ jobs:
         with:
           python-version: "3.9"
 
+      - name: System Info
+        run: |
+          uname -a
+          sysctl -n machdep.cpu.brand_string
+          python3 -c "import platform; print(platform.machine(), platform.architecture())"
+
       - name: Restore model cache
         uses: actions/cache@v4
         with:
           path: ~/.cache/huggingface/hub
           key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
 
-      - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
+      - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          CMAKE_ARGS="-DLLAMA_METAL=on" python -m uv pip install .[all] --verbose
+          python3 -m pip install --upgrade pip
+          CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install .[all] --verbose
         shell: bash
 
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        run: |
-          python -m pip install --upgrade pip
-          CMAKE_ARGS="-DGGML_METAL=on" python -m pip install .[all] --verbose
       - name: Test with pytest
         run: |
-          python -m pytest
+          python3 -m pytest
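The macOS jobs now call python3 explicitly and pass CMake options through the CMAKE_ARGS environment variable (-DLLAMA_METAL=off/on), which llama-cpp-python's build reads at pip-install time. A minimal sketch of the same invocation driven from Python (flags as in the workflow step, otherwise illustrative):

import os
import subprocess

# Build the wheel with Metal disabled, as in the build-macos step above
env = dict(os.environ, CMAKE_ARGS="-DLLAMA_METAL=off")
subprocess.run(
    ["python3", "-m", "pip", "install", ".[all]", "--verbose"],
    env=env,
    check=True,
)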

CMakeLists.txt

Lines changed: 29 additions & 12 deletions

@@ -62,18 +62,35 @@ if (LLAMA_BUILD)
     # Enable building of the common library
     set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build llama.cpp common library" FORCE)
 
-    # Building llama
-    if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
-        # Need to disable these llama.cpp flags on Apple x86_64,
-        # otherwise users may encounter invalid instruction errors
-        set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
-        set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
-        set(GGML_FMA "Off" CACHE BOOL "gml: enable FMA" FORCE)
-        set(GGML_F16C "Off" CACHE BOOL "gml: enable F16C" FORCE)
-    endif()
-
+    # Architecture detection and settings for Apple platforms
     if (APPLE)
-        set(GGML_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
+        # Get the target architecture
+        execute_process(
+            COMMAND uname -m
+            OUTPUT_VARIABLE HOST_ARCH
+            OUTPUT_STRIP_TRAILING_WHITESPACE
+        )
+
+        # If CMAKE_OSX_ARCHITECTURES is not set, use the host architecture
+        if(NOT CMAKE_OSX_ARCHITECTURES)
+            set(CMAKE_OSX_ARCHITECTURES ${HOST_ARCH} CACHE STRING "Build architecture for macOS" FORCE)
+        endif()
+
+        message(STATUS "Host architecture: ${HOST_ARCH}")
+        message(STATUS "Target architecture: ${CMAKE_OSX_ARCHITECTURES}")
+
+        # Configure based on target architecture
+        if(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64")
+            # Intel Mac settings
+            set(GGML_AVX "OFF" CACHE BOOL "ggml: enable AVX" FORCE)
+            set(GGML_AVX2 "OFF" CACHE BOOL "ggml: enable AVX2" FORCE)
+            set(GGML_FMA "OFF" CACHE BOOL "ggml: enable FMA" FORCE)
+            set(GGML_F16C "OFF" CACHE BOOL "ggml: enable F16C" FORCE)
+        endif()
+
+        # Metal settings (enable for both architectures)
+        set(GGML_METAL "ON" CACHE BOOL "ggml: enable Metal" FORCE)
+        set(GGML_METAL_EMBED_LIBRARY "ON" CACHE BOOL "ggml: embed metal library" FORCE)
     endif()
 
     add_subdirectory(vendor/llama.cpp)
@@ -128,7 +145,7 @@ if (LLAMA_BUILD)
         # Building llava
         add_subdirectory(vendor/llama.cpp/examples/llava)
         set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
-        # Set CUDA_ARCHITECTURES to OFF on Windows
+
        if (WIN32)
            set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
        endif()
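The new CMake block resolves the macOS target architecture (falling back to uname -m when CMAKE_OSX_ARCHITECTURES is unset), disables the x86 SIMD flags only for x86_64 targets, and force-enables Metal for both architectures. The same decision logic, restated as an illustrative Python helper (hypothetical, not part of the repo):

import subprocess
from typing import Dict, Optional

def macos_ggml_flags(target_arch: Optional[str] = None) -> Dict[str, str]:
    # Fall back to the host architecture, mirroring `uname -m` in the CMake block
    host_arch = subprocess.run(
        ["uname", "-m"], capture_output=True, text=True, check=True
    ).stdout.strip()
    arch = target_arch or host_arch
    flags = {"GGML_METAL": "ON", "GGML_METAL_EMBED_LIBRARY": "ON"}
    if arch == "x86_64":
        # Intel Macs: AVX/AVX2/FMA/F16C off to avoid invalid-instruction crashes
        flags.update(GGML_AVX="OFF", GGML_AVX2="OFF", GGML_FMA="OFF", GGML_F16C="OFF")
    return flags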

llama_cpp/llama.py

Lines changed: 5 additions & 5 deletions

@@ -1141,7 +1141,7 @@ def _create_completion(
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-        logit_bias: Optional[Dict[str, float]] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> Union[
         Iterator[CreateCompletionResponse], Iterator[CreateCompletionStreamResponse]
     ]:
@@ -1761,7 +1761,7 @@ def create_completion(
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-        logit_bias: Optional[Dict[str, float]] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]:
         """Generate text from a prompt.
 
@@ -1858,7 +1858,7 @@ def __call__(
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-        logit_bias: Optional[Dict[str, float]] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]:
         """Generate text from a prompt.
 
@@ -1951,7 +1951,7 @@ def create_chat_completion(
         model: Optional[str] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-        logit_bias: Optional[Dict[str, float]] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
         logprobs: Optional[bool] = None,
         top_logprobs: Optional[int] = None,
     ) -> Union[
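These four hunks tighten the logit_bias annotation from Dict[str, float] to Dict[int, float], matching how the values are consumed: keys are token IDs, not strings. A minimal usage sketch (the model path and prompt are placeholders, not from this commit):

from llama_cpp import Llama

llm = Llama(model_path="./model.gguf")  # placeholder path

# Bias by token ID (int), per the corrected annotation: strongly
# discourage the first token of " cat" during sampling.
token_id = llm.tokenize(b" cat", add_bos=False)[0]
out = llm.create_completion(
    "The animal on the mat is a",
    max_tokens=8,
    logit_bias={token_id: -100.0},
)
print(out["choices"][0]["text"])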
@@ -2073,7 +2073,7 @@ def __getstate__(self):
             use_mlock=self.model_params.use_mlock,
             kv_overrides=self.kv_overrides,
             # Context Params
-            seed=self.context_params.seed,
+            seed=self._seed,
             n_ctx=self.context_params.n_ctx,
             n_batch=self.n_batch,
             n_ubatch=self.context_params.n_ubatch,
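The __getstate__ hunk reads the seed from the cached _seed attribute instead of from context_params, so the user-supplied value survives serialization even when the llama.cpp context parameters no longer carry it. A rough round-trip sketch (assumes the class's matching __setstate__ rebuilds the model; the path is a placeholder):

import pickle

from llama_cpp import Llama

llm = Llama(model_path="./model.gguf", seed=1234)  # placeholder path

# __getstate__ now captures seed=self._seed, so the clone should be
# reconstructed with the same seed the user originally passed in.
clone = pickle.loads(pickle.dumps(llm))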

0 commit comments