Merge branch 'main' of https://github.com/ducky777/llama-cpp-python · abetlen/llama-cpp-python@3c75cbe

Commit 3c75cbe · 2 parents bcc3faf + c89be28

49 files changed: +8680 −3544 lines changed

.github/ISSUE_TEMPLATE/bug_report.md

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ Try the following:
 1. `git clone https://github.com/abetlen/llama-cpp-python`
 2. `cd llama-cpp-python`
 3. `rm -rf _skbuild/` # delete any old builds
-4. `python setup.py develop`
+4. `python -m pip install .`
 5. `cd ./vendor/llama.cpp`
 6. Follow [llama.cpp's instructions](https://github.com/ggerganov/llama.cpp#build) to `cmake` llama.cpp
 7. Run llama.cpp's `./main` with the same arguments you previously passed to llama-cpp-python and see if you can reproduce the issue. If you can, [log an issue with llama.cpp](https://github.com/ggerganov/llama.cpp/issues)
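
The change above retires the deprecated `python setup.py develop` step in favor of `python -m pip install .`. As a quick sanity check after step 4 (an editorial sketch, not part of the issue template), it can help to confirm that the rebuilt package is really the one Python imports before moving on to the llama.cpp comparison:

```python
# Editorial sketch: verify the rebuilt llama-cpp-python is the copy being imported.
import llama_cpp

print("llama_cpp version:", llama_cpp.__version__)  # should match the checked-out source
print("loaded from:", llama_cpp.__file__)           # should point into the fresh install

# The low-level binding module only resolves if the compiled shared library was found,
# so probing one symbol is a cheap check that the native build succeeded.
print("native bindings present:", hasattr(llama_cpp.llama_cpp, "llama_backend_init"))
```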

.github/workflows/build-and-release.yaml

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
         with:
-          submodules: "true"
+          submodules: "recursive"
 
       # Used to host cibuildwheel
       - uses: actions/setup-python@v3
@@ -48,7 +48,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
         with:
-          submodules: "true"
+          submodules: "recursive"
       - uses: actions/setup-python@v3
         with:
           python-version: "3.8"

.github/workflows/build-docker.yaml

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v3
         with:
-          submodules: "true"
+          submodules: "recursive"
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v2

.github/workflows/publish-to-test.yaml

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
         with:
-          submodules: "true"
+          submodules: "recursive"
       - name: Set up Python
         uses: actions/setup-python@v4
         with:

.github/workflows/publish.yaml

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
         with:
-          submodules: "true"
+          submodules: "recursive"
       - name: Set up Python
         uses: actions/setup-python@v4
         with:

.github/workflows/test.yaml

Lines changed: 5 additions & 5 deletions
@@ -19,7 +19,7 @@ jobs:
     steps:
      - uses: actions/checkout@v4
        with:
-         submodules: "true"
+         submodules: "recursive"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
@@ -42,7 +42,7 @@ jobs:
     steps:
      - uses: actions/checkout@v3
        with:
-         submodules: "true"
+         submodules: "recursive"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
@@ -65,7 +65,7 @@ jobs:
     steps:
      - uses: actions/checkout@v3
        with:
-         submodules: "true"
+         submodules: "recursive"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
@@ -85,7 +85,7 @@ jobs:
 #    steps:
 #      - uses: actions/checkout@v3
 #        with:
-#          submodules: "true"
+#          submodules: "recursive"
 #      - name: Set up Python 3.8
 #        uses: actions/setup-python@v4
 #        with:
@@ -112,7 +112,7 @@ jobs:
     steps:
      - uses: actions/checkout@v3
        with:
-         submodules: "true"
+         submodules: "recursive"
      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
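
Every workflow diff above makes the same one-line change: `actions/checkout` now fetches submodules recursively rather than only one level deep, so submodules nested inside `vendor/llama.cpp` are present when the wheel, Docker, publish, and test jobs build from source. A rough local analogue, sketched below under the assumption that `vendor/llama.cpp/kompute` is one such nested submodule (the CHANGELOG's 0.2.43 entry later in this diff mentions it), is to verify the nested checkouts are populated before building:

```python
# Rough local analogue of submodules: "recursive"; the nested path is an assumption
# for illustration -- only vendor/llama.cpp itself is named in this commit.
from pathlib import Path

def populated(path: str) -> bool:
    """An un-initialized git submodule shows up as an empty directory."""
    p = Path(path)
    return p.is_dir() and any(p.iterdir())

for sub in ("vendor/llama.cpp", "vendor/llama.cpp/kompute"):
    if populated(sub):
        print(f"{sub}: ok")
    else:
        print(f"{sub}: missing -- run `git submodule update --init --recursive`")
```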

.gitignore

Lines changed: 13 additions & 0 deletions
@@ -1,3 +1,16 @@
+*.local
+
+.python-version
+
+.vscode/
+
+_skbuild/
+
+.envrc
+.direnv
+
+models/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

CHANGELOG.md

Lines changed: 228 additions & 3 deletions
@@ -7,6 +7,231 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.57]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@ac9ee6a4ad740bc1ee484ede43e9f92b5af244c1
+- fix: set default embedding pooling type to unspecified by @abetlen in 4084aabe867b8ec2aba1b22659e59c9318b0d1f3
+- fix: Fix and optimize functionary chat handler by @jeffrey-fong in #1282
+- fix: json mode for basic chat formats by @abetlen in 20e6815252d0efd9f015f7adbf108faaf36e3f3c
+
+## [0.2.56]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@c2101a2e909ac7c08976d414e64e96c90ee5fa9e
+- feat(server): Add endpoints for tokenize, detokenize and count tokens by @felipelo in #1136
+- feat: Switch embed to llama_get_embeddings_seq by @iamlemec in #1263
+- fix: Fixed json strings grammar by blacklisting character control set by @ExtReMLapin in d02a9cf16ff88ad011e2eb1ce29f4d9400f13cd1
+- fix: Check for existence of clip model path by @kejcao in #1264
+
+## [0.2.55]
+
+- feat: Update llama.cpp to ggerganov/9731134296af3a6839cd682e51d9c2109a871de5
+- docs: fix small typo in README: 'model know how' -> 'model knows how' by @boegel in #1244
+
+## [0.2.54]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@cb49e0f8c906e5da49e9f6d64a57742a9a241c6a
+- docs: fix typo in README.md embeddings example by @iamlemec in #1232
+
+## [0.2.53]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@cb49e0f8c906e5da49e9f6d64a57742a9a241c6a
+- fix: eos/bos_token set correctly for Jinja2ChatFormatter and automatic chat formatter by @CISC in #1230
+
+## [0.2.52]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@a33e6a0d2a66104ea9a906bdbf8a94d050189d91
+- fix: Llava15ChatHandler (this function takes at least 4 arguments) by @abetlen in 8383a9e5620f5df5a88f62da16813eac200dd706
+
+## [0.2.51]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@c39373398803c669056304090050fe3f44b41bf9
+- fix: Restore type hints for low-level api by @abetlen in 19234aa0dbd0c3c87656e65dd2b064665371925b
+
+## [0.2.50]
+
+- docs: Update Functionary OpenAI Server Readme by @jeffrey-fong in #1193
+- fix: LlamaHFTokenizer now receives pre_tokens by @abetlen in 47bad30dd716443652275099fa3851811168ff4a
+
+## [0.2.49]
+
+- fix: module 'llama_cpp.llama_cpp' has no attribute 'c_uint8' in Llama.save_state by @abetlen in db776a885cd4c20811f22f8bd1a27ecc71dba927
+- feat: Auto detect Mixtral's slightly different format by @lukestanley in #1214
+
+## [0.2.48]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@15499eb94227401bdc8875da6eb85c15d37068f7
+- feat: Add Google's Gemma formatting via chat_format="gemma" by @alvarobartt in #1210
+- feat: support minItems/maxItems in JSON grammar converter by @nopperl in 3921e10770996d95a9eb22c8248bacef39f69365
+- fix: Update from_pretrained defaults to match hf_hub_download and pull to local cache folder by @abetlen in e6d6260a91b7831733f7d1f73c7af46a3e8185ed
+- fix: Raise exceptions when llama model or context fails to load by @abetlen in dd22010e85265ae840c76ec835d67a29ed852722
+- docs: Update README.md to fix pip install llama cpp server by @audip in #1187
+
+## [0.2.47]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@973053d8b0d04809836b3339a50f68d9c842de90
+
+## [0.2.46]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@ba2135ccae7462470b3865c6e41d2e1d734eac05
+- feat: Pull models directly from huggingface by @abetlen in #1206
+- feat(low-level-api): Improve API static type-safety and performance. Low level api functions are positional args only now. by @abetlen in #1205
+
+## [0.2.45]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@89febfed9322c8849520dc63c93ee4f5fd72556e
+
+## [0.2.44]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@4524290e87b8e107cc2b56e1251751546f4b9051
+- fix: create_embedding broken response for input type str by @abetlen in 0ce66bc080fe537590b05b24bf442480bf2dd045
+- fix: Use '\n' seperator for EventSourceResponse by @khimaros in #1188
+- fix: Incorporate embedding pooling layer fixes by @iamlemec in #1194
+
+## [0.2.43]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@8084d554406b767d36b3250b3b787462d5dd626f
+- feat: Support batch embeddings by @iamlemec in #1186
+- fix: submodule kompute is not included in sdist by @abetlen in 7dbbfdecadebe7750be650d9409959640ff9a460
+- fix: fix: Update openbuddy prompt format by @abetlen in 07a783779a62a4aac0b11161c7e0eb983ff215f8
+
+## [0.2.42]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@ea9c8e11436ad50719987fa23a289c74b7b40d40
+- fix: sample idx off-by-one error for logit_processors by @lapp0 in #1179
+- fix: chat formatting bugs in `chatml-function-calling` by @abetlen in 4b0e3320bd8c2c209e29978d0b21e2e471cc9ee3 and 68fb71b6a26a1e57331868f959b47ab4b87851e1
+
+## [0.2.41]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@895407f31b358e3d9335e847d13f033491ec8a5b
+- fix: Don't change order of json schema object properties in generated grammar unless prop_order is passed by @abetlen in d1822fed6b706f38bd1ff0de4dec5baaa3cf84fa
+
+## [0.2.40]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@3bdc4cd0f595a6096cca4a64aa75ffa8a3503465
+- feat: Generic chatml Function Calling using chat_format="chatml-function-calling"` by @abetlen in #957
+- fix: Circular dependancy preventing early Llama object free by @notwa in #1176
+- docs: Set the correct command for compiling with syscl support by @akarshanbiswas in #1172
+- feat: use gpu backend for clip if available by @iamlemec in #1175
+
+## [0.2.39]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@b08f22c882a1443e6b97081f3ce718a4d1a741f8
+- fix: Fix destructor logging bugs by using llama_log_callback to avoid suppress_stdout_stderr by @abetlen in 59760c85eddc72dfcc1839f43760ef72c23d6874
+
+## [0.2.38]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
+- feat: Add speculative decoding by @abetlen in #1120
+- fix: Pass raise_exception and add_generation_prompt to jinja2 chat template by @abetlen in 078cca0361bf5a94d2cf52ed04980d20e32d6f95
+
+## [0.2.37]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@fea4fd4ba7f6b754ac795387b275e1a014a77bde
+- feat: Automatically set chat format from gguf by @abetlen in #1110
+
+## [0.2.36]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@2aed77eb06a329f0d82bb1c467f4244904d4073f
+- feat: Add mistral instruct chat format as "mistral-instruct" by @Rafaelblsilva in #799
+
+## [0.2.35]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@d2f650cb5b04ee2726663e79b47da5efe196ce00
+
+## [0.2.34]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@6db2b41a76ee78d5efdd5c3cddd5d7ad3f646855
+- feat: Add json schema mode by @abetlen in #1122
+
+## [0.2.33]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@faa3526a1eba458120987ed8269e5616385a76f4
+- feat(server): include llama-cpp-python version in openapi spec by @abetlen in cde7514c3d28e6d52f272614e9957208c344dde5
+- fix: use both eos and bos tokens as stop sequences for hf-tokenizer-config chat format. by @abetlen in 5b982d0f8c6f35242c8862ffdce00e17cea0b44f
+- fix: GGUF metadata KV overrides, re #1011 by @phiharri in #1116
+- fix: llama_log_set should be able to accept null pointer by @abetlen in c970d41a85381fd55235136f123422df0bf0c7e7
+
+## [0.2.32]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@504dc37be8446fb09b1ede70300250ad41be32a2
+- fix: from_json_schema oneof/anyof bug by @jndiogo in d3f5528ca8bcb9d69d4f27e21631e911f1fb9bfe
+- fix: pass chat handler not chat formatter for huggingface autotokenizer and tokenizer_config formats by @abetlen in 24f39454e91cf5dddbc4b6041aead4accc7c7a2d
+- feat: Add add_generation_prompt option for jinja2chatformatter by @abetlen in 7f3209b1eb4ad3260ba063801fab80a8c25a2f4c
+- feat: Add Jinja2ChatFormatter by @abetlen in be09318c26add8674ce494ae7cc480cce72a4146
+- feat: Expose gguf model metadata in metadata property by @abetlen in 5a34c57e5479e50c99aba9b38218cc48e6560b81
+
+## [0.2.31]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@a5cacb22b2114fd9adf61c00cbb237384d86bced
+- fix: Mirostat sampling now passes correct type to ctypes and tracks state during generation by @abetlen in 3babe3512cb95743108f2b595210c38ed6f1b904
+- fix: Python3.8 support in server by @abetlen in 141293a75b564a8699e0acba1da24d9aa1cf0ab1
+
+## [0.2.30]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@57e2a7a52a819883f40dada8a2edc24ecf48186b
+- feat(server): Add ability to load chat format from huggingface autotokenizer or tokenizer_config.json files by @abetlen in b8fc1c7d83ad4a9207c707ba1d954fe580286a01
+- feat: Integration of Jinja2 Templating for chat formats by @teleprint-me in #875
+- fix: Offload KQV by default by @abetlen in 48c3b77e6f558a9899de0e1155c7dc0c7958d8e8
+- fix: Support Accept text/event-stream in chat and completion endpoints, resolves #1083 by @aniljava in #1088
+- fix(cli): allow passing n_ctx=0 to openAI API server args to use model n_ctx_train field per #1015 by @K-Mistele in #1093
+
+## [0.2.29]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@4483396751c79dea540808b9cb9238245d06da2b
+- feat: Add split_mode option by @abetlen in 84615adbc6855c8384807c42f0130f9a1763f99d
+- feat: Implement GGUF metadata KV overrides by @phiharri in #1011
+- fix: Avoid "LookupError: unknown encoding: ascii" when open() called in a destructor by @yieldthought in #1012
+- fix: Fix low_level_api_chat_cpp example to match current API by @aniljava in #1086
+- fix: Fix Pydantic model parsing by @DeNeutoy in #1087
+
+## [0.2.28]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@6efb8eb30e7025b168f3fda3ff83b9b386428ad6
+- feat: Add ability to pass in penalize_nl param by @shankinson in #1068
+- fix: print_grammar to stderr by @turian in #1052
+
+## [0.2.27]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@b3a7c20b5c035250257d2b62851c379b159c899a
+- feat: Add `saiga` chat format by @femoiseev in #1050
+- feat: Added `chatglm3` chat format by @xaviviro in #1059
+- fix: Correct typo in README.md by @qeleb in (#1058)
+
+## [0.2.26]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@f6793491b5af6da75edad34d6f503ef86d31b09f
+
+## [0.2.25]
+
+- feat(server): Multi model support by @D4ve-R in #931
+- feat(server): Support none defaulting to infinity for completions by @swg in #111
+- feat(server): Implement openai api compatible authentication by @docmeth2 in #1010
+- fix: text_offset of multi-token characters by @twaka in #1037
+- fix: ctypes bindings for kv override by @phiharri in #1011
+- fix: ctypes definitions of llama_kv_cache_view_update and llama_kv_cache_view_free. by @e-c-d in #1028
+
+## [0.2.24]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@0e18b2e7d0b5c0a509ea40098def234b8d4a938a
+- feat: Add offload_kqv option to llama and server by @abetlen in 095c65000642a3cf73055d7428232fb18b73c6f3
+- feat: n_ctx=0 now uses the n_ctx_train of the model by @DanieleMorotti in #1015
+- feat: logits_to_logprobs supports both 2-D and 3-D logits arrays by @kddubey in #1002
+- fix: Remove f16_kv, add offload_kqv fields in low level and llama apis by @brandonrobertz in #1019
+- perf: Don't convert logprobs arrays to lists by @kddubey in #1021
+- docs: Fix README.md functionary demo typo by @evelynmitchell in #996
+- examples: Update low_level_api_llama_cpp.py to match current API by @jsoma in #1023
+
+## [0.2.23]
+
+- Update llama.cpp to ggerganov/llama.cpp@948ff137ec37f1ec74c02905917fa0afc9b97514
+- Add qwen chat format by @yhfgyyf in #1005
+- Add support for running the server with SSL by @rgerganov in #994
+- Replace logits_to_logprobs implementation with numpy equivalent to llama.cpp by @player1537 in #991
+- Fix UnsupportedOperation: fileno in suppress_stdout_stderr by @zocainViken in #961
+- Add Pygmalion chat format by @chiensen in #986
+- README.md multimodal params fix by @zocainViken in #967
+- Fix minor typo in README by @aniketmaurya in #958
+
 ## [0.2.22]
 
 - Update llama.cpp to ggerganov/llama.cpp@8a7b2fa528f130631a5f43648481596ab320ed5a
@@ -103,7 +328,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [0.2.11]
 
-- Fix bug in `llama_model_params` object has no attribute `logits_all` by @abetlen in d696251fbe40015e8616ea7a7d7ad5257fd1b896
+- Fix bug in `llama_model_params` object has no attribute `logits_all` by @abetlen in d696251fbe40015e8616ea7a7d7ad5257fd1b896
 
 ## [0.2.10]
 
@@ -291,7 +516,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [0.1.60]
 
-NOTE: This release was deleted due to a bug with the packaging system that caused pip installations to fail.
+NOTE: This release was deleted due to a bug with the packaging system that caused pip installations to fail.
 
 - Truncate max_tokens in create_completion so requested tokens doesn't exceed context size.
 - Temporarily disable cache for completion requests
@@ -315,4 +540,4 @@ NOTE: This release was deleted due to a bug with the packaging system that caus
 - (misc) Added first version of the changelog
 - (server) Use async routes
 - (python-api) Use numpy for internal buffers to reduce memory usage and improve performance.
-- (python-api) Performance bug in stop sequence check slowing down streaming.
+- (python-api) Performance bug in stop sequence check slowing down streaming.
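
Several of the changelog entries above describe user-facing API additions rather than internal fixes, notably `Llama.from_pretrained` (0.2.46, #1206) and JSON schema mode (0.2.34, #1122). The snippet below is an illustrative sketch of how those two features are typically combined, not code taken from this commit; the Hugging Face repo id, filename pattern, and schema are placeholders:

```python
# Illustrative only: repo id, filename pattern, and schema are placeholders.
from llama_cpp import Llama

# 0.2.46 (#1206): pull a GGUF model straight from the Hugging Face Hub into the local cache.
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",  # placeholder repository
    filename="*q4_0.gguf",                  # glob-style filename pattern
    n_ctx=0,                                # 0.2.24 (#1015): use the model's n_ctx_train
)

# 0.2.34 (#1122): constrain the reply to a JSON schema via response_format.
result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Reply with a JSON object containing a 'name' field."}],
    response_format={
        "type": "json_object",
        "schema": {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        },
    },
)
print(result["choices"][0]["message"]["content"])
```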

CMakeLists.txt

Lines changed: 13 additions & 0 deletions
@@ -17,6 +17,11 @@ if (LLAMA_BUILD)
         set(LLAMA_FMA "Off" CACHE BOOL "llama: enable FMA" FORCE)
         set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE)
     endif()
+
+    if (APPLE AND CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
+        set(LLAMA_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
+    endif()
+
     add_subdirectory(vendor/llama.cpp)
     install(
         TARGETS llama
@@ -46,6 +51,14 @@ if (LLAMA_BUILD)
     )
 
     if (LLAVA_BUILD)
+        if (LLAMA_CUBLAS)
+            add_compile_definitions(GGML_USE_CUBLAS)
+        endif()
+
+        if (LLAMA_METAL)
+            add_compile_definitions(GGML_USE_METAL)
+        endif()
+
         # Building llava
         add_subdirectory(vendor/llama.cpp/examples/llava)
         set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
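
The CMake additions above embed the Metal library on Apple arm64 builds and forward the `LLAMA_CUBLAS` / `LLAMA_METAL` choices into the llava build as `GGML_USE_CUBLAS` / `GGML_USE_METAL` compile definitions. Those options are normally selected at install time through the `CMAKE_ARGS` environment variable described in the project README; the snippet below is a hedged sketch of a from-source reinstall that exercises one of them, with the flag value chosen purely as an example:

```python
# Hedged sketch: rebuild llama-cpp-python from source with Metal enabled so the
# GGML_USE_METAL definition added above also applies to the bundled llava target.
import os
import subprocess
import sys

env = dict(os.environ)
env["CMAKE_ARGS"] = "-DLLAMA_METAL=on"  # example; use -DLLAMA_CUBLAS=on for CUDA builds

subprocess.run(
    [sys.executable, "-m", "pip", "install",
     "--upgrade", "--force-reinstall", "--no-cache-dir", "llama-cpp-python"],
    check=True,
    env=env,
)
```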
