Merge branch 'main' into cuda · Smartappli/llama-cpp-python@6e3a5e6 · GitHub

Commit 6e3a5e6

Merge branch 'main' into cuda
2 parents: 561013f + 1547202 · commit 6e3a5e6

File tree

7 files changed (+25, -11 lines)

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.73]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@25c6e82e7a1ad25a42b0894e87d9b5c557409516
+- fix: Clear kv cache at beginning of image chat formats to avoid bug when image is evaluated first by @abetlen in ac55d0a175115d1e719672ce1cb1bec776c738b1
+
+## [0.2.72]
+
+- fix(security): Remote Code Execution by Server-Side Template Injection in Model Metadata by @retr0reg in b454f40a9a1787b2b5659cd2cb00819d983185df
+- fix(security): Update remaining jinja chat templates to use immutable sandbox by @CISC in #1441
+
 ## [0.2.71]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@911b3900dded9a1cfe0f0e41b82c7a29baf3a217

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
@@ -51,8 +51,9 @@ if (LLAMA_BUILD)
     )
 
     if (LLAVA_BUILD)
-        if (LLAMA_CUBLAS)
+        if (LLAMA_CUBLAS OR LLAMA_CUDA)
             add_compile_definitions(GGML_USE_CUBLAS)
+            add_compile_definitions(GGML_USE_CUDA)
         endif()
 
         if (LLAMA_METAL)

Makefile

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ build.debug:
 	CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug" python3 -m pip install --verbose --config-settings=cmake.verbose=true --config-settings=logging.level=INFO --config-settings=install.strip=false --editable .
 
 build.cuda:
-	CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DLLAMA_CUDA=on" python3 -m pip install --verbose -e .
 
 build.opencl:
 	CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install --verbose -e .

README.md

Lines changed: 1 addition & 1 deletion
@@ -550,7 +550,7 @@ llm = Llama.from_pretrained(
     n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
 )
 
-respoonse = llm.create_chat_completion(
+response = llm.create_chat_completion(
     messages = [
         {
             "role": "user",

llama_cpp/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.71"
+__version__ = "0.2.73"

llama_cpp/llama_chat_format.py

Lines changed: 9 additions & 6 deletions
@@ -11,6 +11,7 @@
 from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union, Protocol, cast
 
 import jinja2
+from jinja2.sandbox import ImmutableSandboxedEnvironment
 
 import numpy as np
 import numpy.typing as npt

@@ -191,7 +192,7 @@ def __init__(
         self.add_generation_prompt = add_generation_prompt
         self.stop_token_ids = set(stop_token_ids) if stop_token_ids is not None else None
 
-        self._environment = jinja2.Environment(
+        self._environment = ImmutableSandboxedEnvironment(
             loader=jinja2.BaseLoader(),
             trim_blocks=True,
             lstrip_blocks=True,

@@ -684,8 +685,7 @@ def hf_tokenizer_config_to_chat_formatter(
     assert isinstance(tokenizer_config["eos_token"], str)
     eos_token = tokenizer_config["eos_token"]
 
-    env = jinja2.Environment(
-        loader=jinja2.BaseLoader(),
+    env = ImmutableSandboxedEnvironment(
         trim_blocks=True,
         lstrip_blocks=True,
     ).from_string(chat_template)

@@ -2602,7 +2602,10 @@ def __call__(
             messages = [llama_types.ChatCompletionRequestSystemMessage(role="system", content=self.DEFAULT_SYSTEM_MESSAGE)] + messages
 
         image_urls = self.get_image_urls(messages)
-        template = jinja2.Template(self.CHAT_FORMAT)
+        template = ImmutableSandboxedEnvironment(
+            trim_blocks=True,
+            lstrip_blocks=True,
+        ).from_string(self.CHAT_FORMAT)
         text = template.render(
             messages=messages,
             add_generation_prompt=True,

@@ -2634,6 +2637,7 @@ def embed_image_bytes(image_bytes: bytes):
 
         # Evaluate prompt
         llama.reset()
+        llama._ctx.kv_cache_clear()
         for type_, value in split_text:
             if type_ == "text":
                 tokens = llama.tokenize(value.encode("utf8"), add_bos=False, special=True)

@@ -3241,8 +3245,7 @@ def chatml_function_calling(
         "{% endfor %}"
         "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
     )
-    template_renderer = jinja2.Environment(
-        loader=jinja2.BaseLoader(),
+    template_renderer = ImmutableSandboxedEnvironment(
         autoescape=jinja2.select_autoescape(["html", "xml"]),
         undefined=jinja2.StrictUndefined,
     ).from_string(function_calling_template)
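
The common thread in these hunks is swapping plain jinja2.Environment / jinja2.Template for jinja2's ImmutableSandboxedEnvironment, so that chat templates taken from untrusted model metadata can no longer reach unsafe attributes or mutate objects (the SSTI issue noted in the 0.2.72 changelog entry). The sketch below illustrates the difference with made-up template strings; it is not code from the repository.

# Illustration (with made-up templates) of why the sandboxed environment matters
# for chat templates loaded from untrusted model metadata.
import jinja2
from jinja2.sandbox import ImmutableSandboxedEnvironment, SecurityError

benign = "{% for m in messages %}{{ m['role'] }}: {{ m['content'] }}\n{% endfor %}"
hostile = "{{ messages.__class__.__mro__ }}"  # typical SSTI probe

messages = [{"role": "user", "content": "hi"}]

# A plain (unsandboxed) template happily walks Python internals.
print(jinja2.Template(hostile).render(messages=messages))

env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
print(env.from_string(benign).render(messages=messages))  # normal templates still work

try:
    env.from_string(hostile).render(messages=messages)
except SecurityError as exc:
    # The sandbox refuses access to unsafe attributes such as __class__ / __mro__.
    print("blocked:", exc)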

vendor/llama.cpp

Submodule pointer updated (see the llama.cpp update noted in the CHANGELOG above).
