Merge branch 'main' into cuda · Smartappli/llama-cpp-python@6e3a5e6 · GitHub

Commit 6e3a5e6

Merge branch 'main' into cuda
2 parents: 561013f + 1547202 · commit 6e3a5e6

File tree

7 files changed (+25, -11 lines)

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.73]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@25c6e82e7a1ad25a42b0894e87d9b5c557409516
+- fix: Clear kv cache at beginning of image chat formats to avoid bug when image is evaluated first by @abetlen in ac55d0a175115d1e719672ce1cb1bec776c738b1
+
+## [0.2.72]
+
+- fix(security): Remote Code Execution by Server-Side Template Injection in Model Metadata by @retr0reg in b454f40a9a1787b2b5659cd2cb00819d983185df
+- fix(security): Update remaining jinja chat templates to use immutable sandbox by @CISC in #1441
+
 ## [0.2.71]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@911b3900dded9a1cfe0f0e41b82c7a29baf3a217

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
@@ -51,8 +51,9 @@ if (LLAMA_BUILD)
     )
 
     if (LLAVA_BUILD)
-        if (LLAMA_CUBLAS)
+        if (LLAMA_CUBLAS OR LLAMA_CUDA)
             add_compile_definitions(GGML_USE_CUBLAS)
+            add_compile_definitions(GGML_USE_CUDA)
         endif()
 
         if (LLAMA_METAL)

Makefile

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ build.debug:
 	CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug" python3 -m pip install --verbose --config-settings=cmake.verbose=true --config-settings=logging.level=INFO --config-settings=install.strip=false --editable .
 
 build.cuda:
-	CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DLLAMA_CUDA=on" python3 -m pip install --verbose -e .
 
 build.opencl:
 	CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install --verbose -e .

README.md

Lines changed: 1 addition & 1 deletion
@@ -550,7 +550,7 @@ llm = Llama.from_pretrained(
     n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
 )
 
-respoonse = llm.create_chat_completion(
+response = llm.create_chat_completion(
     messages = [
         {
             "role": "user",

llama_cpp/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.71"
+__version__ = "0.2.73"

llama_cpp/llama_chat_format.py

Lines changed: 9 additions & 6 deletions
@@ -11,6 +11,7 @@
 from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union, Protocol, cast
 
 import jinja2
+from jinja2.sandbox import ImmutableSandboxedEnvironment
 
 import numpy as np
 import numpy.typing as npt

@@ -191,7 +192,7 @@ def __init__(
         self.add_generation_prompt = add_generation_prompt
         self.stop_token_ids = set(stop_token_ids) if stop_token_ids is not None else None
 
-        self._environment = jinja2.Environment(
+        self._environment = ImmutableSandboxedEnvironment(
             loader=jinja2.BaseLoader(),
             trim_blocks=True,
             lstrip_blocks=True,

@@ -684,8 +685,7 @@ def hf_tokenizer_config_to_chat_formatter(
     assert isinstance(tokenizer_config["eos_token"], str)
     eos_token = tokenizer_config["eos_token"]
 
-    env = jinja2.Environment(
-        loader=jinja2.BaseLoader(),
+    env = ImmutableSandboxedEnvironment(
         trim_blocks=True,
         lstrip_blocks=True,
     ).from_string(chat_template)

@@ -2602,7 +2602,10 @@ def __call__(
             messages = [llama_types.ChatCompletionRequestSystemMessage(role="system", content=self.DEFAULT_SYSTEM_MESSAGE)] + messages
 
         image_urls = self.get_image_urls(messages)
-        template = jinja2.Template(self.CHAT_FORMAT)
+        template = ImmutableSandboxedEnvironment(
+            trim_blocks=True,
+            lstrip_blocks=True,
+        ).from_string(self.CHAT_FORMAT)
         text = template.render(
             messages=messages,
             add_generation_prompt=True,

@@ -2634,6 +2637,7 @@ def embed_image_bytes(image_bytes: bytes):
 
         # Evaluate prompt
         llama.reset()
+        llama._ctx.kv_cache_clear()
         for type_, value in split_text:
             if type_ == "text":
                 tokens = llama.tokenize(value.encode("utf8"), add_bos=False, special=True)

@@ -3241,8 +3245,7 @@ def chatml_function_calling(
         "{% endfor %}"
         "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
     )
-    template_renderer = jinja2.Environment(
-        loader=jinja2.BaseLoader(),
+    template_renderer = ImmutableSandboxedEnvironment(
         autoescape=jinja2.select_autoescape(["html", "xml"]),
         undefined=jinja2.StrictUndefined,
     ).from_string(function_calling_template)
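
The common thread in these hunks is swapping plain jinja2.Environment / jinja2.Template for jinja2's ImmutableSandboxedEnvironment, so that chat templates taken from untrusted model metadata can no longer reach unsafe attributes or mutate objects (the SSTI issue noted in the 0.2.72 changelog entry). The sketch below illustrates the difference with made-up template strings; it is not code from the repository.

# Illustration (with made-up templates) of why the sandboxed environment matters
# for chat templates loaded from untrusted model metadata.
import jinja2
from jinja2.sandbox import ImmutableSandboxedEnvironment, SecurityError

benign = "{% for m in messages %}{{ m['role'] }}: {{ m['content'] }}\n{% endfor %}"
hostile = "{{ messages.__class__.__mro__ }}"  # typical SSTI probe

messages = [{"role": "user", "content": "hi"}]

# A plain (unsandboxed) template happily walks Python internals.
print(jinja2.Template(hostile).render(messages=messages))

env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
print(env.from_string(benign).render(messages=messages))  # normal templates still work

try:
    env.from_string(hostile).render(messages=messages)
except SecurityError as exc:
    # The sandbox refuses access to unsafe attributes such as __class__ / __mro__.
    print("blocked:", exc)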

vendor/llama.cpp

Submodule pointer updated (see the llama.cpp update noted in the CHANGELOG above).
