remove dangling image · ngxson/llama.cpp@eeda075 · GitHub
[go: up one dir, main page]

Skip to content

Commit eeda075

Browse files
committed
remove dangling image
1 parent aaebc33 commit eeda075

File tree

2 files changed

+32
-20
lines changed

2 files changed

+32
-20
lines changed

tools/server/tests/unit/test_vision_api.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,19 @@ def create_server():
2020

2121

2222
@pytest.mark.parametrize(
23-
"image_url, success, re_content",
23+
"prompt, image_url, success, re_content",
2424
[
2525
# test model is trained on CIFAR-10, but it's quite dumb due to small size
26-
(IMG_URL_0, True, "(cat)+"),
27-
("IMG_BASE64_0", True, "(cat)+"), # exceptional, so that we don't cog up the log
28-
(IMG_URL_1, True, "(frog)+"),
29-
("malformed", False, None),
30-
("https://google.com/404", False, None), # non-existent image
31-
("https://ggml.ai", False, None), # non-image data
26+
("What is this:\n", IMG_URL_0, True, "(cat)+"),
27+
("What is this:\n", "IMG_BASE64_0", True, "(cat)+"), # exceptional, so that we don't cog up the log
28+
("What is this:\n", IMG_URL_1, True, "(frog)+"),
29+
("Test test\n", IMG_URL_1, True, "(frog)+"), # test invalidate cache
30+
("What is this:\n", "malformed", False, None),
31+
("What is this:\n", "https://google.com/404", False, None), # non-existent image
32+
("What is this:\n", "https://ggml.ai", False, None), # non-image data
3233
]
3334
)
34-
def test_vision_chat_completion(image_url, success, re_content):
35+
def test_vision_chat_completion(prompt, image_url, success, re_content):
3536
global server
3637
server.start(timeout_seconds=60) # vision model may take longer to load due to download size
3738
if image_url == "IMG_BASE64_0":
@@ -41,7 +42,7 @@ def test_vision_chat_completion(image_url, success, re_content):
4142
"top_k": 1,
4243
"messages": [
4344
{"role": "user", "content": [
44-
{"type": "text", "text": "What is this:\n"},
45+
{"type": "text", "text": prompt},
4546
{"type": "image_url", "image_url": {
4647
"url": image_url,
4748
}},

tools/server/utils.hpp

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,17 +1155,28 @@ struct server_tokens {
11551155

11561156
void resize(size_t n) {
11571157
GGML_ASSERT(n <= tokens.size());
1158-
// we throw an error if we try to remove a token in the middle of an image
1159-
// for ex. with input of 5 text tokens and 2 images:
1160-
// [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
1161-
// n 1 2 3 4 5 6 7 8 9 10
1162-
// allowed to resize ^ ^
1163-
// disallowed to resize ^ ^ ^
1164-
if (n > 0) {
1165-
llama_token last_token = tokens[n - 1];
1166-
// make sure we never remove tokens in the middle of an image
1167-
if (last_token == LLAMA_TOKEN_NULL) {
1168-
find_chunk(n - 1); // will throw an error if the token is not begin-of-chunk
1158+
if (has_mtmd) {
1159+
// we throw an error if we try to remove a token in the middle of an image
1160+
// for ex. with input of 5 text tokens and 2 images:
1161+
// [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
1162+
// n 1 2 3 4 5 6 7 8 9 10
1163+
// allowed to resize ^ ^
1164+
// disallowed to resize ^ ^ ^
1165+
if (n > 0) {
1166+
llama_token last_token = tokens[n - 1];
1167+
// make sure we never remove tokens in the middle of an image
1168+
if (last_token == LLAMA_TOKEN_NULL) {
1169+
find_chunk(n - 1); // will throw an error if the token is not begin-of-chunk
1170+
}
1171+
}
1172+
// remove all image chunks that are not used anymore
1173+
for (auto it = map_pos_to_image.begin(); it != map_pos_to_image.end(); ) {
1174+
llama_pos pos = it->first;
1175+
if (pos >= (llama_pos)n) {
1176+
it = map_pos_to_image.erase(it);
1177+
} else {
1178+
++it;
1179+
}
11691180
}
11701181
}
11711182
tokens.resize(n);

0 commit comments

Comments (0)