llama : do not allocate KV cache for "vocab_only == true" (#682) · unbounded/llama.cpp@81040f1 · GitHub

Commit 81040f1

llama : do not allocate KV cache for "vocab_only == true" (ggml-org#682)
Fixes sanitizer CI
1 parent c4f89d8 commit 81040f1

File tree

1 file changed: +1 -1 lines changed


llama.cpp

Lines changed: 1 addition & 1 deletion
@@ -1608,7 +1608,7 @@ struct llama_context * llama_init_from_file(
     }
 
     // reserve memory for context buffers
-    {
+    if (!params.vocab_only) {
         if (!kv_cache_init(ctx->model.hparams, ctx->model.kv_self, memory_type, ctx->model.hparams.n_ctx)) {
             fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
             llama_free(ctx);
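
Not part of the commit, but for context, a minimal sketch of the caller side this change affects: a program that loads a model with vocab_only set in llama_context_params (for example, to use only the tokenizer) will now skip the self-attention KV cache allocation inside llama_init_from_file. The model path and the exact sequence of calls below are illustrative assumptions based on the llama.h C API of this period, not code from the commit.

#include <stdio.h>
#include "llama.h"

int main(void) {
    // Illustrative only: request a vocab-only load (e.g. for tokenization),
    // which after this commit no longer allocates the KV cache.
    struct llama_context_params params = llama_context_default_params();
    params.vocab_only = true;

    // "models/7B/ggml-model.bin" is a placeholder path, not from the commit.
    struct llama_context * ctx = llama_init_from_file("models/7B/ggml-model.bin", params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    printf("n_vocab = %d\n", llama_n_vocab(ctx));

    llama_free(ctx);
    return 0;
}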

0 commit comments
