8000
We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 23e1e54 commit 0b73da5 — Copy full SHA for 0b73da5
include/llama.h
@@ -692,13 +692,14 @@ extern "C" {
692
// This will be applied:
693
// - lazily on next llama_decode()
694
// - explicitly with llama_kv_self_update()
695
+ // TODO: deprecate and always update the cache lazily [TAG: API_KV_NO_DEFRAG]
696
LLAMA_API void llama_kv_self_defrag(struct llama_context * ctx);
697
698
// Check if the context supports KV cache shifting
699
LLAMA_API bool llama_kv_self_can_shift(const struct llama_context * ctx);
700
701
// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
- // TODO: deprecate and always update the cache lazily
702
703
LLAMA_API void llama_kv_self_update(struct llama_context * ctx);
704
705
//
0 commit comments