refactor: rename *_is_hybrid -> *_is_hybrid_recurrent

The implementation of the hybrid cache intentionally does not specify the types of the child caches, so there was a naming mismatch with these predicate functions that used "hybrid" to imply "hybrid recurrent."

Branch: HybridCache

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
ggml-org · gabe-l-hart · May 20, 2025 · May 20, 2025 · May 27, 2025 · May 23, 2025
commit e6ff93a201a664b775d3406cc46e8dc60d2f1c85
diff --git a/include/llama.h b/include/llama.h
@@ -555,7 +555,7 @@ extern "C" {
     LLAMA_API bool llama_model_is_recurrent(const struct llama_model * model);
 
     // Returns true if the model is hybrid-recurrent (like Jamba, Bamba, etc.)
-    LLAMA_API bool llama_model_is_hybrid(const struct llama_model * model);
+    LLAMA_API bool llama_model_is_hybrid_recurrent(const struct llama_model * model);
 
     // Returns 0 on success
     LLAMA_API uint32_t llama_model_quantize(

diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
@@ -1759,7 +1759,7 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
     }
 }
 
-bool llm_arch_is_hybrid(const llm_arch & arch) {
+bool llm_arch_is_hybrid_recurrent(const llm_arch & arch) {
     // TODO: There are currently no hybrid models! Once there are, this will be
     //  the place to identify them
     switch (arch) {

diff --git a/src/llama-arch.h b/src/llama-arch.h
@@ -438,4 +438,4 @@ llm_arch llm_arch_from_string(const std::string & name);
 const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
 
 bool llm_arch_is_recurrent(const llm_arch& arch);
-bool llm_arch_is_hybrid(const llm_arch& arch);
+bool llm_arch_is_hybrid_recurrent(const llm_arch& arch);
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -13210,7 +13210,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
         // checks
         default:
             {
-                if (llm_arch_is_hybrid(arch)) {
+                if (llm_arch_is_hybrid_recurrent(arch)) {
                     // make vectors of recurrent and non-recurrent layer indices
                     std::vector<size_t> recurrent_layers;
                     std::vector<size_t> unified_layers;
@@ -13859,8 +13859,8 @@ bool llama_model_is_recurrent(const llama_model * model) {
     return llm_arch_is_recurrent(model->arch);
 }
 
-bool llama_model_is_hybrid(const llama_model * model) {
-    return llm_arch_is_hybrid(model->arch);
+bool llama_model_is_hybrid_recurrent(const llama_model * model) {
+    return llm_arch_is_hybrid_recurrent(model->arch);
 }
 
 const std::vector<std::pair<std::string, ggml_tensor *>> & llama_internal_get_tensor_map(const llama_model * model) {