small fixes

ggml-org · CISC · May 30, 2025 · May 30, 2025 · May 30, 2025 · May 30, 2025
commit 2b2cf6db8f2971dfc60d920eb520d6a1c68a6669
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
@@ -3920,6 +3920,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if name.startswith("distilbert."):
             name = name[11:]
 
+        # These layers act as the MLM head, so we don't need them; skip their tensors
         if name.startswith("vocab_"):
             return []
 

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -2116,9 +2116,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             case LLM_ARCH_NOMIC_BERT_MOE:
                 {
                     tok_embd     = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD,  "weight"), {n_embd, n_vocab}, 0);
-                    if (arch != LLM_ARCH_DISTIL_BERT) {
-                        type_embd    = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, 0);
-                    }
+                    type_embd    = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, TENSOR_NOT_REQUIRED);
 
                     if (arch == LLM_ARCH_BERT || arch == LLM_ARCH_DISTIL_BERT) {
                         pos_embd = create_tensor(tn(LLM_TENSOR_POS_EMBD,    "weight"), {n_embd, n_ctx_train}, 0);