Limit enable_t_mac to take effect only when the file type is MOSTLY_INT_N.

ggml-org · QingtaoLi1 · Oct 10, 2024 · Oct 11, 2024 · Oct 11, 2024 · Oct 11, 2024
commit f84d25dd8fcf706e357b79ceda1437273d9b76ee
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
@@ -1798,8 +1798,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         ]):
             # transform weight into 1/0/-1 (in fp32)
             data_torch = self.weight_quant(data_torch)
-            if self.enable_t_mac:
-                # transform weight into T-MAC I2 format
+            if self.enable_t_mac and self.ftype == gguf.LlamaFileType.MOSTLY_INT_N:
+                # transform weight into T-MAC INT_N format
                 from t_mac.model_utils import preprocess_for_t_mac
                 data = LazyTorchTensor.to_eager(data_torch).numpy()
                 scale = np.max(np.abs(data))