feat: Update llama.cpp

abetlen · abetlen · commit 5585f8afe1ae · 2024-12-08T22:40:19.000-05:00
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
@@ -344,9 +344,9 @@
 #     LLAMA_FTYPE_MOSTLY_IQ4_XS        = 30, // except 1d tensors
 #     LLAMA_FTYPE_MOSTLY_IQ1_M         = 31, // except 1d tensors
 #     LLAMA_FTYPE_MOSTLY_BF16          = 32, // except 1d tensors
-#     LLAMA_FTYPE_MOSTLY_Q4_0_4_4      = 33, // except 1d tensors
-#     LLAMA_FTYPE_MOSTLY_Q4_0_4_8      = 34, // except 1d tensors
-#     LLAMA_FTYPE_MOSTLY_Q4_0_8_8      = 35, // except 1d tensors
+#     //LLAMA_FTYPE_MOSTLY_Q4_0_4_4      = 33, // removed from gguf files, use Q4_0 and runtime repack
+#     //LLAMA_FTYPE_MOSTLY_Q4_0_4_8      = 34, // removed from gguf files, use Q4_0 and runtime repack
+#     //LLAMA_FTYPE_MOSTLY_Q4_0_8_8      = 35, // removed from gguf files, use Q4_0 and runtime repack
 #     LLAMA_FTYPE_MOSTLY_TQ1_0         = 36, // except 1d tensors
 #     LLAMA_FTYPE_MOSTLY_TQ2_0         = 37, // except 1d tensors
 #
@@ -382,9 +382,9 @@
 LLAMA_FTYPE_MOSTLY_IQ4_XS = 30
 LLAMA_FTYPE_MOSTLY_IQ1_M = 31
 LLAMA_FTYPE_MOSTLY_BF16 = 32
-LLAMA_FTYPE_MOSTLY_Q4_0_4_4 = 33
-LLAMA_FTYPE_MOSTLY_Q4_0_4_8 = 34
-LLAMA_FTYPE_MOSTLY_Q4_0_8_8 = 35
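+# LLAMA_FTYPE_MOSTLY_Q4_0_4_4 = 33  # removed from gguf files, use Q4_0 and runtime repack
+# LLAMA_FTYPE_MOSTLY_Q4_0_4_8 = 34  # removed from gguf files, use Q4_0 and runtime repack
+# LLAMA_FTYPE_MOSTLY_Q4_0_8_8 = 35  # removed from gguf files, use Q4_0 and runtime repack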
 LLAMA_FTYPE_MOSTLY_TQ1_0 = 36
 LLAMA_FTYPE_MOSTLY_TQ2_0 = 37
 LLAMA_FTYPE_GUESSED = 1024
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 7736837d62efed1dbebfe579472fca041eda12d6
+Subproject commit ce8784bdb153ff7794dde5a50b0ebfa51baa6171