@@ -58,193 +58,77 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilize the abi
58
58
59
59
| Model Name | FP16 | Q8_0 | Q4_0 |
60
60
| :----------------------------| :-----:| :----:| :----:|
61
- | AquilaChat2-7B | √ | √ | √ |
62
- | Baichuan-7b | √ | √ | √ |
63
- | Baichuan2-7B-Chat | √ | √ | √ |
64
- | bitnet_b1_58-large | √ | √ | √ |
65
- | bloom-560m | √ | x | √ |
66
- | bloomz-alpaca-560m | √ | x | √ |
67
- | c4ai-command-r-35B-v01 | x | x | x |
68
- | chatglm3-6B | x | x | x |
69
- | chinese-alpaca-2-1.3b | √ | √ | √ |
70
- | CodeShell-7B | √ | √ | √ |
71
- | deepseek-ai_deepseek-coder-1.3B-base | x | x | x |
72
- | deepseek-ai_DeepSeek-V2-Lite | x | x | x |
73
- | deepseek-coder-6.7B-instruct | x | x | x |
74
- | DeepSeek-V2-Lite-64x1.5B | x | x | x |
75
- | falcon-7b-instruct | √ | √ | √ |
76
- | flan-t5-large | √ | √ | √ |
77
- | gemma-2-9b-it | √ | √ | √ |
78
- | glm-4-9B | x | x | x |
79
- | gpt2 | √ | √ | √ |
80
- | Gpt2-163M | √ | √ | √ |
81
- | granite-3B-code-instruct | √ | √ | √ |
61
+ | Llama-2 | √ | √ | √ |
62
+ | Llama-3 | √ | √ | √ |
63
+ | Mistral-7B | √ | √ | √ |
64
+ | Mistral MOE | x | x | x |
65
+ | DBRX | x | x | x |
66
+ | Falcon | √ | √ | √ |
67
+ | Chinese LLaMA/Alpaca | √ | √ | √ |
68
+ | Vigogne(French) | √ | √ | √ |
69
+ | BERT | √ | √ | √ |
70
+ | Koala | √ | √ | √ |
71
+ | Baichuan | √ | √ | √ |
72
+ | Aquila 1 & 2 | √ | √ | √ |
73
+ | Starcoder models | √ | √ | √ |
74
+ | Refact | √ | √ | √ |
75
+ | MPT | √ | √ | √ |
76
+ | Bloom | √ | √ | √ |
77
+ | Yi models | √ | √ | √ |
78
+ | stablelm models | √ | √ | √ |
79
+ | DeepSeek models | x | x | x |
80
+ | Qwen models | √ | √ | √ |
81
+ | PLaMo-13B | √ | √ | √ |
82
+ | Phi models | √ | √ | √ |
83
+ | PhiMoE | x | x | x |
84
+ | GPT-2 | √ | √ | √ |
85
+ | Orion | √ | √ | √ |
86
+ | InternLM2 | √ | √ | √ |
87
+ | CodeShell | √ | √ | √ |
88
+ | Gemma | √ | √ | √ |
89
+ | Mamba | √ | √ | √ |
90
+ | Xverse | √ | √ | √ |
91
+ | command-r models | √ | √ | √ |
92
+ | Grok-1 | x | x | x |
93
+ | SEA-LION | √ | √ | √ |
82
94
| GritLM-7B | √ | √ | √ |
83
- | internlm2_5-7b-chat | √ | √ | √ |
84
- | koala-7B-HF | √ | √ | √ |
85
- | Llama-2-7b-chat-hf | √ | √ | √ |
86
- | Llama-3-Smaug-8B | √ | √ | √ |
87
- | Llama2-Chinese-7b-Chat | √ | √ | √ |
88
- | Llama3-8B | √ | √ | √ |
89
- | Llama3-8b-chinese | √ | √ | √ |
90
- | mamba-130m-hf | √ | √ | √ |
91
- | Mistral-7B-Instruct-v0.2 | √ | √ | √ |
92
- | Mixtral-8x7B-Instruct-v0.1 | x | √ | √ |
93
- | mpt-7B | √ | √ | √ |
94
- | OLMo-1B-hf | √ | √ | √ |
95
- | OpenELM-3B-Instruct | √ | √ | √ |
96
- | Orion-14b-base | √ | √ | √ |
97
- | phi1 | x | x | x |
98
- | phi2 | x | x | x |
99
- | Phi-3-mini-4k-instruct | √ | √ | √ |
100
- | plamo-13b | √ | √ | √ |
101
- | pythia-70M | x | x | x |
102
- | Qwen-7B | √ | √ | √ |
103
- | Qwen2-1.5B-Instruct | √ | x | √ |
104
- | Refact-1_6B-fim | √ | √ | √ |
105
- | SmolLM-135M | √ | √ | √ |
106
- | stablelm-zephyr | x | x | x |
107
- | stablelm-2-zephyr-1_6b | x | x | x |
108
- | starcoderbase-1b | √ | √ | √ |
109
- | starcoder2-3b | √ | √ | √ |
110
- | vigogne-7b-chat | √ | √ | √ |
111
- | xverse-7b-chat | √ | √ | √ |
112
- | Yi-6b-Chat | √ | √ | √ |
113
- | snowflake-arctic-embed | √ | × | × |
114
- | all-minilm | √ | × | × |
115
- | granite-embedding | √ | × | × |
116
- | smollm | √ | √ | √ |
117
- | smollm2 | √ | √ | √ |
118
- | nomic-embed-text | √ | × | × |
119
- | qwen2 | √ | √ | √ |
120
- | reader-lm | √ | √ | √ |
121
- | qwen2.5 | √ | √ | √ |
122
- | qwen2.5-coder | √ | √ | √ |
123
- | qwen | √ | √ | √ |
124
- | paraphrase-multilingual | √ | × | × |
125
- | tinydolphin | √ | √ | √ |
126
- | tinyllama | √ | √ | √ |
127
- | mxbai-embed-large | √ | × | × |
128
- | bge-large | √ | × | × |
129
- | starcoder | √ | √ | √ |
130
- | granite3-moe | √ | √ | √ |
131
- | llama3 | √ | √ | √ |
132
- | deepseek-coder | √ | √ | √ |
133
- | granite3 | √ | √ | √ |
134
- | moondream | √ | √ | √ |
135
- | yi-coder | √ | √ | √ |
136
- | llama-guard3 | √ | √ | √ |
137
- | qwen2-math | √ | √ | √ |
138
- | stablelm2 | × | √ | √ |
139
- | sailor2 | √ | √ | × |
140
- | gemma3 | √ | √ | × |
141
- | internlm2 | √ | √ | √ |
142
- | bge-m3 | √ | × | × |
143
- | granite3-dense | √ | √ | √ |
144
- | codegemma | √ | √ | √ |
145
- | phi | √ | √ | √ |
146
- | dolphin-phi | × | √ | √ |
147
- | stable-code | √ | √ | √ |
148
- | stablelm-zephyr | √ | √ | √ |
149
- | gemma2 | √ | √ | √ |
150
- | shieldgemma | × | √ | √ |
151
- | gemma | √ | √ | √ |
152
- | starcoder2 | √ | √ | √ |
153
- | falcon3 | √ | √ | × |
154
- | deepseek-r1 | √ | √ | × |
155
- | deepscaler | √ | √ | × |
156
- | hermes3 | √ | √ | √ |
157
- | orca-mini | √ | √ | √ |
158
- | granite-code | √ | √ | √ |
159
- | opencoder | √ | √ | × |
160
- | nuextract | √ | √ | √ |
161
- | phi3 | √ | √ | √ |
162
- | phi3.5 | √ | √ | √ |
163
- | nemotron-mini | √ | √ | √ |
164
- | granite3-guardian | √ | √ | × |
165
- | exaone3.5 | √ | √ | × |
166
- | exaone-deep | √ | √ | × |
167
- | yi | √ | √ | √ |
168
- | smallthinker | √ | √ | × |
169
- | yarn-llama2 | √ | √ | √ |
170
- | xwinlm | √ | √ | √ |
171
- | wizard-vicuna-uncensored | √ | √ | √ |
172
- | vicuna | √ | √ | √ |
173
- | stable-beluga | √ | √ | √ |
174
- | nous-hermes | √ | √ | √ |
175
- | medllama2 | √ | √ | √ |
176
- | llama2-uncensored | √ | √ | √ |
177
- | meditron | √ | √ | √ |
178
- | llava | √ | √ | √ |
179
- | magicoder | √ | √ | √ |
180
- | wizardlm | √ | √ | √ |
181
- | wizard-math | √ | √ | √ |
182
- | wizardcoder | √ | √ | √ |
183
- | orca2 | √ | √ | √ |
184
- | codellama | √ | √ | √ |
185
- | duckdb-nsql | √ | √ | √ |
186
- | llama2 | √ | √ | √ |
187
- | deepseek-llm | √ | √ | √ |
188
- | phi4-mini | √ | √ | × |
189
- | samantha-mistral | × | √ | √ |
190
- | yarn-mistral | √ | √ | √ |
191
- | sqlcoder | √ | √ | √ |
192
- | neural-chat | √ | √ | √ |
193
- | bakllava | √ | √ | √ |
194
- | wizardlm2 | √ | √ | √ |
195
- | dolphin-mistral | √ | √ | √ |
196
- | mistral-openorca | √ | √ | √ |
197
- | openhermes | √ | √ | √ |
198
- | mistrallite | √ | √ | √ |
199
- | notus | √ | √ | √ |
200
- | zephyr | √ | √ | √ |
201
- | mistral | √ | √ | √ |
202
- | openchat | √ | √ | √ |
203
- | mathstral | √ | √ | √ |
204
- | codeqwen | √ | √ | √ |
205
- | falcon | √ | √ | √ |
206
- | dolphincoder | √ | √ | √ |
207
- | minicpm-v | √ | √ | √ |
208
- | bespoke-minicheck | √ | √ | √ |
209
- | llama3-chatqa | √ | √ | √ |
210
- | llama3-gradient | √ | √ | √ |
211
- | dolphin-llama3 | √ | √ | √ |
212
- | llama3-groq-tool-use | × | √ | √ |
213
- | llama-pro | √ | √ | √ |
214
- | aya | × | √ | √ |
215
- | aya-expanse | √ | √ | √ |
216
- | codegeex4 | × | √ | √ |
217
- | glm4 | √ | √ | √ |
218
- | solar | √ | √ | √ |
219
- | nous-hermes2 | √ | √ | √ |
220
- | falcon2 | √ | √ | √ |
221
- | mistral-nemo | √ | √ | √ |
222
- | llama2-chinese | √ | √ | × |
223
- | wizard-vicuna | √ | √ | √ |
224
- | codeup | √ | √ | √ |
225
- | open-orca-platypus2 | √ | √ | √ |
226
- | nexusraven | √ | √ | √ |
227
- | everythinglm | √ | √ | √ |
228
- | llava-phi3 | √ | × | × |
229
- | starling-lm | √ | √ | × |
230
- | olmo2 | √ | √ | × |
231
- | marco-o1 | × | √ | × |
232
- | openthinker | √ | √ | × |
233
- | dolphin3 | √ | √ | × |
234
- | tulu3 | √ | √ | × |
235
- | command-r7b | √ | √ | × |
236
- | command-r7b-arabic | √ | √ | × |
237
- | deepseek-v2 | × | √ | √ |
238
- | deepseek-coder-v2 | × | √ | √ |
239
- | codestral | × | √ | √ |
240
- | mistral-small | × | √ | √ |
241
- | wizardlm-uncensored | √ | √ | × |
242
- | phi4 | × | √ | × |
243
- | llava-llama3 | √ | × | × |
244
- | command-r | × | × | √ |
245
- | phind-codellama | × | × | √ |
246
- | codebooga | × | × | √ |
247
- | alfred | × | × | √ |
95
+ | OLMo | √ | √ | √ |
96
+ | OLMo 2 | √ | √ | √ |
97
+ | OLMoE | x | x | x |
98
+ | Granite models | √ | √ | √ |
99
+ | GPT-NeoX + Pythia | x | x | x |
100
+ | Snowflake-Arctic MoE | x | x | x |
101
+ | Smaug | √ | √ | √ |
102
+ | Poro 34B | √ | √ | √ |
103
+ | Bitnet b1.58 models | √ | √ | √ |
104
+ | Flan-T5 | √ | √ | √ |
105
+ | Open Elm models | √ | √ | √ |
106
+ | chatGLM3-6B + ChatGLM4-9b + GLMEdge-1.5b + GLMEdge-4b | √ | √ | √ |
107
+ | GLM-4-0414 | √ | √ | √ |
108
+ | SmolLM | √ | √ | √ |
109
+ | EXAONE-3.0-7.8B-Instruct | √ | √ | √ |
110
+ | FalconMamba Models | √ | √ | √ |
111
+ | Jais Models | x | x | x |
112
+ | Bielik-11B-v2.3 | √ | √ | √ |
113
+ | RWKV-6 | x | x | x |
114
+ | QRWKV-6 | x | x | x |
115
+ | GigaChat-20B-A3B | x | x | x |
116
+ | Trillion-7B-preview | √ | √ | √ |
117
+ | Ling models | √ | √ | √ |
118
+
119
+
120
+ **Multimodal**
121
+ | LLaVA 1.5 models, LLaVA 1.6 models | √ | √ | √ |
122
+ | BakLLaVA | x | x | x |
123
+ | Obsidian | x | x | x |
124
+ | ShareGPT4V | x | x | x |
125
+ | MobileVLM 1.7B/3B models | x | x | x |
126
+ | Yi-VL | x | x | x |
127
+ | Mini CPM | √ | √ | √ |
128
+ | Moondream | √ | √ | √ |
129
+ | Bunny | x | x | x |
130
+ | GLM-EDGE | x | x | x |
131
+ | Qwen2-VL | √ | √ | √ |
248
132
249
133
250
134
0 commit comments