8000
We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4c32832, commit 259469c (Copy full SHA for 259469c)
src/llama-graph.cpp
@@ -1287,6 +1287,10 @@ ggml_tensor * llm_graph_context::build_attn(
1287
1288
if (wo) {
1289
cur = build_lora_mm(wo, cur);
1290
+ if (arch == LLM_ARCH_GLM4) {
1291
+ // GLM4 seems to have numerical issues with half-precision accumulators
1292
+ ggml_mul_mat_set_prec(cur, GGML_PREC_F32);
1293
+ }
1294
}
1295
1296
if (wo_b) {
@@ -1367,10 +1371,6 @@ ggml_tensor * llm_graph_context::build_attn(
1367
1371
1368
1372
1369
1373
1370
- if (arch == LLM_ARCH_GLM4) {
- // GLM4 seems to have numerical issues with half-precision accumulators
- ggml_mul_mat_set_prec(cur, GGML_PREC_F32);
- }
1374
1375
1376
0 commit comments