Commit 91a8ee6 (1 parent 056eb74)
src/llama-graph.cpp
@@ -659,6 +659,28 @@ ggml_tensor * llm_graph_context::build_ffn(
                 cur = ggml_mul(ctx0, x0, x1);
                 cb(cur, "ffn_mul", il);
             } break;
+        case LLM_FFN_GEGLU:
+            {
+                // Split into two equal parts
+                int64_t split_point = cur->ne[0] / 2;
+                ggml_tensor * output_ffn_up = ggml_cont(ctx0, ggml_view_2d(
+                                                ctx0, cur, split_point,
+                                                cur->ne[1], cur->nb[1], 0
+                                            ));
+                ggml_tensor * output_ffn_gate = ggml_cont(ctx0, ggml_view_2d(
+                                                ctx0, cur, split_point,
+                                                cur->ne[1], cur->nb[1],
+                                                split_point * ggml_element_size(cur)
+                                            ));
+
+                // Apply GELU activation function to the first part
+                output_ffn_up = ggml_gelu(ctx0, output_ffn_up);
+                cb(output_ffn_up, "ffn_gelu", il);
+
+                // Element-wise multiplication between the activated part and the gate part
+                cur = ggml_mul(ctx0, output_ffn_up, output_ffn_gate);
+                cb(cur, "ffn_geglu", il);
+            } break;
     }

     if (gate && type_gate == LLM_FFN_PAR) {
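For reference, the new branch implements GEGLU: the fused up/gate projection produces a row of width 2*n_ff, the first half is passed through GELU, and the result is multiplied element-wise by the second half, i.e. GEGLU(x) = GELU(x_up) * x_gate. The standalone C++ sketch below mirrors that computation on a plain float buffer. It is an illustration only, not part of the commit: it uses the exact erf-based GELU, while ggml_gelu may use a tanh approximation, so values can differ slightly.

#include <cmath>
#include <cstdio>
#include <vector>

// Exact (erf-based) GELU; ggml_gelu may use a tanh approximation instead,
// so this sketch is illustrative rather than bit-exact.
static float gelu(float x) {
    return 0.5f * x * (1.0f + std::erf(x / std::sqrt(2.0f)));
}

// GEGLU over a row of width 2*n: the first n values are the "up" half and
// the last n values are the "gate" half, matching the view offsets in the
// diff (the gate view starts split_point elements into the row).
static std::vector<float> geglu(const std::vector<float> & row) {
    const size_t n = row.size() / 2;
    std::vector<float> out(n);
    for (size_t i = 0; i < n; ++i) {
        out[i] = gelu(row[i]) * row[i + n];
    }
    return out;
}

int main() {
    // Toy row with n_ff = 2, fused width 4: {up0, up1, gate0, gate1}.
    const std::vector<float> row = {1.0f, -2.0f, 0.5f, 3.0f};
    for (float v : geglu(row)) {
        std::printf("%f\n", v);
    }
    return 0;
}

The ggml_cont calls in the diff are worth noting: a ggml_view_2d taken at a byte offset into a wider row is a strided, non-contiguous view, and making each half contiguous keeps the subsequent ggml_gelu and ggml_mul operating on densely packed tensors (ggml's unary ops generally expect contiguous inputs).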
src/llama-graph.h
@@ -36,6 +36,7 @@ enum llm_ffn_op_type {
     LLM_FFN_RELU,
     LLM_FFN_RELU_SQR,
     LLM_FFN_SWIGLU,
+    LLM_FFN_GEGLU,
 };

 enum llm_ffn_gate_type {
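With the enum extended, a model's graph builder opts into this path by passing LLM_FFN_GEGLU as the op-type argument of build_ffn. The call below is a hypothetical sketch: the parameter list is assumed from how other op types are typically wired up in llama.cpp and is not shown in this commit, and ffn_up is assumed to hold a fused up+gate projection of width 2*n_ff, so no separate gate tensor is passed.

// Hypothetical sketch: ffn_up is assumed to be the fused up+gate
// projection (width 2*n_ff); the gate slots stay NULL because the
// GEGLU branch performs the split itself.
cur = build_ffn(cur,
        model.layers[il].ffn_up,   NULL, NULL,
        NULL,                      NULL, NULL,  // no separate gate tensor
        model.layers[il].ffn_down, NULL, NULL,
        NULL,
        LLM_FFN_GEGLU, LLM_FFN_SEQ, il);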