[falcon] Fix Falcon for rw-1b model by akawrykow · Pull Request #2887 · ggml-org/llama.cpp · GitHub
[go: up one dir, main page]

Skip to content

[falcon] Fix Falcon for rw-1b model #2887

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
add bias
  • Loading branch information
akawrykow committed Aug 30, 2023
commit bc84ba39647f9d971512947cd09cf8f573ce930a
21 changes: 21 additions & 0 deletions llama.cpp
Original file line number Diff line number Diff line change
@@ -2675,6 +2675,11 @@ static struct ggml_cgraph * llm_build_falcon(
cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, cur);
offload_func_kq(cur);

if (model.layers[il].wqkv_b) { // Falcon-RW-1B
cur = ggml_add(ctx0, cur, model.layers[il].wqkv_b);
offload_func(cur);
}

// Note that the strides for Kcur, Vcur are set up so that the
// resulting views are misaligned with the tensor's storage
// (by applying the K/V offset we shift the tensor's original
@@ -2786,6 +2791,12 @@ static struct ggml_cgraph * llm_build_falcon(

cur = ggml_mul_mat(ctx0, model.layers[il].wo, cur);
offload_func(cur);

if (model.layers[il].wo_b) { // Falcon-RW-1B
cur = ggml_add(ctx0, cur, model.layers[il].wo_b);
offload_func(cur);
}

ggml_set_name(cur, "result_wo");
}

@@ -2798,10 +2809,20 @@ static struct ggml_cgraph * llm_build_falcon(
cur = ggml_mul_mat(ctx0, model.layers[il].w3, inpFF);
offload_func(cur);

if (model.layers[il].w3_b) { // Falcon-RW-1B
cur = ggml_add(ctx0, cur, model.layers[il].w3_b);
offload_func(cur);
}

cur = ggml_gelu(ctx0, cur);
offload_func(cur);
cur = ggml_mul_mat(ctx0, model.layers[il].w2, cur);
offload_func(cur);

if (model.layers[il].w2_b) { // Falcon-RW-1B
cur = ggml_add(ctx0, cur, model.layers[il].w2_b);
offload_func(cur);
}
}

cur = ggml_add(ctx0, cur, attn_out);
Expand Down
0