Revert "hparams : support models for which all layers use SWA (#13682)" · Nexesenex/croco.cpp@15c7f77 · GitHub
[go: up one dir, main page]

Skip to content

Commit 15c7f77

Browse files
committed
Revert "hparams : support models for which all layers use SWA (ggml-org#13682)"
This reverts commit 8e186ef.
1 parent 5e3336e commit 15c7f77

File tree

2 files changed

+2
-13
lines changed

2 files changed

+2
-13
lines changed

src/llama-hparams.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ uint32_t llama_hparams::n_embd_v_s() const {
7272

7373
bool llama_hparams::is_swa(uint32_t il) const {
7474
if (il < n_layer) {
75-
return n_swa_pattern == 0 || (il % n_swa_pattern < (n_swa_pattern - 1));
75+
return n_swa > 0 && n_swa_pattern > 0 && il % n_swa_pattern < (n_swa_pattern - 1);
7676
}
7777

7878
GGML_ABORT("fatal error");

src/llama-hparams.h

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -104,18 +104,7 @@ struct llama_hparams {
104104
llama_swa_type swa_type = LLAMA_SWA_TYPE_NONE;
105105

106106
uint32_t n_swa = 0; // the size of the sliding window (0 - no SWA)
107-
uint32_t n_swa_pattern = 1; // this value n means that every nth layer is dense (i.e. non-SWA)
108-
// by default n == 1, all layers are dense
109-
// note that if n_swa_pattern == 0, all layers are SWA
110-
// example: n_swa_pattern = 3
111-
// il == 0: swa
112-
// il == 1: swa
113-
// il == 2: dense
114-
// il == 3: swa
115-
// il == 4: swa
116-
// il == 5: dense
117-
// il == 6: swa
118-
// etc ...
107+
uint32_t n_swa_pattern = 1; // by default, all layers use non-sliding-window attention
119108

120109
// for State Space Models
121110
uint32_t ssm_d_conv = 0;

0 commit comments

Comments
 (0)
0