llama : add RobertaForSequenceClassification reranker support (#13875) · ggml-org/llama.cpp@6385b84 · GitHub

Commit 6385b84

llama : add RobertaForSequenceClassification reranker support (#13875)
1 parent 1b8fb81 commit 6385b84

File tree: 6 files changed, +24 -8 lines changed


convert_hf_to_gguf.py

Lines changed: 11 additions & 6 deletions
@@ -3695,6 +3695,10 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_causal_attention(False)
         self._try_set_pooling_type()
 
+        if cls_out_labels := self.hparams.get("id2label"):
+            key_name = gguf.Keys.Classifier.OUTPUT_LABELS.format(arch = gguf.MODEL_ARCH_NAMES[self.model_arch])
+            self.gguf_writer.add_array(key_name, [v for k, v in sorted(cls_out_labels.items())])
+
     def set_vocab(self):
         tokens, toktypes, tokpre = self.get_vocab_base()
         self.vocab_size = len(tokens)
@@ -3745,12 +3749,13 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if name.startswith("cls.seq_relationship"):
             return []
 
-        # For BertForSequenceClassification (direct projection layer)
-        if name == "classifier.weight":
-            name = "classifier.out_proj.weight"
+        if self.hparams.get("id2label"):
+            # For BertForSequenceClassification (direct projection layer)
+            if name == "classifier.weight":
+                name = "classifier.out_proj.weight"
 
-        if name == "classifier.bias":
-            name = "classifier.out_proj.bias"
+            if name == "classifier.bias":
+                name = "classifier.out_proj.bias"
 
         return [(self.map_tensor_name(name), data_torch)]
 
@@ -3846,7 +3851,7 @@ def _xlmroberta_set_vocab(self) -> None:
         self.gguf_writer.add_add_eos_token(True)
 
 
-@ModelBase.register("RobertaModel")
+@ModelBase.register("RobertaModel", "RobertaForSequenceClassification")
 class RobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
 
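To make the converter change concrete, here is a minimal standalone sketch (not part of the commit; the id2label dict is a made-up example of what a Hugging Face sequence-classification config.json typically contains) showing how the class labels get ordered before being written with add_array:

    # Made-up "id2label" field, as found in a typical sequence-classification config.json.
    hparams = {"id2label": {"0": "LABEL_0", "1": "LABEL_1"}}

    if cls_out_labels := hparams.get("id2label"):
        # Same comprehension as the converter: sort by label id, keep only the label names.
        labels = [v for k, v in sorted(cls_out_labels.items())]
        print(labels)  # ['LABEL_0', 'LABEL_1'] -> stored under "<arch>.classifier.output_labels"

The number of entries in that array is what the C++ side later reads back into n_cls_out (see src/llama-model.cpp below).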

gguf-py/gguf/constants.py

Lines changed: 3 additions & 0 deletions
@@ -177,6 +177,9 @@ class ConvNext:
         EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
         BLOCK_COUNT = "{arch}.convnext.block_count"
 
+    class Classifier:
+        OUTPUT_LABELS = "{arch}.classifier.output_labels"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE = "tokenizer.ggml.pre"
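As a sanity check (not part of the commit), the new Keys.Classifier.OUTPUT_LABELS template should expand to the same key string that the C++ loader looks up through LLM_KV_CLASSIFIER_OUTPUT_LABELS ("%s.classifier.output_labels"); the "bert" architecture name below is an assumption based on RobertaModel mapping to MODEL_ARCH.BERT in the converter:

    # Hypothetical check, assuming the gguf-py package from this commit is installed.
    import gguf

    key = gguf.Keys.Classifier.OUTPUT_LABELS.format(arch=gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.BERT])
    print(key)  # expected: bert.classifier.output_labels
    assert key == "%s.classifier.output_labels" % "bert"  # mirrors the C++ format string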

src/llama-arch.cpp

Lines changed: 2 additions & 0 deletions
@@ -174,6 +174,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
     { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
 
+    { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
+
     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
     { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
     { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },

src/llama-arch.h

Lines changed: 2 additions & 0 deletions
@@ -213,6 +213,8 @@ enum llm_kv {
     LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
     LLM_KV_CONVNEXT_BLOCK_COUNT,
 
+    LLM_KV_CLASSIFIER_OUTPUT_LABELS,
+
     // deprecated:
     LLM_KV_TOKENIZER_PREFIX_ID,
     LLM_KV_TOKENIZER_SUFFIX_ID,

src/llama-hparams.h

Lines changed: 3 additions & 0 deletions
@@ -131,6 +131,9 @@ struct llama_hparams {
     bool attn_soft_cap = false;
     bool use_kq_norm = true;
 
+    // for Classifiers
+    uint32_t n_cls_out = 1;
+
     // llama4
     uint32_t n_moe_layer_step = 0;
     uint32_t n_no_rope_layer_step = 4;

src/llama-model.cpp

Lines changed: 3 additions & 2 deletions
@@ -683,6 +683,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
     ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
     ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
+    ml.get_arr_n(LLM_KV_CLASSIFIER_OUTPUT_LABELS, hparams.n_cls_out, false);
 
     switch (hparams.n_layer) {
         case 3:
@@ -2121,8 +2122,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         cls = create_tensor(tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, TENSOR_NOT_REQUIRED);
         cls_b = create_tensor(tn(LLM_TENSOR_CLS, "bias"), {n_embd}, TENSOR_NOT_REQUIRED);
 
-        cls_out = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, 1}, TENSOR_NOT_REQUIRED);
-        cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"), {1}, TENSOR_NOT_REQUIRED);
+        cls_out = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
+        cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"), {hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
     }
 
     tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
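Not llama.cpp code, but a small numpy sketch of what the shape change above means: the classification head now projects the pooled embedding to one logit per output label instead of being hard-wired to a single reranking score (with n_cls_out defaulting to 1, existing rerankers keep the old {n_embd, 1} shape):

    import numpy as np

    n_embd, n_cls_out = 768, 2       # hypothetical sizes; n_cls_out = len(output_labels)
    pooled = np.random.rand(n_embd)  # pooled sequence embedding
    w = np.random.rand(n_embd, n_cls_out)  # cls_out weight, shape {n_embd, n_cls_out}
    b = np.random.rand(n_cls_out)          # cls_out bias, shape {n_cls_out}

    logits = pooled @ w + b
    print(logits.shape)  # (2,) -> one logit per classifier output label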
