add ncrf model · flypythoncom/ner-bert@ae638ae · GitHub
[go: up one dir, main page]

Skip to content

Commit ae638ae

Browse files
committed
add ncrf model
1 parent c8d50e5 commit ae638ae

File tree

3 files changed

+85
-5
lines changed

3 files changed

+85
-5
lines changed

modules/layers/decoders.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from torch import nn
55
from .layers import Linears, MultiHeadAttention
66
from .crf import CRF
7+
from .ncrf import NCRF
78

89

910
class CRFDecoder(nn.Module):
@@ -555,3 +556,49 @@ def create(cls, label_size, intent_size,
555556
return cls(label_size=label_size, intent_size=intent_size,
556557
embedding_dim=embedding_dim, hidden_dim=hidden_dim,
557558
rnn_layers=rnn_layers, dropout_p=dropout_p, pad_idx=pad_idx, use_cuda=use_cuda)
559+
560+
561+
class AttnNCRFJointDecoder(nn.Module):
    """Joint decoder: self-attention, a CRF tag head and a pooled intent head.

    The encoder output is passed through ``MultiHeadAttention``; the attended
    features feed (a) ``Linears`` producing per-token label scores that are
    decoded/scored by the supplied ``crf`` object and (b) a
    ``PoolingLinearClassifier`` producing intent scores trained with
    cross-entropy.

    Args:
        crf: object exposing ``_viterbi_decode_nbest(logits, mask, nbest)``
            and ``neg_log_likelihood_loss(logits, mask, labels)``
            (``create`` builds an ``NCRF``).
        label_size: number of output scores per token.
        input_dim: size of the last dimension of the encoder output.
        intent_size: number of intent classes.
        input_dropout: forwarded to the attention and intent layers.
        key_dim, val_dim, num_heads: forwarded to ``MultiHeadAttention``.
        nbest: forwarded to ``crf._viterbi_decode_nbest`` at inference time.
    """

    def __init__(self,
                 crf, label_size, input_dim, intent_size, input_dropout=0.5,
                 key_dim=64, val_dim=64, num_heads=3, nbest=8):
        super(AttnNCRFJointDecoder, self).__init__()
        self.input_dim = input_dim
        self.attn = MultiHeadAttention(key_dim, val_dim, input_dim, num_heads, input_dropout)
        self.linear = Linears(in_features=input_dim,
                              out_features=label_size,
                              hiddens=[input_dim // 2])
        self.crf = crf
        self.label_size = label_size
        self.intent_size = intent_size
        self.intent_out = PoolingLinearClassifier(input_dim, intent_size, input_dropout)
        self.intent_loss = nn.CrossEntropyLoss()
        self.nbest = nbest

    def forward_model(self, inputs, labels_mask=None):
        """Return ``(label_scores, intent_scores)`` for a batch.

        ``inputs`` is unpacked as ``(batch_size, seq_len, input_dim)``;
        the label scores are reshaped to ``(batch_size, seq_len, label_size)``.
        ``labels_mask`` is handed to the attention layer as its mask argument.
        """
        batch_size, seq_len, _ = inputs.size()
        # Self-attention: query, key and value are all the encoder output.
        inputs, _ = self.attn(inputs, inputs, inputs, labels_mask)
        intent_output = self.intent_out(inputs)
        # Flatten tokens so the fully-connected stack sees one row per token.
        flat = inputs.contiguous().view(-1, self.input_dim)
        # Fully-connected layer
        flat = self.linear.forward(flat)
        output = flat.view(batch_size, seq_len, self.label_size)
        return output, intent_output

    def forward(self, inputs, labels_mask):
        """Decode tags and intents for inference.

        Returns:
            ``(preds, intents)`` where ``preds`` is the second value returned
            by ``crf._viterbi_decode_nbest`` and ``intents`` is the argmax of
            the intent scores over the last dimension.

        The module is switched to eval mode for the duration of the call and
        then restored to whatever mode it was in before (previously it was
        unconditionally left in train mode, even when called from eval mode).
        """
        was_training = self.training
        self.eval()
        try:
            # NOTE(review): labels_mask is not passed to forward_model here, so
            # attention runs unmasked — kept as in the original; confirm intent.
            logits, intent_output = self.forward_model(inputs)
            _, preds = self.crf._viterbi_decode_nbest(logits, labels_mask, self.nbest)
        finally:
            # Restore the caller's mode even if decoding raised.
            self.train(was_training)
        return preds, intent_output.argmax(-1)

    def score(self, inputs, labels_mask, labels, cls_ids):
        """Return the joint training loss.

        The CRF negative log-likelihood is divided by ``logits.shape[0]``
        (the batch dimension) and added to the intent cross-entropy computed
        against ``cls_ids``.
        """
        # NOTE(review): as in forward(), attention is run without labels_mask.
        logits, intent_output = self.forward_model(inputs)
        crf_score = self.crf.neg_log_likelihood_loss(logits, labels_mask, labels) / logits.shape[0]
        return crf_score + self.intent_loss(intent_output, cls_ids)

    @classmethod
    def create(cls, label_size, input_dim, intent_size, input_dropout=0.5, key_dim=64,
               val_dim=64, num_heads=3, use_cuda=True, nbest=8):
        """Build the decoder together with its ``NCRF`` layer.

        The ``NCRF`` is constructed with ``label_size + 2`` tags
        (presumably room for start/stop tags — confirm against ``NCRF``).
        """
        return cls(NCRF(label_size + 2, use_cuda), label_size, input_dim, intent_size, input_dropout,
                   key_dim, val_dim, num_heads, nbest)

modules/layers/ncrf.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,11 @@ def log_sum_exp(vec, m_size):
2626
max_score = torch.gather(vec, 1, idx.view(-1, 1, m_size)).view(-1, 1, m_size) # B * M
2727
return max_score.view(-1, m_size) + torch.log(torch.sum(torch.exp(vec - max_score.expand_as(vec)), 1)).view(-1, m_size) # B * M
2828

29+
2930
class NCRF(nn.Module):
3031

3132
def __init__(self, tagset_size, gpu):
32-
super(CRF, self).__init__()
33+
super(NCRF, self).__init__()
3334
print("build CRF...")
3435
self.gpu = gpu
3536
# Matrix of transition parameters. Entry i,j is the score of transitioning *to* i *from* j.
@@ -101,7 +102,6 @@ def _calculate_PZ(self, feats, mask):
101102
final_partition = cur_partition[:, STOP_TAG]
102103
return final_partition.sum(), scores
103104

104-
105105
def _viterbi_decode(self, feats, mask):
106106
"""
107107
input:
@@ -196,13 +196,10 @@ def _viterbi_decode(self, feats, mask):
196196
decode_idx = decode_idx.transpose(1,0)
197197
return path_score, decode_idx
198198

199-
200-
201199
def forward(self, feats, mask=None):
    """Viterbi-decode ``feats`` and return ``(path_score, best_path)``.

    Args:
        feats: emission scores, passed through to ``_viterbi_decode``.
        mask: sequence mask, passed through to ``_viterbi_decode``. It is
            optional in the signature only for backward compatibility;
            decoding cannot run without it.

    Raises:
        TypeError: if ``mask`` is not supplied. The original code called
            ``_viterbi_decode(feats)`` without its required ``mask``
            argument and therefore always raised TypeError.
    """
    if mask is None:
        raise TypeError("forward() missing required argument: 'mask'")
    path_score, best_path = self._viterbi_decode(feats, mask)
    return path_score, best_path
204202

205-
206203
def _score_sentence(self, scores, mask, tags):
207204
"""
208205
input:

modules/models/bert_models.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,3 +283,39 @@ def create(cls,
283283
label_size, intent_size, dec_embedding_dim, dec_hidden_dim,
284284
dec_rnn_layers, input_dropout, pad_idx, use_cuda)
285285
return cls(encoder, decoder, use_cuda)
286+
287+
288+
class BertBiLSTMAttnNCRFJoint(NerModel):
289+
290+
def forward(self, batch):
291+
output, _ = self.encoder(batch)
292+
return self.decoder(output, batch[-2])
293+
294+
def score(self, batch):
295+
output, _ = self.encoder(batch)
296+
return self.decoder.score(output, batch[-2], batch[-1], batch[-3])
297+
298+
@classmethod
299+
def create(cls,
300+
label_size, intent_size,
301+
# BertEmbedder params
302+
bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
303+
freeze=True,
304+
# BertBiLSTMEncoder params
305+
enc_hidden_dim=128, rnn_layers=1,
306+
# AttnCRFDecoder params
307+
key_dim=64, val_dim=64, num_heads=3,
308+
input_dropout=0.5,
309+
# Global params
310+
use_cuda=True,
311+
# Meta
312+
meta_dim=None):
313+
embedder = BertEmbedder.create(
314+
bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda, bert_mode, freeze)
315+
if meta_dim is None:
316+
encoder = BertBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
317+
else:
318+
encoder = BertMetaBiLSTMEncoder.create(embedder, meta_dim, enc_hidden_dim, rnn_layers, use_cuda)
319+
decoder = AttnNCRFJointDecoder.create(
320+
label_size, encoder.output_dim, intent_size, input_dropout, key_dim, val_dim, num_heads, use_cuda)
321+
return cls(encoder, decoder, use_cuda)

0 commit comments

Comments
 ()
0