@@ -4,6 +4,7 @@
 from torch import nn
 from .layers import Linears, MultiHeadAttention
 from .crf import CRF
+from .ncrf import NCRF


 class CRFDecoder(nn.Module):
@@ -555,3 +556,49 @@ def create(cls, label_size, intent_size,
         return cls(label_size=label_size, intent_size=intent_size,
                    embedding_dim=embedding_dim, hidden_dim=hidden_dim,
                    rnn_layers=rnn_layers, dropout_p=dropout_p, pad_idx=pad_idx, use_cuda=use_cuda)
+
+
+class AttnNCRFJointDecoder(nn.Module):
+    def __init__(self,
+                 crf, label_size, input_dim, intent_size, input_dropout=0.5,
+                 key_dim=64, val_dim=64, num_heads=3, nbest=8):
+        super(AttnNCRFJointDecoder, self).__init__()
+        self.input_dim = input_dim
+        self.attn = MultiHeadAttention(key_dim, val_dim, input_dim, num_heads, input_dropout)
+        self.linear = Linears(in_features=input_dim,
+                              out_features=label_size,
+                              hiddens=[input_dim // 2])
+        self.crf = crf
+        self.label_size = label_size
+        self.intent_size = intent_size
+        self.intent_out = PoolingLinearClassifier(input_dim, intent_size, input_dropout)
+        self.intent_loss = nn.CrossEntropyLoss()
+        self.nbest = nbest
+
+    def forward_model(self, inputs, labels_mask=None):
+        batch_size, seq_len, input_dim = inputs.size()
+        inputs, hidden = self.attn(inputs, inputs, inputs, labels_mask)
+        intent_output = self.intent_out(inputs)
+        output = inputs.contiguous().view(-1, self.input_dim)
+        # Fully-connected layer
+        output = self.linear.forward(output)
+        output = output.view(batch_size, seq_len, self.label_size)
+        return output, intent_output
+
+    def forward(self, inputs, labels_mask):
+        self.eval()
+        logits, intent_output = self.forward_model(inputs)
+        _, preds = self.crf._viterbi_decode_nbest(logits, labels_mask, self.nbest)
+        self.train()
+        return preds, intent_output.argmax(-1)
+
+    def score(self, inputs, labels_mask, labels, cls_ids):
+        logits, intent_output = self.forward_model(inputs)
+        crf_score = self.crf.neg_log_likelihood_loss(logits, labels_mask, labels) / logits.shape[0]
+        return crf_score + self.intent_loss(intent_output, cls_ids)
+
+    @classmethod
+    def create(cls, label_size, input_dim, intent_size, input_dropout=0.5, key_dim=64,
+               val_dim=64, num_heads=3, use_cuda=True, nbest=8):
+        return cls(NCRF(label_size + 2, use_cuda), label_size, input_dim, intent_size, input_dropout,
+                   key_dim, val_dim, num_heads, nbest)
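For reviewers trying the change locally, a minimal usage sketch of the new decoder follows. It assumes an upstream encoder that yields hidden states of shape (batch_size, seq_len, input_dim) and that PoolingLinearClassifier and NCRF behave as referenced in the diff; the sizes, tensor names, and mask dtype below are illustrative guesses, not taken from this PR.

import torch

# Illustrative sizes; real values come from the encoder config and the label/intent vocabularies.
batch_size, seq_len, input_dim = 16, 32, 768
label_size, intent_size = 12, 7

# AttnNCRFJointDecoder is assumed to be importable from this module (file path not shown in the diff).
decoder = AttnNCRFJointDecoder.create(label_size, input_dim, intent_size, use_cuda=False)

encoder_out = torch.randn(batch_size, seq_len, input_dim)        # encoder hidden states
labels_mask = torch.ones(batch_size, seq_len, dtype=torch.long)  # 1 = real token, 0 = padding (NCRF may expect byte/bool)
labels = torch.randint(0, label_size, (batch_size, seq_len))     # gold tag ids
cls_ids = torch.randint(0, intent_size, (batch_size,))           # gold intent ids

loss = decoder.score(encoder_out, labels_mask, labels, cls_ids)  # joint NCRF + intent cross-entropy loss
loss.backward()

preds, intents = decoder(encoder_out, labels_mask)               # n-best tag paths, intent argmax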