fix bugs · flypythoncom/ner-bert@730cd17 · GitHub

Commit 730cd17

Author: Ubuntu
Commit message: fix bugs
1 parent 41e4bc1 commit 730cd17

File tree

3 files changed: +10 -6 lines changed

  • modules
    • data
      • bert_data.py
    • train
      • train.py
    • utils
      • utils.py

    modules/data/bert_data.py

    Lines changed: 3 additions & 2 deletions
@@ -135,6 +135,7 @@ def collate_fn(self, data):
 def get_data(
         df, tokenizer, label2idx=None, max_seq_len=424, pad="<pad>", cls2idx=None,
         is_cls=False, is_meta=False):
+    tqdm_notebook = tqdm
     if label2idx is None:
         label2idx = {pad: 0, '[CLS]': 1, '[SEP]': 2}
     features = []
@@ -173,8 +174,8 @@ def get_data(
         bert_tokens.append("[CLS]")
         bert_labels.append("[CLS]")
         orig_tokens = []
-        orig_tokens.extend(text.split())
-        labels = labels.split()
+        orig_tokens.extend(str(text).split())
+        labels = str(labels).split()
         pad_idx = label2idx[pad]
         assert len(orig_tokens) == len(labels)
         prev_label = ""
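The str() wrappers above guard get_data against non-string cells in the DataFrame (for example NaN, which pandas stores as a float and which has no .split()). A minimal sketch of that failure mode, using a hypothetical two-column frame shaped like the one get_data expects:

    import pandas as pd

    # Hypothetical frame: one text column, one space-separated label column.
    df = pd.DataFrame({"text": ["John lives in Kyiv", float("nan")],
                       "labels": ["B_PER O O B_LOC", float("nan")]})

    row = df.iloc[1]
    # row["text"].split()              # AttributeError: 'float' object has no attribute 'split'
    tokens = str(row["text"]).split()  # ['nan'] -- no crash, though the row still needs cleaning
    print(tokens)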

    modules/train/train.py

    Lines changed: 6 additions & 3 deletions
@@ -16,7 +16,8 @@ def train_step(dl, model, optimizer, lr_scheduler=None, clip=None, num_epoch=1):
     model.train()
     epoch_loss = 0
     idx = 0
-    for batch in tqdm_notebook(dl, total=len(dl), leave=False):
+    pr = tqdm_notebook(dl, total=len(dl), leave=False)
+    for batch in pr:
         idx += 1
         model.zero_grad()
         loss = model.score(batch)
@@ -25,7 +26,9 @@ def train_step(dl, model, optimizer, lr_scheduler=None, clip=None, num_epoch=1):
         _ = torch.nn.utils.clip_grad_norm(model.parameters(), clip)
         optimizer.step()
         optimizer.zero_grad()
-        epoch_loss += loss.data.cpu().tolist()
+        loss = loss.data.cpu().tolist()
+        epoch_loss += loss
+        pr.set_description("train loss: {}".format(epoch_loss / idx))
     if lr_scheduler is not None:
         lr_scheduler.step()
     # torch.cuda.empty_cache()
@@ -133,7 +136,7 @@ def predict(dl, model, id2label, id2cls=None):
 class NerLearner(object):
     def __init__(self, model, data, best_model_path, lr=0.001, betas=list([0.8, 0.9]), clip=5,
                  verbose=True, sup_labels=None, t_total=-1, warmup=0.1, weight_decay=0.01):
-        if ipython_info():
+        if ipython_info() or True:
             global tqdm_notebook
             tqdm_notebook = tqdm
         self.model = model
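The train_step change keeps a handle on the tqdm iterator so the running mean loss can be written onto the progress bar each batch. A minimal sketch of the same pattern with plain tqdm and dummy loss values (no model or DataLoader involved):

    from tqdm import tqdm

    dummy_losses = [0.9, 0.7, 0.55, 0.4]  # stand-ins for loss.data.cpu().tolist()

    epoch_loss, idx = 0.0, 0
    pr = tqdm(dummy_losses, total=len(dummy_losses), leave=False)
    for loss in pr:
        idx += 1
        epoch_loss += loss
        # Same call as in the commit: show the running mean loss in the bar text.
        pr.set_description("train loss: {}".format(epoch_loss / idx))

Note that the "if ipython_info() or True:" change in NerLearner.__init__ makes that branch unconditional, so tqdm_notebook is always rebound to plain tqdm regardless of environment.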

    modules/utils/utils.py

    Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ def voting_choicer(tok_map, labels):
     for origin_idx in tok_map:
 
         vote_labels = Counter(
-            ["I_" + l.split("_")[1] if l not in ["[SEP]", "[CLS]"] else "B_O" for l in labels[prev_idx:origin_idx]])
+            ["I_" + l.split("_")[1] if l not in ["[SEP]", "[CLS]"] else "I_O" for l in labels[prev_idx:origin_idx]])
         # vote_labels = Counter(c)
         lb = sorted(list(vote_labels), key=lambda x: vote_labels[x])
         if len(lb):
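voting_choicer collapses the labels predicted for one word's sub-tokens into a single tag by counting them with collections.Counter; the commit only changes the filler tag used at [CLS]/[SEP] positions from B_O to I_O. A small sketch of the voting idea in isolation (the variable names here are illustrative, not the repo's API):

    from collections import Counter

    # Labels predicted for the sub-tokens of a single original word.
    sub_token_labels = ["I_PER", "I_PER", "I_O"]

    votes = Counter(sub_token_labels)
    # Keep the most frequent label for the whole word.
    word_label = max(votes, key=votes.get)
    print(word_label)  # I_PER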

0 commit comments