facebookresearch
diff --git a/mmf/configs/zoo/datasets.yaml b/mmf/configs/zoo/datasets.yaml
+9-2
diff --git a/mmf/configs/zoo/models.yaml b/mmf/configs/zoo/models.yaml
+6-6
diff --git a/mmf/datasets/builders/stvqa/dataset.py b/mmf/datasets/builders/stvqa/dataset.py
-1
diff --git a/mmf/datasets/builders/textvqa/dataset.py b/mmf/datasets/builders/textvqa/dataset.py
+18-6
diff --git a/mmf/datasets/processors/processors.py b/mmf/datasets/processors/processors.py
+1-5
diff --git a/tools/scripts/features/extract_features_vmb.py b/tools/scripts/features/extract_features_vmb.py
+3-3
@@ -81,12 +81,12 @@ stvqa:
 
 textvqa:
   defaults:
-    version: 0.5.5_2020_04_28
+    version: 0.5.5_2020_06_30
     resources:
       features:
       - url: mmf://datasets/textvqa/defaults/features/features.tar.gz
         file_name: features.tar.gz
-        hashcode: f78f6a8e3c7c9c40c8034303457ebcbd5f2bf999cf8aeeba0f6b6f5403dd7bcf
+        hashcode: 960f69c651f5c6532b4e9d8310597aa8d21c65074768c9702ea822e4b2867a0d
       annotations:
       - url: mmf://datasets/textvqa/defaults/annotations/annotations.tar.gz
         file_name: annotations.tar.gz
@@ -95,6 +95,13 @@ textvqa:
       - url: mmf://datasets/textvqa/defaults/extras.tar.gz
         file_name: extras.tar.gz
         hashcode: 68dc3b4aeffe4dce24ebb5d373baf0c304fbfbec73f0c1550118a85e90286e38
+  caffe2:
+    version: 0.5.5_2020_04_28
+    resources:
+      features:
+      - url: mmf://datasets/textvqa/defaults/features/features.tar.gz
+        file_name: features.tar.gz
+        hashcode: f78f6a8e3c7c9c40c8034303457ebcbd5f2bf999cf8aeeba0f6b6f5403dd7bcf
   ocr_en:
     version: 0.5.5_2020_04_28
     resources:
 
@@ -204,25 +204,25 @@ m4c:
   textvqa:
     with_stvqa:
       zoo_requirements: detectron.vmb_weights
-      version: 1.0_2020_04_29
+      version: 1.0_2020_06_30
       resources:
       - url: mmf://models/m4c/m4c.textvqa.with_stvqa.tar.gz
         file_name: m4c.textvqa.with_stvqa.tar.gz
-        hashcode: 2dfd024cb144bb70843033d11dec064bc84937f22fd6561653ce1acd7610285e
+        hashcode: 0bf3b2d2fc4ed92994044404feb83869bf64c3bb8077e4472ae45927986fe51f
     ocr_ml:
       zoo_requirements: detectron.vmb_weights
-      version: 1.0_2020_04_29
+      version: 1.0_2020_07_02
       resources:
       - url: mmf://models/m4c/m4c.textvqa.ocr_ml.tar.gz
         file_name: m4c.textvqa.ocr_ml.tar.gz
-        hashcode: 4734dbf1816ffe378f08e69bd85b8cc0e4b2abe9564626505ff93e1ea7aea741
+        hashcode: a2ac0584bce7f12bd01e014584404eca511390c1e1637e60027a93b2ddc104ae
     alone:
       zoo_requirements: detectron.vmb_weights
-      version: 1.0_2020_04_29
+      version: 1.0_2020_07_02
       resources:
       - url: mmf://models/m4c/m4c.textvqa.alone.tar.gz
         file_name: m4c.textvqa.alone.tar.gz
-        hashcode: 84243f5d626a8211ea6a1ae409c72aaa24645ccd43883d4af4aa903d70d45f06
+        hashcode: 359468f25a88b3bf51f5ba15075c011eea0a7d7c76ab4bc547b66631b0cb9098
     defaults: ${m4c.textvqa.with_stvqa}
   stvqa:
     defaults:
 
@@ -18,5 +18,4 @@ def preprocess_sample_info(self, sample_info):
             feature_path = append + "/" + feature_path
 
         sample_info["feature_path"] = feature_path
-
         return sample_info
@@ -15,8 +15,23 @@ def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs):
         self.use_ocr_info = self.config.use_ocr_info
 
     def preprocess_sample_info(self, sample_info):
+        path = self._get_path_based_on_index(self.config, "annotations", self._index)
+        # NOTE, TODO: Code duplication w.r.t. STVQA, revisit
+        # during dataset refactor to support variable dataset classes
+        if "stvqa" in path:
+            feature_path = sample_info["feature_path"]
+            append = "train"
+
+            if self.dataset_type == "test":
+                append = "test_task3"
+
+            if not feature_path.startswith(append):
+                feature_path = append + "/" + feature_path
+
+            sample_info["feature_path"] = feature_path
+            return sample_info
         # COCO Annotation DBs have corrext feature_path
-        if "COCO" not in sample_info["feature_path"]:
+        elif "COCO" not in sample_info["feature_path"]:
             sample_info["feature_path"] = sample_info["image_path"].replace(
                 ".jpg", ".npy"
             )
@@ -82,7 +97,6 @@ def __getitem__(self, idx):
             current_sample.image_id = str(sample_info["image_id"])
         else:
             current_sample.image_id = sample_info["image_id"]
-
         if self._use_features is True:
             features = self.features_db[idx]
             current_sample.update(features)
@@ -104,7 +118,7 @@ def __getitem__(self, idx):
         return current_sample
 
     def add_sample_details(self, sample_info, sample):
-        sample.image_id = object_to_byte_tensor(sample_info["image_id"])
+        sample.image_id = object_to_byte_tensor(sample.image_id)
 
         # 1. Load text (question words)
         question_str = (
@@ -198,10 +212,8 @@ def add_sample_details(self, sample_info, sample):
         return sample
 
     def add_answer_info(self, sample_info, sample):
-        if "answers" not in sample_info:
-            return sample
         # Load real answers from sample_info
-        answers = sample_info.get("answers", None)
+        answers = sample_info.get("answers", [])
         answer_processor_arg = {"answers": answers}
 
         answer_processor_arg["tokens"] = sample.pop("ocr_tokens", [])
 
@@ -558,11 +558,7 @@ def __call__(self, item):
 
         if "answer_tokens" in item:
             tokens = item["answer_tokens"]
-        elif (
-            "answers" in item
-            and item["answers"] is not None
-            and len(item["answers"]) > 0
-        ):
+        elif "answers" in item and item["answers"] is not None:
             if self.preprocessor is None:
                 raise AssertionError(
                     "'preprocessor' must be defined if you "
 
@@ -22,7 +22,7 @@
 from maskrcnn_benchmark.utils.model_serialization import load_state_dict
 from PIL import Image
 
-from mmf.utils.general import download_file
+from mmf.utils.download import download
 
 
 class FeatureExtractor:
@@ -46,8 +46,8 @@ def _try_downloading_necessities(self):
             print("Downloading model and configuration")
             self.args.model_file = self.MODEL_URL.split("/")[-1]
             self.args.config_file = self.CONFIG_URL.split("/")[-1]
-            download_file(self.MODEL_URL)
-            download_file(self.CONFIG_URL)
+            download(self.MODEL_URL, ".", self.args.model_file)
+            download(self.CONFIG_URL, ".", self.args.config_file)
 
     def get_parser(self):
         parser = argparse.ArgumentParser()