[feat,fix] New textvqa features, retrained model, some fixes (#375) · facebookresearch/mmf@47da75a · GitHub
[go: up one dir, main page]

Skip to content

Commit 47da75a

Browse files
apsdehal authored and facebook-github-bot committed
[feat,fix] New textvqa features, retrained model, some fixes (#375)
Summary: - Adds new features for TextVQA based on maskrcnn benchmark script which now also contains extra info as required to fix #328 - Adds retrained model using these new features on VMB, on test-std this model gets 41.16%, higher than the previous model - Fixes the issue with the answer processor and in general predictions on TextVQA for multiple models (LoRRA and M4C) - Fixes joint_stvqa script to properly load features for STVQA. The fix is hacky as of now and requires some refactoring in datasets to properly support dataset class attributes - Fixes the broken extract features scripts to use latest code and functions as well as download functionalities Pull Request resolved: #375 Test Plan: Tested extensively on TextVQA with LoRRA as well as M4C Reviewed By: ronghanghu Differential Revision: D22321533 Pulled By: apsdehal fbshipit-source-id: fa97a7d56a333c6e1b6ffb1103e69cbaf7b1d783
1 parent b7287eb commit 47da75a

File tree

6 files changed

+37
-23
lines changed

6 files changed

+37
-23
lines changed

mmf/configs/zoo/datasets.yaml

+9-2
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,12 @@ stvqa:
8181

8282
textvqa:
8383
defaults:
84-
version: 0.5.5_2020_04_28
84+
version: 0.5.5_2020_06_30
8585
resources:
8686
features:
8787
- url: mmf://datasets/textvqa/defaults/features/features.tar.gz
8888
file_name: features.tar.gz
89-
hashcode: f78f6a8e3c7c9c40c8034303457ebcbd5f2bf999cf8aeeba0f6b6f5403dd7bcf
89+
hashcode: 960f69c651f5c6532b4e9d8310597aa8d21c65074768c9702ea822e4b2867a0d
9090
annotations:
9191
- url: mmf://datasets/textvqa/defaults/annotations/annotations.tar.gz
9292
file_name: annotations.tar.gz
@@ -95,6 +95,13 @@ textvqa:
9595
- url: mmf://datasets/textvqa/defaults/extras.tar.gz
9696
file_name: extras.tar.gz
9797
hashcode: 68dc3b4aeffe4dce24ebb5d373baf0c304fbfbec73f0c1550118a85e90286e38
98+
caffe2:
99+
version: 0.5.5_2020_04_28
100+
resources:
101+
features:
102+
- url: mmf://datasets/textvqa/defaults/features/features.tar.gz
103+
file_name: features.tar.gz
104+
hashcode: f78f6a8e3c7c9c40c8034303457ebcbd5f2bf999cf8aeeba0f6b6f5403dd7bcf
98105
ocr_en:
99106
version: 0.5.5_2020_04_28
100107
resources:

mmf/configs/zoo/models.yaml

+6-6
Original file line numberDiff line numberDiff line change
@@ -204,25 +204,25 @@ m4c:
204204
textvqa:
205205
with_stvqa:
206206
zoo_requirements: detectron.vmb_weights
207-
version: 1.0_2020_04_29
207+
version: 1.0_2020_06_30
208208
resources:
209209
- url: mmf://models/m4c/m4c.textvqa.with_stvqa.tar.gz
210210
file_name: m4c.textvqa.with_stvqa.tar.gz
211-
hashcode: 2dfd024cb144bb70843033d11dec064bc84937f22fd6561653ce1acd7610285e
211+
hashcode: 0bf3b2d2fc4ed92994044404feb83869bf64c3bb8077e4472ae45927986fe51f
212212
ocr_ml:
213213
zoo_requirements: detectron.vmb_weights
214-
version: 1.0_2020_04_29
214+
version: 1.0_2020_07_02
215215
resources:
216216
- url: mmf://models/m4c/m4c.textvqa.ocr_ml.tar.gz
217217
file_name: m4c.textvqa.ocr_ml.tar.gz
218-
hashcode: 4734dbf1816ffe378f08e69bd85b8cc0e4b2abe9564626505ff93e1ea7aea741
218+
hashcode: a2ac0584bce7f12bd01e014584404eca511390c1e1637e60027a93b2ddc104ae
219219
alone:
220220
zoo_requirements: detectron.vmb_weights
221-
version: 1.0_2020_04_29
221+
version: 1.0_2020_07_02
222222
resources:
223223
- url: mmf://models/m4c/m4c.textvqa.alone.tar.gz
224224
file_name: m4c.textvqa.alone.tar.gz
225-
hashcode: 84243f5d626a8211ea6a1ae409c72aaa24645ccd43883d4af4aa903d70d45f06
225+
hashcode: 359468f25a88b3bf51f5ba15075c011eea0a7d7c76ab4bc547b66631b0cb9098
226226
defaults: ${m4c.textvqa.with_stvqa}
227227
stvqa:
228228
defaults:

mmf/datasets/builders/stvqa/dataset.py

-1
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,4 @@ def preprocess_sample_info(self, sample_info):
1818
feature_path = append + "/" + feature_path
1919

2020
sample_info["feature_path"] = feature_path
21-
2221
return sample_info

mmf/datasets/builders/textvqa/dataset.py

+18-6
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,23 @@ def __init__(self, config, dataset_type, imdb_file_index, *args, **kwargs):
1515
self.use_ocr_info = self.config.use_ocr_info
1616

1717
def preprocess_sample_info(self, sample_info):
18+
path = self._get_path_based_on_index(self.config, "annotations", self._index)
19+
# NOTE, TODO: Code duplication w.r.t to STVQA, revisit
20+
# during dataset refactor to support variable dataset classes
21+
if "stvqa" in path:
22+
feature_path = sample_info["feature_path"]
23+
append = "train"
24+
25+
if self.dataset_type == "test":
26+
append = "test_task3"
27+
28+
if not feature_path.startswith(append):
29+
feature_path = append + "/" + feature_path
30+
31+
sample_info["feature_path"] = feature_path
32+
return sample_info
1833
# COCO Annotation DBs have correct feature_path
19-
if "COCO" not in sample_info["feature_path"]:
34+
elif "COCO" not in sample_info["feature_path"]:
2035
sample_info["feature_path"] = sample_info["image_path"].replace(
2136
".jpg", ".npy"
2237
)
@@ -82,7 +97,6 @@ def __getitem__(self, idx):
8297
current_sample.image_id = str(sample_info["image_id"])
8398
else:
8499
current_sample.image_id = sample_info["image_id"]
85-
86100
if self._use_features is True:
87101
features = self.features_db[idx]
88102
current_sample.update(features)
@@ -104,7 +118,7 @@ def __getitem__(self, idx):
104118
return current_sample
105119

106120
def add_sample_details(self, sample_info, sample):
107-
sample.image_id = object_to_byte_tensor(sample_info["image_id"])
121+
sample.image_id = object_to_byte_tensor(sample.image_id)
108122

109123
# 1. Load text (question words)
110124
question_str = (
@@ -198,10 +212,8 @@ def add_sample_details(self, sample_info, sample):
198212
return sample
199213

200214
def add_answer_info(self, sample_info, sample):
201-
if "answers" not in sample_info:
202-
return sample
203215
# Load real answers from sample_info
204-
answers = sample_info.get("answers", None)
216+
answers = sample_info.get("answers", [])
205217
answer_processor_arg = {"answers": answers}
206218

207219
answer_processor_arg["tokens"] = sample.pop("ocr_tokens", [])

mmf/datasets/processors/processors.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -558,11 +558,7 @@ def __call__(self, item):
558558

559559
if "answer_tokens" in item:
560560
tokens = item["answer_tokens"]
561-
elif (
562-
"answers" in item
563-
and item["answers"] is not None
564-
and len(item["answers"]) > 0
565-
):
561+
elif "answers" in item and item["answers"] is not None:
566562
if self.preprocessor is None:
567563
raise AssertionError(
568564
"'preprocessor' must be defined if you "

tools/scripts/features/extract_features_vmb.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from maskrcnn_benchmark.utils.model_serialization import load_state_dict
2323
from PIL import Image
2424

25-
from mmf.utils.general import download_file
25+
from mmf.utils.download import download
2626

2727

2828
class FeatureExtractor:
@@ -46,8 +46,8 @@ def _try_downloading_necessities(self):
4646
print("Downloading model and configuration")
4747
self.args.model_file = self.MODEL_URL.split("/")[-1]
4848
self.args.config_file = self.CONFIG_URL.split("/")[-1]
49-
download_file(self.MODEL_URL)
50-
download_file(self.CONFIG_URL)
49+
download(self.MODEL_URL, ".", self.args.model_file)
50+
download(self.CONFIG_URL, ".", self.args.config_file)
5151

5252
def get_parser(self):
5353
parser = argparse.ArgumentParser()

0 commit comments

Comments
 (0)
0