Update on "Add _foreach_add_(TensorList tensors, Scalar scalar) " · pytorch/pytorch@1a5bab5 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1a5bab5

Browse files
author
Iurii Zdebskyi
committed
Update on "Add _foreach_add_(TensorList tensors, Scalar scalar) "
**Motivation** [GitHub issue](#38655) Current PyTorch optimizer implementations are not efficient in cases when we work with a lot of small feature tensors. Starting a lot of kernels slows down the whole process. We need to reduce the number of kernels that we start. As an example, we should be looking at [NVIDIA's Apex](https://github.com/NVIDIA/apex). In order to track progress, we will pick PyTorch's DCGAN model with the Adam optimizer and, once the optimizer is reimplemented with tensor lists, benchmark the model performance against the original model version, Apex's version with the original Adam optimizer, and its FusedAdam optimizer. [First PR: Add private API to support tensor lists: _foreach_add(TensorList tensors, Scalar scalar)](#41554). **In this PR** - Adding a `std::vector<Tensor> _foreach_add_(TensorList tensors, Scalar scalar)` API - Resolving some additional comments from previous [PR](#41554). **Tests** Tested via unit tests **TODO** 1. Properly handle empty lists **Plan for the next PRs** 1. APIs - Binary Ops for list with Scalar - Binary Ops for list with list - Unary Ops for list 2. Rewrite PyTorch optimizers to use for-each operators in order to get performance gains. [ghstack-poisoned]
2 parents cf29686 + 27934ee commit 1a5bab5

File tree

104 files changed

+3271
-1215
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

104 files changed

+3271
-1215
lines changed

.circleci/cimodel/data/pytorch_build_definitions.py

Lines changed: 73 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@
22
from dataclasses import dataclass, field
33
from typing import List, Optional
44

5-
from cimodel.data.pytorch_build_data import TopLevelNode, CONFIG_TREE_DATA
65
import cimodel.data.dimensions as dimensions
76
import cimodel.lib.conf_tree as conf_tree
87
import cimodel.lib.miniutils as miniutils
8+
from cimodel.data.pytorch_build_data import CONFIG_TREE_DATA, TopLevelNode
99
from cimodel.data.simple.util.branch_filters import gen_filter_dict
10-
from cimodel.data.simple.util.docker_constants import gen_docker_image_path
11-
10+
from cimodel.data.simple.util.docker_constants import gen_docker_image
1211

1312

1413
@dataclass
@@ -27,7 +26,7 @@ class Conf:
2726
restrict_phases: Optional[List[str]] = None
2827
gpu_resource: Optional[str] = None
2928
dependent_tests: List = field(default_factory=list)
30-
parent_build: Optional['Conf'] = None
29+
parent_build: Optional["Conf"] = None
3130
is_libtorch: bool = False
3231
is_important: bool = False
3332
parallel_backend: Optional[str] = None
@@ -64,18 +63,26 @@ def get_parms(self, for_docker):
6463
return result
6564

6665
def gen_docker_image_path(self):
67-
6866
parms_source = self.parent_build or self
6967
base_build_env_name = "-".join(parms_source.get_parms(True))
68+
image_name, _ = gen_docker_image(base_build_env_name)
69+
return miniutils.quote(image_name)
7070

71-
image_path = gen_docker_image_path(base_build_env_name)
72-
return miniutils.quote(image_path)
71+
def gen_docker_image_requires(self):
72+
parms_source = self.parent_build or self
73+
base_build_env_name = "-".join(parms_source.get_parms(True))
74+
_, requires = gen_docker_image(base_build_env_name)
75+
return miniutils.quote(requires)
7376

7477
def get_build_job_name_pieces(self, build_or_test):
7578
return self.get_parms(False) + [build_or_test]
7679

7780
def gen_build_name(self, build_or_test):
78-
return ("_".join(map(str, self.get_build_job_name_pieces(build_or_test)))).replace(".", "_").replace("-", "_")
81+
return (
82+
("_".join(map(str, self.get_build_job_name_pieces(build_or_test))))
83+
.replace(".", "_")
84+
.replace("-", "_")
85+
)
7986

8087
def get_dependents(self):
8188
return self.dependent_tests or []
@@ -116,12 +123,13 @@ def gen_workflow_job(self, phase):
116123
job_name = "pytorch_linux_test"
117124
else:
118125
job_name = "pytorch_linux_build"
126+
job_def["requires"] = [self.gen_docker_image_requires()]
119127

120128
if not self.is_important:
121129
job_def["filters"] = gen_filter_dict()
122130
job_def.update(self.gen_workflow_params(phase))
123131

124-
return {job_name : job_def}
132+
return {job_name: job_def}
125133

126134

127135
# TODO This is a hack to special case some configs just for the workflow list
@@ -131,20 +139,26 @@ def __init__(self, name, parent_build=None):
131139
self.parent_build = parent_build
132140

133141
def gen_workflow_job(self, phase):
134-
return {self.gen_build_name(phase): {"requires": [self.parent_build.gen_build_name("build")]}}
142+
return {
143+
self.gen_build_name(phase): {
144+
"requires": [self.parent_build.gen_build_name("build")]
145+
}
146+
}
135147

136148
def gen_build_name(self, _):
137149
return self.name
138150

139151
class DocPushConf(object):
140-
def __init__(self, name, parent_build=None):
152+
def __init__(self, name, parent_build=None, branch="master"):
141153
self.name = name
142154
self.parent_build = parent_build
155+
self.branch = branch
143156

144157
def gen_workflow_job(self, phase):
145158
return {
146159
"pytorch_doc_push": {
147160
"name": self.name,
161+
"branch": self.branch,
148162
"requires": [self.parent_build],
149163
"context": "org-member",
150164
"filters": gen_filter_dict(branches_list=["nightly"])
@@ -183,12 +197,40 @@ def gen_dependent_configs(xenial_parent_config):
183197
def gen_docs_configs(xenial_parent_config):
184198
configs = []
185199

186-
for x in ["pytorch_python_doc_build", "pytorch_cpp_doc_build"]:
187-
conf = HiddenConf(x, parent_build=xenial_parent_config)
188-
configs.append(conf)
189-
configs.append(DocPushConf(x.replace("build", "push"), x))
200+
configs.append(
201+
HiddenConf(
202+
"pytorch_python_doc_build",
203+
parent_build=xenial_parent_config
204+
)
205+
)
206+
configs.append(
207+
DocPushConf(
208+
"pytorch_python_doc_push",
209+
parent_build="pytorch_python_doc_build",
210+
branch="site",
211+
)
212+
)
190213

191-
configs.append(HiddenConf("pytorch_doc_test", parent_build=xenial_parent_config))
214+
configs.append(
215+
HiddenConf(
216+
"pytorch_cpp_doc_build",
217+
parent_build=xenial_parent_config
218+
)
219+
)
220+
configs.append(
221+
DocPushConf(
222+
"pytorch_cpp_doc_push",
223+
parent_build="pytorch_cpp_doc_build",
224+
branch="master",
225+
)
226+
)
227+
228+
configs.append(
229+
HiddenConf(
230+
"pytorch_doc_test",
231+
parent_build=xenial_parent_config
232+
)
233+
)
192234
return configs
193235

194236

@@ -251,7 +293,7 @@ def instantiate_configs():
251293
parms_list.append(gcc_version)
252294

253295
# TODO: This is a nasty special case
254-
if gcc_version == 'clang5' and not is_xla:
296+
if gcc_version == "clang5" and not is_xla:
255297
parms_list.append("asan")
256298
python_version = fc.find_prop("pyver")
257299
parms_list[0] = fc.find_prop("abbreviated_pyver")
@@ -290,20 +332,25 @@ def instantiate_configs():
290332

291333
# run docs builds on "pytorch-linux-xenial-py3.6-gcc5.4". Docs builds
292334
# should run on a CPU-only build that runs on all PRs.
293-
if distro_name == 'xenial' and fc.find_prop("pyver") == '3.6' \
294-
and cuda_version is None \
295-
and parallel_backend is None \
296-
and compiler_name == 'gcc' \
297-
and fc.find_prop('compiler_version') == '5.4':
335+
if (
336+
distro_name == "xenial"
337+
and fc.find_prop("pyver") == "3.6"
338+
and cuda_version is None
339+
and parallel_backend is None
340+
and compiler_name == "gcc"
341+
and fc.find_prop("compiler_version") == "5.4"
342+
):
298343
c.dependent_tests = gen_docs_configs(c)
299344

300345
if cuda_version == "10.1" and python_version == "3.6" and not is_libtorch:
301346
c.dependent_tests = gen_dependent_configs(c)
302347

303-
if (compiler_name == "gcc"
304-
and compiler_version == "5.4"
305-
and not is_libtorch
306-
and parallel_backend is None):
348+
if (
349+
compiler_name == "gcc"
350+
and compiler_version == "5.4"
351+
and not is_libtorch
352+
and parallel_backend is None
353+
):
307354
bc_breaking_check = Conf(
308355
"backward-compatibility-check",
309356
[],

.circleci/cimodel/data/simple/android_definitions.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import cimodel.data.simple.util.branch_filters as branch_filters
2-
from cimodel.data.simple.util.docker_constants import DOCKER_IMAGE_NDK
2+
from cimodel.data.simple.util.docker_constants import (
3+
DOCKER_IMAGE_NDK, DOCKER_REQUIREMENT_NDK
4+
)
35

46

57
class AndroidJob:
@@ -34,6 +36,7 @@ def gen_tree(self):
3436
"name": full_job_name,
3537
"build_environment": "\"{}\"".format(build_env_name),
3638
"docker_image": "\"{}\"".format(DOCKER_IMAGE_NDK),
39+
"requires": [DOCKER_REQUIREMENT_NDK]
3740
}
3841

3942
if self.is_master_only:

.circleci/cimodel/data/simple/bazel_definitions.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from cimodel.data.simple.util.docker_constants import DOCKER_IMAGE_GCC7
1+
from cimodel.data.simple.util.docker_constants import (
2+
DOCKER_IMAGE_GCC7,
3+
DOCKER_REQUIREMENT_GCC7
4+
)
25

36

47
def gen_job_name(phase):
@@ -38,7 +41,10 @@ def gen_tree(self):
3841
full_job_name = gen_job_name(self.phase)
3942
build_env_name = "-".join(build_env_parts)
4043

41-
extra_requires = [gen_job_name("build")] if self.phase == "test" else []
44+
extra_requires = (
45+
[gen_job_name("build")] if self.phase == "test" else
46+
[DOCKER_REQUIREMENT_GCC7]
47+
)
4248

4349
props_dict = {
4450
"build_environment": build_env_name,

.circleci/cimodel/data/simple/mobile_definitions.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,23 @@
44

55
import cimodel.lib.miniutils as miniutils
66
import cimodel.data.simple.util.branch_filters
7-
from cimodel.data.simple.util.docker_constants import DOCKER_IMAGE_ASAN, DOCKER_IMAGE_NDK
7+
from cimodel.data.simple.util.docker_constants import (
8+
DOCKER_IMAGE_ASAN,
9+
DOCKER_REQUIREMENT_ASAN,
10+
DOCKER_IMAGE_NDK,
11+
DOCKER_REQUIREMENT_NDK
12+
)
813

914

1015
class MobileJob:
11-
def __init__(self, docker_image, variant_parts, is_master_only=False):
16+
def __init__(
17+
self,
18+
docker_image,
19+
docker_requires,
20+
variant_parts,
21+
is_master_only=False):
1222
self.docker_image = docker_image
23+
self.docker_requires = docker_requires
1324
self.variant_parts = variant_parts
1425
self.is_master_only = is_master_only
1526

@@ -30,6 +41,7 @@ def gen_tree(self):
3041
"build_environment": build_env_name,
3142
"build_only": miniutils.quote(str(int(True))),
3243
"docker_image": self.docker_image,
44+
"requires": self.docker_requires,
3345
"name": full_job_name,
3446
}
3547

@@ -40,14 +52,32 @@ def gen_tree(self):
4052

4153

4254
WORKFLOW_DATA = [
43-
MobileJob(DOCKER_IMAGE_ASAN, ["build"]),
44-
MobileJob(DOCKER_IMAGE_ASAN, ["custom", "build", "static"]),
55+
MobileJob(
56+
DOCKER_IMAGE_ASAN,
57+
[DOCKER_REQUIREMENT_ASAN],
58+
["build"]
59+
),
60+
MobileJob(
61+
DOCKER_IMAGE_ASAN,
62+
[DOCKER_REQUIREMENT_ASAN],
63+
["custom", "build", "static"]
64+
),
4565

4666
# Use LLVM-DEV toolchain in android-ndk-r19c docker image
47-
MobileJob(DOCKER_IMAGE_NDK, ["custom", "build", "dynamic"]),
67+
MobileJob(
68+
DOCKER_IMAGE_NDK,
69+
[DOCKER_REQUIREMENT_NDK],
70+
["custom", "build", "dynamic"]
71+
),
4872

4973
# Use LLVM-DEV toolchain in android-ndk-r19c docker image
50-
MobileJob(DOCKER_IMAGE_NDK, ["code", "analysis"]),
74+
# Most of this CI is already covered by "mobile-custom-build-dynamic" job
75+
MobileJob(
76+
DOCKER_IMAGE_NDK,
77+
[DOCKER_REQUIREMENT_NDK],
78+
["code", "analysis"],
79+
True
80+
),
5181
]
5282

5383

.circleci/cimodel/data/simple/nightly_android.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from cimodel.data.simple.util.docker_constants import DOCKER_IMAGE_NDK
1+
from cimodel.data.simple.util.docker_constants import (
2+
DOCKER_IMAGE_NDK,
3+
DOCKER_REQUIREMENT_NDK
4+
)
25

36

47
class AndroidNightlyJob:
@@ -48,12 +51,13 @@ def gen_tree(self):
4851

4952
return [{self.template_name: props_dict}]
5053

54+
BASE_REQUIRES = [DOCKER_REQUIREMENT_NDK]
5155

5256
WORKFLOW_DATA = [
53-
AndroidNightlyJob(["x86_32"], "pytorch_linux_build"),
54-
AndroidNightlyJob(["x86_64"], "pytorch_linux_build"),
55-
AndroidNightlyJob(["arm", "v7a"], "pytorch_linux_build"),
56-
AndroidNightlyJob(["arm", "v8a"], "pytorch_linux_build"),
57+
AndroidNightlyJob(["x86_32"], "pytorch_linux_build", requires=BASE_REQUIRES),
58+
AndroidNightlyJob(["x86_64"], "pytorch_linux_build", requires=BASE_REQUIRES),
59+
AndroidNightlyJob(["arm", "v7a"], "pytorch_linux_build", requires=BASE_REQUIRES),
60+
AndroidNightlyJob(["arm", "v8a"], "pytorch_linux_build", requires=BASE_REQUIRES),
5761
AndroidNightlyJob(["android_gradle"], "pytorch_android_gradle_build",
5862
with_docker=False,
5963
requires=[
Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,33 @@
11
AWS_DOCKER_HOST = "308535385114.dkr.ecr.us-east-1.amazonaws.com"
22

3-
# ARE YOU EDITING THIS NUMBER? MAKE SURE YOU READ THE GUIDANCE AT THE
4-
# TOP OF .circleci/config.yml
5-
DOCKER_IMAGE_TAG = "ab1632df-fa59-40e6-8c23-98e004f61148"
3+
def gen_docker_image(container_type):
4+
return (
5+
"/".join([AWS_DOCKER_HOST, "pytorch", container_type]),
6+
f"docker-{container_type}",
7+
)
68

9+
def gen_docker_image_requires(image_name):
10+
return [f"docker-{image_name}"]
711

8-
def gen_docker_image_path(container_type, container_tag=DOCKER_IMAGE_TAG):
9-
return "/".join([
10-
AWS_DOCKER_HOST,
11-
"pytorch",
12-
container_type + ":" + container_tag,
13-
])
1412

13+
DOCKER_IMAGE_BASIC, DOCKER_REQUIREMENT_BASE = gen_docker_image(
14+
"pytorch-linux-xenial-py3.6-gcc5.4"
15+
)
1516

16-
DOCKER_IMAGE_BASIC = gen_docker_image_path("pytorch-linux-xenial-py3.6-gcc5.4")
17+
DOCKER_IMAGE_CUDA_10_2, DOCKER_REQUIREMENT_CUDA_10_2 = gen_docker_image(
18+
"pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
19+
)
1720

18-
DOCKER_IMAGE_CUDA_10_2 = gen_docker_image_path("pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7")
21+
DOCKER_IMAGE_GCC7, DOCKER_REQUIREMENT_GCC7 = gen_docker_image(
22+
"pytorch-linux-xenial-py3.6-gcc7"
23+
)
1924

20-
DOCKER_IMAGE_GCC7 = gen_docker_image_path("pytorch-linux-xenial-py3.6-gcc7")
2125

22-
def gen_mobile_docker_name(specifier):
26+
def gen_mobile_docker(specifier):
2327
container_type = "pytorch-linux-xenial-py3-clang5-" + specifier
24-
return gen_docker_image_path(container_type)
28+
return gen_docker_image(container_type)
2529

2630

27-
DOCKER_IMAGE_ASAN = gen_mobile_docker_name("asan")
31+
DOCKER_IMAGE_ASAN, DOCKER_REQUIREMENT_ASAN = gen_mobile_docker("asan")
2832

29-
DOCKER_IMAGE_NDK = gen_mobile_docker_name("android-ndk-r19c")
33+
DOCKER_IMAGE_NDK, DOCKER_REQUIREMENT_NDK = gen_mobile_docker("android-ndk-r19c")

0 commit comments

Comments
 (0)
0