Update on "[DataPipe] Enforcing single valid iterator for IterDataPipes with single DataPipe as output" · pytorch/pytorch@db9862f · GitHub

Commit db9862f

Update on "[DataPipe] Enforcing single valid iterator for IterDataPipes with single DataPipe as output"
This PR introduces the single-iterator constraint for `IterDataPipe`, focusing on IterDataPipes that produce a single `IterDataPipe` as output. The constraint is necessary because having multiple iterators referencing the same `IterDataPipe` can lead to incoherent internal state (e.g. `shuffler` may not produce sensible outputs), which prevents us from saving a snapshot of the DataPipe or building determinism mechanisms.

Fixes part of pytorch/data#45. The expected behavior of a single iterator per DataPipe is described in detail in the linked issue.

Please review the changes in `torch/utils/data/datapipes/_typing.py` to see whether the implementation is sensible. The new test `TestIterDataPipeSingletonConstraint` in `test_datapipe.py` illustrates the behaviors the constraint should impose; please comment there if any behavior seems unexpected or unclear.

Differential Revision: [D33344609](https://our.internmc.facebook.com/intern/diff/D33344609)

[ghstack-poisoned]
2 parents 9824b8a + 9252407 commit db9862f
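
For context, a minimal sketch of the behavior this constraint is meant to enforce, based on the description above and pytorch/data#45 rather than on the test code in this commit; the toy RangePipe class is made up purely for illustration:

    from torch.utils.data import IterDataPipe

    class RangePipe(IterDataPipe):
        # Toy IterDataPipe used only for illustration.
        def __init__(self, n):
            self.n = n

        def __iter__(self):
            yield from range(self.n)

    dp = RangePipe(5)

    it1 = iter(dp)
    assert next(it1) == 0

    # Creating a second iterator over the same DataPipe is expected to
    # invalidate the first, so only one iterator is "valid" at a time and
    # the internal state (e.g. a shuffler's buffer) stays coherent.
    it2 = iter(dp)
    assert next(it2) == 0

    try:
        next(it1)  # stale iterator; expected to raise under the new constraint
    except RuntimeError as err:
        print(f"it1 was invalidated: {err}")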

File tree

106 files changed: +5,495 −871 lines


.buckconfig.oss

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+[buildfile]
+  name = BUILD.buck
+
+[repositories]
+  bazel_skylib = third_party/bazel-skylib/
+
+[download]
+  in_build = true
+
+[cxx]
+  cxxflags = -std=c++17
+  should_remap_host_platform = true
+
+[project]
+  default_flavors_mode=all

.circleci/docker/common/install_conda.sh

Lines changed: 0 additions & 8 deletions
@@ -100,14 +100,6 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
   # Install some other packages, including those needed for Python test reporting
   pip_install -r /opt/conda/requirements-ci.txt

-  # Install numba only on python-3.8 or below
-  # For numba issue see https://github.com/pytorch/pytorch/issues/51511
-  if [[ $(python -c "import sys; print(int(sys.version_info < (3, 9)))") == "1" ]]; then
-    pip_install numba==0.54.1
-  else
-    pip_install numba==0.49.0
-  fi
-
   # Update scikit-learn to a python-3.8 compatible version
   if [[ $(python -c "import sys; print(int(sys.version_info >= (3, 8)))") == "1" ]]; then
     pip_install -U scikit-learn

.circleci/docker/common/install_user.sh

Lines changed: 5 additions & 0 deletions
@@ -6,6 +6,8 @@ set -ex
 # jenkins user as ec2-user should have the same user-id
 echo "jenkins:x:1000:1000::/var/lib/jenkins:" >> /etc/passwd
 echo "jenkins:x:1000:" >> /etc/group
+# Needed on focal or newer
+echo "jenkins:*:19110:0:99999:7:::" >>/etc/shadow

 # Create $HOME
 mkdir -p /var/lib/jenkins
@@ -19,3 +21,6 @@ chown jenkins:jenkins /usr/local
 # Allow sudo
 # TODO: Maybe we shouldn't
 echo 'jenkins ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/jenkins
+
+# Test that sudo works
+sudo -u jenkins sudo -v

.circleci/docker/requirements-ci.txt

Lines changed: 3 additions & 1 deletion
@@ -98,10 +98,12 @@ mypy==0.812
 #Pinned versions: 1.10.0.post1
 #test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py

-#numba
+numba==0.49.0 ; python_version < "3.9"
+numba==0.54.1 ; python_version == "3.9"
 #Description: Just-In-Time Compiler for Numerical Functions
 #Pinned versions: 0.54.1, 0.49.0, <=0.49.1
 #test that import: test_numba_integration.py
+#For numba issue see https://github.com/pytorch/pytorch/issues/51511

 #numpy
 #Description: Provides N-dimensional arrays and linear algebra
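
As an aside on the pinning style introduced above: the `; python_version ...` suffixes are standard environment markers that pip evaluates against the running interpreter. A small sketch of how such a marker resolves, assuming the third-party `packaging` library (not part of this commit):

    # Sketch: evaluate the same kind of markers pip uses for conditional requirements.
    from packaging.markers import Marker

    requirements = [
        ("numba==0.49.0", Marker('python_version < "3.9"')),
        ("numba==0.54.1", Marker('python_version == "3.9"')),
    ]

    for req, marker in requirements:
        # Marker.evaluate() checks the marker against the current environment.
        if marker.evaluate():
            print("applies here:", req)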

.github/workflows/buck_build_test.yml

Lines changed: 50 additions & 3 deletions
@@ -63,7 +63,54 @@ jobs:
           wget https://github.com/facebook/buck/releases/download/v2021.01.12.01/buck.2021.01.12.01_all.deb
           sudo apt install ./buck.2021.01.12.01_all.deb

-      - name: Build Buck target
+      - name: Download third party libraries and generate wrappers
         run: |
-          echo -e "[buildfile]\n name = BUILD.buck\n" > .buckconfig
-          buck build --keep-going //third_party:
+          sh scripts/buck_setup.sh
+
+      - name: Build glog
+        run: |
+          buck build third_party:glog
+
+      - name: Build C10
+        run: |
+          buck build c10:c10
+
+      - name: Build cpuinfo
+        run: |
+          buck build third_party:cpuinfo
+
+      - name: Build pthreadpool
+        run: |
+          buck build third_party:pthreadpool
+
+      - name: Build XNNPACK
+        run: |
+          buck build third_party:XNNPACK
+
+      - name: Build QNNPACK
+        run: |
+          buck build aten/src/ATen/native/quantized/cpu/qnnpack/... --keep-going
+
+      - name: Build aten_cpu
+        run: |
+          buck build :aten_cpu
+
+      - name: Build torch_mobile_core
+        run: |
+          buck build :torch_mobile_core
+
+      - name: Build torch_mobile_all_ops
+        run: |
+          buck build :torch_mobile_all_ops
+
+      - name: Build mobile benchmark
+        run: |
+          buck build :ptmobile_benchmark
+
+      - name: Run lite interpreter model
+        run: |
+          buck run :ptmobile_benchmark -- --model=ios/TestApp/models/mobilenet_v2.ptl --input_dims=1,3,224,224 --input_type=float
+
+      - name: Build everything
+        run: |
+          buck build //... --keep-going

.github/workflows/docker-builds.yml

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@ jobs:
          - docker-image-name: pytorch-linux-xenial-py3-clang7-onnx
          - docker-image-name: pytorch-linux-xenial-py3.7-gcc5.4
          - docker-image-name: pytorch-linux-xenial-py3.7-gcc7
+          - docker-image-name: pytorch-linux-focal-py3.7-gcc7
     env:
       DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${{ matrix.docker-image-name }}
     steps:

.gitignore

Lines changed: 4 additions & 0 deletions
@@ -325,3 +325,7 @@ pr.diff
 .lsp-buck-out/
 .lsp.buckd/
 buck-out/
+
+# Downloaded libraries
+third_party/ruy/
+third_party/glog/

.lintrunner.toml

Lines changed: 4 additions & 0 deletions
@@ -144,6 +144,10 @@ include_patterns = [
     'torch/utils/benchmark/utils/timer.py',
     'torch/utils/benchmark/utils/valgrind_wrapper/**/*.py',
 ]
+exclude_patterns = [
+    # (linbinyu) copied from internal repo
+    'tools/code_analyzer/gen_operators_yaml.py',
+]
 command = [
     'python3',
     'tools/linter/adapters/mypy_linter.py',

BUILD.bazel

Lines changed: 9 additions & 59 deletions
@@ -8,7 +8,7 @@ load("//:tools/build_variables.bzl", "jit_core_sources", "libtorch_core_sources"
 load("//tools/rules:cu.bzl", "cu_library")
 load("//tools/config:defs.bzl", "if_cuda")
 load("//:aten.bzl", "intern_build_aten_ops", "generate_aten", "aten_ufunc_generated_cpu_sources", "aten_ufunc_generated_cpu_kernel_sources", "aten_ufunc_generated_cuda_sources")
-load(":build.bzl", "define_targets")
+load(":build.bzl", "define_targets", "GENERATED_AUTOGRAD_CPP", "GENERATED_AUTOGRAD_PYTHON")

 define_targets(rules = rules)

@@ -102,62 +102,10 @@ generate_aten(
     generator = "//torchgen:gen",
 )

-libtorch_cpp_generated_sources = [
-    "torch/csrc/autograd/generated/VariableType.h",
-    "torch/csrc/autograd/generated/VariableType_0.cpp",
-    "torch/csrc/autograd/generated/VariableType_1.cpp",
-    "torch/csrc/autograd/generated/VariableType_2.cpp",
-    "torch/csrc/autograd/generated/VariableType_3.cpp",
-    "torch/csrc/autograd/generated/VariableType_4.cpp",
-    # "torch/csrc/autograd/generated/VariableTypeEverything.cpp",
-    "torch/csrc/autograd/generated/TraceType_0.cpp",
-    "torch/csrc/autograd/generated/TraceType_1.cpp",
-    "torch/csrc/autograd/generated/TraceType_2.cpp",
-    "torch/csrc/autograd/generated/TraceType_3.cpp",
-    "torch/csrc/autograd/generated/TraceType_4.cpp",
-    # "torch/csrc/autograd/generated/TraceTypeEverything.cpp",
-    "torch/csrc/autograd/generated/ADInplaceOrViewType_0.cpp",
-    "torch/csrc/autograd/generated/ADInplaceOrViewType_1.cpp",
-    # "torch/csrc/autograd/generated/ADInplaceOrViewTypeEverything.cpp",
-    "torch/csrc/autograd/generated/Functions.h",
-    "torch/csrc/autograd/generated/Functions.cpp",
-    "torch/csrc/autograd/generated/variable_factories.h",
-    "torch/csrc/lazy/generated/LazyIr.h",
-    "torch/csrc/lazy/generated/LazyNativeFunctions.h",
-    "torch/csrc/lazy/generated/LazyNativeFunctions.cpp",
-    "torch/csrc/lazy/generated/RegisterAutogradLazy.cpp",
-    "torch/csrc/lazy/generated/RegisterLazy.cpp",
-]
-
-libtorch_python_generated_sources = [
-    "torch/csrc/autograd/generated/python_functions.h",
-    "torch/csrc/autograd/generated/python_functions_0.cpp",
-    "torch/csrc/autograd/generated/python_functions_1.cpp",
-    "torch/csrc/autograd/generated/python_functions_2.cpp",
-    "torch/csrc/autograd/generated/python_functions_3.cpp",
-    "torch/csrc/autograd/generated/python_functions_4.cpp",
-    "torch/csrc/autograd/generated/python_variable_methods.cpp",
-    "torch/csrc/autograd/generated/python_torch_functions_0.cpp",
-    "torch/csrc/autograd/generated/python_torch_functions_1.cpp",
-    "torch/csrc/autograd/generated/python_torch_functions_2.cpp",
-    "torch/csrc/autograd/generated/python_nn_functions.cpp",
-    "torch/csrc/autograd/generated/python_fft_functions.cpp",
-    "torch/csrc/autograd/generated/python_linalg_functions.cpp",
-    "torch/csrc/autograd/generated/python_sparse_functions.cpp",
-    "torch/csrc/autograd/generated/python_special_functions.cpp",
-    "torch/csrc/autograd/generated/python_return_types.cpp",
-]
-
 filegroup(
     name = "cpp_generated_code",
     data = [":generate-code"],
-    srcs = libtorch_cpp_generated_sources,
-)
-
-filegroup(
-    name = "python_generated_code",
-    data = [":generate-code"],
-    srcs = libtorch_python_generated_sources,
+    srcs = GENERATED_AUTOGRAD_CPP,
 )

 exports_files(
@@ -1647,7 +1595,7 @@ cc_library(
             "torch/csrc/autograd/generated/variable_factories.h",
             "torch/csrc/autograd/generated/Functions.h",
         ] + torch_cuda_headers,
-    ) + [":cpp_generated_code", ":version_h"],
+    ) + GENERATED_AUTOGRAD_CPP + [":version_h"],
     includes = [
         "torch/csrc",
         "torch/csrc/api/include",
@@ -1692,8 +1640,7 @@ cc_library(
             "torch/csrc/cuda/nccl.cpp",
             "torch/csrc/distributed/c10d/quantization/quantization_gpu.cu",
         ],
-    )) + libtorch_core_sources + libtorch_distributed_sources + torch_cpp_srcs + libtorch_extra_sources + jit_core_sources + lazy_tensor_ts_sources +[
-        ":cpp_generated_code",
+    )) + libtorch_core_sources + libtorch_distributed_sources + torch_cpp_srcs + libtorch_extra_sources + jit_core_sources + lazy_tensor_ts_sources + GENERATED_AUTOGRAD_CPP + [
         "torch/csrc/jit/serialization/flatbuffer_serializer.cpp",
         "torch/csrc/jit/mobile/flatbuffer_loader.cpp"
     ],
@@ -1726,7 +1673,10 @@ cc_library(
         "**/*.h",
         "**/*.cuh",
     ]) + [
-        ":cpp_generated_code",
+        # We need the filegroup here because the raw list causes Bazel
+        # to see duplicate files. It knows how to deduplicate with the
+        # filegroup.
+        ":cpp_generated_code"
     ],
     includes = [
         "torch/csrc/api/include",
@@ -1742,7 +1692,7 @@ cc_library(

 cc_library(
     name = "torch_python",
-    srcs = libtorch_python_core_sources + [":python_generated_code"],
+    srcs = libtorch_python_core_sources + GENERATED_AUTOGRAD_PYTHON,
     deps = [
         ":torch",
         ":shm",

0 commit comments
