Update on "[DataPipe] Basic snapshotting with IterableWrapper" · pytorch/pytorch@8f9f541 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8f9f541

Browse files
committed
Update on "[DataPipe] Basic snapshotting with IterableWrapper"
This PR changes: 1. Restricting the DataPipe to only have one iterator at a time 2. Adding the method `reset` to allow users to reset the iterator from that DataPipe 3. Adding features and tests related to serialization and snapshotting cc @VitalyFedyunin ejguan @NivekT [ghstack-poisoned]
2 parents 2f5d188 + 3044652 commit 8f9f541

File tree

152 files changed

+8021
-38753
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

152 files changed

+8021
-38753
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
name: Test pytorch binary
2+
3+
description: Pulls the docker image and tests the pytorch binary using it. All env variable referenced in the "Test PyTorch binary" step must be set in the GITHUB_ENV file
4+
5+
runs:
6+
using: composite
7+
steps:
8+
- name: Test PyTorch binary
9+
shell: bash
10+
run: |
11+
set -x
12+
# shellcheck disable=SC2086,SC2090
13+
container_name=$(docker run \
14+
${GPU_FLAG:-} \
15+
-e BINARY_ENV_FILE \
16+
-e BUILDER_ROOT \
17+
-e BUILD_ENVIRONMENT \
18+
-e BUILD_SPLIT_CUDA \
19+
-e DESIRED_CUDA \
20+
-e DESIRED_DEVTOOLSET \
21+
-e DESIRED_PYTHON \
22+
-e GITHUB_ACTIONS \
23+
-e GPU_ARCH_TYPE \
24+
-e GPU_ARCH_VERSION \
25+
-e LIBTORCH_VARIANT \
26+
-e PACKAGE_TYPE \
27+
-e PYTORCH_FINAL_PACKAGE_DIR \
28+
-e PYTORCH_ROOT \
29+
-e SKIP_ALL_TESTS \
30+
--tty \
31+
--detach \
32+
-v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
33+
-v "${GITHUB_WORKSPACE}/builder:/builder" \
34+
-v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
35+
-w / \
36+
"${DOCKER_IMAGE}"
37+
)
38+
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
39+
# Generate test script
40+
docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
41+
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"

.github/ci_commit_pins/functorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
39d790c41ce5e842ecdd1581b01b8e50b859adba
1+
b567f78968fc0bc5d0cf666b6c2e4792c40386e5
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
dbb005e7429300e6605abdf6533d0d6dac8dabe3
1+
b3806ec6974da20fbb1a656b55685ea7c912896c

.github/scripts/trymerge.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,6 @@ def has_internal_changes(self) -> bool:
758758

759759
def merge_ghstack_into(self, repo: GitRepo, force: bool, comment_id: Optional[int] = None) -> None:
760760
assert self.is_ghstack_pr()
761-
approved_by = self.get_approved_by()
762761
# For ghstack, cherry-pick commits based from origin
763762
orig_ref = f"{repo.remote}/{re.sub(r'/head$', '/orig', self.head_ref())}"
764763
rev_list = repo.revlist(f"{self.default_branch()}..{orig_ref}")

.github/templates/linux_binary_build_workflow.yml.j2

Lines changed: 30 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -48,146 +48,56 @@ jobs:
4848
{%- for config in build_configs %}
4949
!{{ config["build_name"] }}-build:
5050
if: ${{ github.repository_owner == 'pytorch' }}
51-
runs-on: linux.4xlarge
52-
timeout-minutes: !{{ common.timeout_minutes }}
53-
!{{ upload.binary_env(config) }}
54-
steps:
55-
!{{ common.setup_ec2_linux() }}
56-
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
57-
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
58-
{%- if config["gpu_arch_type"] == 'cuda' and config["gpu_arch_version"].startswith('11') %}
59-
- name: Set BUILD_SPLIT_CUDA
60-
run: |
61-
echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV"
62-
{%- endif %}
63-
- name: Pull Docker image
64-
run: |
65-
!{{ common.add_retry_to_env() }}
66-
retry docker pull "${DOCKER_IMAGE}"
67-
- name: Build PyTorch binary
68-
run: |
69-
set -x
70-
mkdir -p artifacts/
71-
container_name=$(docker run \
72-
-e BINARY_ENV_FILE \
73-
-e BUILDER_ROOT \
74-
-e BUILD_ENVIRONMENT \
75-
-e BUILD_SPLIT_CUDA \
76-
-e DESIRED_CUDA \
77-
-e DESIRED_DEVTOOLSET \
78-
-e DESIRED_PYTHON \
79-
-e GITHUB_ACTIONS \
80-
-e GPU_ARCH_TYPE \
81-
-e GPU_ARCH_VERSION \
82-
-e LIBTORCH_VARIANT \
83-
-e PACKAGE_TYPE \
84-
-e PYTORCH_FINAL_PACKAGE_DIR \
85-
-e PYTORCH_ROOT \
86-
-e SKIP_ALL_TESTS \
87-
--tty \
88-
--detach \
89-
-v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
90-
-v "${GITHUB_WORKSPACE}/builder:/builder" \
91-
-v "${RUNNER_TEMP}/artifacts:/artifacts" \
92-
-w / \
93-
"${DOCKER_IMAGE}"
94-
)
95-
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
96-
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/!{{ config["package_type"] }}/build.sh"
97-
!{{ common.chown_dir("${RUNNER_TEMP}/artifacts") }}
98-
- uses: !{{ common.upload_artifact_s3_action }}
99-
with:
100-
name: !{{ config["build_name"] }}
101-
retention-days: 14
102-
if-no-files-found: error
103-
path:
104-
${{ runner.temp }}/artifacts/*
105-
!{{ common.teardown_ec2_linux("pytorch/") }}
51+
uses: ./.github/workflows/_binary-build-linux.yml
52+
with:!{{ upload.binary_env_as_input(config) }}
53+
build_name: !{{ config["build_name"] }}
54+
build_environment: !{{ build_environment }}
55+
secrets:
56+
github-token: ${{ secrets.GITHUB_TOKEN }}
57+
10658
!{{ config["build_name"] }}-test: # Testing
10759
if: ${{ github.repository_owner == 'pytorch' }}
10860
needs: !{{ config["build_name"] }}-build
109-
{%- if config["gpu_arch_type"] == "rocm" %}
110-
runs-on: linux.rocm.gpu
111-
{%- elif config["gpu_arch_type"] == "cuda" %}
112-
runs-on: linux.4xlarge.nvidia.gpu
61+
{%- if config["gpu_arch_type"] != "rocm" %}
62+
uses: ./.github/workflows/_binary-test-linux.yml
63+
with:!{{ upload.binary_env_as_input(config) }}
64+
build_name: !{{ config["build_name"] }}
65+
build_environment: !{{ build_environment }}
66+
{%- if config["gpu_arch_type"] == "rocm" %}
67+
runs_on: linux.rocm.gpu
68+
{%- elif config["gpu_arch_type"] == "cuda" %}
69+
runs_on: linux.4xlarge.nvidia.gpu
70+
{%- else %}
71+
runs_on: linux.4xlarge
72+
{%- endif %}
73+
secrets:
74+
github-token: ${{ secrets.GITHUB_TOKEN }}
11375
{%- else %}
114-
runs-on: linux.4xlarge
115-
{%- endif %}
76+
runs-on: linux.rocm.gpu
11677
timeout-minutes: !{{ common.timeout_minutes }}
11778
!{{ upload.binary_env(config) }}
11879
steps:
119-
{%- if config["gpu_arch_type"] == "rocm" %}
12080
!{{ common.setup_rocm_linux() }}
121-
{%- else %}
122-
!{{ common.setup_ec2_linux() }}
123-
{%- endif %}
12481
- uses: !{{ common.download_artifact_s3_action }}
12582
name: Download Build Artifacts
12683
with:
12784
name: !{{ config["build_name"] }}
12885
path: "${{ runner.temp }}/artifacts/"
12986
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
13087
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
131-
{%- if config["gpu_arch_type"] == "rocm" %}
13288
- name: ROCm set GPU_FLAG
13389
run: |
13490
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
135-
{%- elif config["gpu_arch_type"] == "cuda" %}
136-
- uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a
137-
name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
138-
with:
139-
timeout_minutes: 10
140-
max_attempts: 3
141-
command: |
142-
set -ex
143-
pushd pytorch
144-
bash .github/scripts/install_nvidia_utils_linux.sh
145-
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
146-
popd
147-
{%- endif %}
14891
- name: Pull Docker image
149-
run: |
150-
!{{ common.add_retry_to_env() }}
151-
retry docker pull "${DOCKER_IMAGE}"
152-
- name: Test PyTorch binary
153-
run: |
154-
set -x
155-
# shellcheck disable=SC2086,SC2090
156-
container_name=$(docker run \
157-
${GPU_FLAG:-} \
158-
-e BINARY_ENV_FILE \
159-
-e BUILDER_ROOT \
160-
-e BUILD_ENVIRONMENT \
161-
-e BUILD_SPLIT_CUDA \
162-
-e DESIRED_CUDA \
163-
-e DESIRED_DEVTOOLSET \
164-
-e DESIRED_PYTHON \
165-
-e GITHUB_ACTIONS \
166-
-e GPU_ARCH_TYPE \
167-
-e GPU_ARCH_VERSION \
168-
-e LIBTORCH_VARIANT \
169-
-e PACKAGE_TYPE \
170-
-e PYTORCH_FINAL_PACKAGE_DIR \
171-
-e PYTORCH_ROOT \
172-
-e SKIP_ALL_TESTS \
173-
--tty \
174-
--detach \
175-
-v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
176-
-v "${GITHUB_WORKSPACE}/builder:/builder" \
177-
-v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
178-
-w / \
179-
"${DOCKER_IMAGE}"
180-
)
181-
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
182-
# Generate test script
183-
docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
184-
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
185-
{%- if config["gpu_arch_type"] == "rocm" %}
92+
uses: ./pytorch/.github/actions/pull-docker-image
93+
with:
94+
docker-image: !{{ config["container_image"] }}
95+
- name: Test Pytorch binary
96+
uses: ./pytorch/.github/actions/test-pytorch-binary
18697
!{{ common.teardown_rocm_linux() }}
187-
{%- else %}
188-
!{{ common.teardown_ec2_linux("pytorch/") }}
18998
{%- endif %}
190-
{%- if branches == "nightly" %}
99+
100+
{%- if branches == "nightly" %}
191101
!{{ upload.upload_binaries(config) }}
192-
{%- endif %}
102+
{%- endif %}
193103
{%- endfor %}

.github/templates/upload.yml.j2

Lines changed: 28 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
11
{% import 'common.yml.j2' as common %}
22

33
{%- macro binary_env(config, is_windows=False) -%}
4-
env:
4+
env:!{{ binary_env_as_input(config, is_windows, True) }}
5+
{%- endmacro %}
6+
7+
{%- macro binary_env_as_input(config, is_windows=False, include_skip_tests=False) -%}
58
{%- if is_windows %}
69
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
710
BUILDER_ROOT: ${{ github.workspace }}/builder
11+
{%- else %}
12+
PYTORCH_ROOT: /pytorch
13+
BUILDER_ROOT: /builder
814
{%- endif %}
915
PACKAGE_TYPE: !{{ config["package_type"] }}
1016
# TODO: This is a legacy variable that we eventually want to get rid of in
@@ -14,23 +20,25 @@
1420
GPU_ARCH_VERSION: !{{ config["gpu_arch_version"] }}
1521
{%- endif %}
1622
GPU_ARCH_TYPE: !{{ config["gpu_arch_type"] }}
23+
{%- if include_skip_tests %}
24+
SKIP_ALL_TESTS: 1
25+
{%- endif %}
1726
{%- if not is_windows %}
1827
DOCKER_IMAGE: !{{ config["container_image"] }}
1928
{%- endif %}
20-
SKIP_ALL_TESTS: 1
2129
{%- if config["package_type"] == "libtorch" %}
22-
{%- if config["libtorch_config"] %}
30+
{%- if config["libtorch_config"] %}
2331
LIBTORCH_CONFIG: !{{ config["libtorch_config"] }}
24-
{%- endif %}
32+
{%- endif %}
2533
LIBTORCH_VARIANT: !{{ config["libtorch_variant"] }}
26-
{%- if config["devtoolset"] %}
34+
{%- if config["devtoolset"] %}
2735
DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
28-
{%- endif %}
29-
{%- if is_windows %}
36+
{%- endif %}
37+
{%- if is_windows %}
3038
# This is a dummy value for libtorch to work correctly with our batch scripts
3139
# without this value pip does not get installed for some reason
3240
DESIRED_PYTHON: "3.7"
33-
{%- endif %}
41+
{%- endif %}
3442
{%- else %}
3543
DESIRED_PYTHON: "!{{ config["python_version"] }}"
3644
{%- endif %}
@@ -39,60 +47,21 @@
3947

4048
{%- macro upload_binaries(config, is_windows=False, has_test=True, use_s3=True) -%}
4149
!{{ config["build_name"] }}-upload: # Uploading
42-
runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts
4350
if: ${{ github.repository_owner == 'pytorch' }}
4451
{%- if has_test %}
4552
needs: !{{ config["build_name"] }}-test
4653
{%- else %}
4754
needs: !{{ config["build_name"] }}-build
4855
{%- endif %}
49-
!{{ binary_env(config, is_windows) }}
50-
steps:
51-
!{{ common.setup_ec2_linux() }}
52-
- name: Clone pytorch/pytorch
53-
uses: actions/checkout@v2
54-
{%- if use_s3 %}
55-
- uses: !{{ common.download_artifact_s3_action }}
56-
{%- else %}
57-
- uses: actions/download-artifact@v2
58-
{%- endif %}
59-
name: Download Build Artifacts
60-
with:
61-
name: !{{ config["build_name"] }}
62-
path: "${{ runner.temp }}/artifacts/"
63-
- name: Set DRY_RUN (only for tagged pushes)
64-
if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
65-
run: |
66-
echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
67-
- name: Set UPLOAD_CHANNEL (only for tagged pushes)
68-
if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
69-
run: |
70-
# reference ends with an RC suffix
71-
if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
72-
echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
73-
fi
74-
- name: Upload binaries
75-
env:
76-
PKG_DIR: "${{ runner.temp }}/artifacts"
77-
UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
78-
# When running these on pull_request events these should be blank
79-
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }}
80-
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }}
81-
ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
82-
run: |
83-
docker run --rm -i \
84-
-e ANACONDA_API_TOKEN \
85-
-e AWS_ACCESS_KEY_ID \
86-
-e AWS_SECRET_ACCESS_KEY \
87-
-e DRY_RUN \
88-
-e PACKAGE_TYPE \
89-
-e PKG_DIR=/artifacts \
90-
-e UPLOAD_CHANNEL \
91-
-e UPLOAD_SUBFOLDER \
92-
-v "${RUNNER_TEMP}/artifacts:/artifacts" \
93-
-v "${GITHUB_WORKSPACE}:/v" \
94-
-w /v \
95-
308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
96-
bash -c '.circleci/scripts/binary_upload.sh'
97-
!{{ common.teardown_ec2_linux() }}
98-
{%- endmacro -%}
56+
with:!{{ binary_env_as_input(config, is_windows) }}
57+
build_name: !{{ config["build_name"] }}
58+
{%- if not use_s3 %}
59+
use_s3: False
60+
{%- endif %}
61+
secrets:
62+
github-token: ${{ secrets.GITHUB_TOKEN }}
63+
aws-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }}
64+
aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }}
65+
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
66+
uses: ./.github/workflows/_binary-upload.yml
67+
{%- endmacro %}

0 commit comments

Comments
 (0)
0