Update on "[dtensor] PART 4: move remaining DTensor ops to core distr… · pytorch/pytorch@e210585 · GitHub
[go: up one dir, main page]

Skip to content

Commit e210585

Browse files
committed
Update on "[dtensor] PART 4: move remaining DTensor ops to core distributed"
This PR moves the view-related DTensor ops to core distributed; tests will be added in follow-up PRs. Part of #88838 [ghstack-poisoned]
2 parents 5faa761 + 7694435 commit e210585

File tree

371 files changed

+9014
-5930
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

371 files changed

+9014
-5930
lines changed

.github/ci_commit_pins/vision.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ffd5a567eb90abf6b5555063da434d3c130d540f
1+
b1f6c9e271368cd84837522af39e68dd4b5768a7

.github/ci_commit_pins/xla.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
7889d2d3be16675943d84e4a4133ed7c245a623f
1+
dd9b67ff0d6ba4da6a46ca1b22e35c98dbed0d77

.github/scripts/filter_test_configs.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@
3434
"xla",
3535
}}
3636

37+
# Supported modes when running periodically
38+
SUPPORTED_PERIODICAL_MODES = {
39+
"mem_leak_check",
40+
"rerun_disabled_tests",
41+
}
42+
43+
3744
def parse_args() -> Any:
3845
from argparse import ArgumentParser
3946
parser = ArgumentParser("Filter all test configurations and keep only requested ones")
@@ -109,6 +116,23 @@ def filter(test_matrix: Dict[str, List[Any]], labels: Set[str]) -> Dict[str, Lis
109116
return filtered_test_matrix
110117

111118

119+
def set_periodic_modes(test_matrix: Dict[str, List[Any]]) -> Dict[str, List[Any]]:
120+
"""
121+
Apply all periodic modes when running under a schedule
122+
"""
123+
scheduled_test_matrix: Dict[str, List[Any]] = {
124+
"include": [],
125+
}
126+
127+
for config in test_matrix.get("include", []):
128+
for mode in SUPPORTED_PERIODICAL_MODES:
129+
cfg = config.copy()
130+
cfg[mode] = mode
131+
scheduled_test_matrix["include"].append(cfg)
132+
133+
return scheduled_test_matrix
134+
135+
112136
def set_output(name: str, val: Any) -> None:
113137
if os.getenv("GITHUB_OUTPUT"):
114138
with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
@@ -163,8 +187,7 @@ def main() -> None:
163187
filtered_test_matrix = test_matrix
164188

165189
if args.event_name == "schedule":
166-
for config in filtered_test_matrix.get("include", []):
167-
config["mem_leak_check"] = "mem_leak_check"
190+
filtered_test_matrix = set_periodic_modes(filtered_test_matrix)
168191

169192
# Set the filtered test matrix as the output
170193
set_output("test-matrix", json.dumps(filtered_test_matrix))

.github/scripts/install_nvidia_utils_linux.sh

-131
This file was deleted.

.github/scripts/test_filter_test_configs.py

+29-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,14 @@
44
import yaml
55
import json
66
from unittest import TestCase, main, mock
7-
from filter_test_configs import get_labels, filter, PREFIX, VALID_TEST_CONFIG_LABELS
7+
from filter_test_configs import (
8+
get_labels,
9+
filter,
10+
set_periodic_modes,
11+
PREFIX,
12+
VALID_TEST_CONFIG_LABELS,
13+
SUPPORTED_PERIODICAL_MODES
14+
)
815
import requests
916
from requests.models import Response
1017
from typing import Any, Dict
@@ -86,5 +93,26 @@ def test_filter_with_valid_label(self) -> None:
8693
self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))
8794

8895

96+
def test_set_periodic_modes(self) -> None:
97+
testcases = [
98+
{
99+
"test_matrix": "{include: []}",
100+
"description": "Empty test matrix",
101+
},
102+
{
103+
"test_matrix": '{include: [{config: "default", runner: "linux"}, {config: "cfg", runner: "macos"}]}',
104+
"descripion": "Replicate each periodic mode in a different config",
105+
},
106+
]
107+
108+
for case in testcases:
109+
test_matrix = yaml.safe_load(case["test_matrix"])
110+
scheduled_test_matrix = set_periodic_modes(test_matrix)
111+
self.assertEqual(
112+
len(test_matrix["include"]) * len(SUPPORTED_PERIODICAL_MODES),
113+
len(scheduled_test_matrix["include"])
114+
)
115+
116+
89117
if __name__ == '__main__':
90118
main()

.github/workflows/_binary-test-linux.yml

+1-10
Original file line numberDiff line numberDiff line change
@@ -171,17 +171,8 @@ jobs:
171171
path: "${{ runner.temp }}/artifacts/"
172172

173173
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
174-
uses: nick-fields/retry@3e91a01664abd3c5cd539100d10d33b9c5b68482
174+
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
175175
if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' }}
176-
with:
177-
timeout_minutes: 10
178-
max_attempts: 3
179-
command: |
180-
set -ex
181-
pushd pytorch
182-
bash .github/scripts/install_nvidia_utils_linux.sh
183-
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
184-
popd
185176

186177
- name: Pull Docker image
187178
uses: pytorch/test-infra/.github/actions/pull-docker-image@main

.github/workflows/_linux-test.yml

+4-9
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,8 @@ jobs:
7474
docker-image: ${{ inputs.docker-image }}
7575

7676
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
77-
uses: nick-fields/retry@3e91a01664abd3c5cd539100d10d33b9c5b68482
77+
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
7878
if: contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu')
79-
with:
80-
timeout_minutes: 10
81-
max_attempts: 3
82-
command: |
83-
set -ex
84-
bash .github/scripts/install_nvidia_utils_linux.sh
85-
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
8679

8780
- name: Start monitoring script
8881
id: monitor-script
@@ -122,7 +115,8 @@ jobs:
122115
DOCKER_IMAGE: ${{ inputs.docker-image }}
123116
XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
124117
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
125-
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
118+
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
119+
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
126120
timeout-minutes: 240
127121
run: |
128122
set -x
@@ -177,6 +171,7 @@ jobs:
177171
-e XLA_CUDA \
178172
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
179173
-e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
174+
-e PYTORCH_TEST_RERUN_DISABLED_TESTS \
180175
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
181176
--ulimit stack=10485760:83886080 \
182177
--security-opt seccomp=unconfined \

.github/workflows/_mac-build.yml

+10-5
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,17 @@ jobs:
109109
brew link --force libomp
110110
111111
- name: Install sccache (only for non-forked PRs, and pushes to trunk)
112+
uses: nick-fields/retry@v2.8.2
112113
if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
113-
run: |
114-
sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
115-
sudo chmod +x /usr/local/bin/sccache
116-
echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
117-
echo "SCCACHE_S3_KEY_PREFIX=${GITHUB_WORKFLOW}" >> "${GITHUB_ENV}"
114+
with:
115+
timeout_minutes: 5
116+
max_attempts: 3
117+
retry_wait_seconds: 90
118+
command: |
119+
sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
120+
sudo chmod +x /usr/local/bin/sccache
121+
echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
122+
echo "SCCACHE_S3_KEY_PREFIX=${GITHUB_WORKFLOW}" >> "${GITHUB_ENV}"
118123
119124
- name: Get workflow job id
120125
id: get-job-id

.github/workflows/_mac-test-mps.yml

+16-2
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ jobs:
6666
${CONDA_RUN} python3 -mpip install --no-index --no-deps dist/*.whl
6767
6868
- name: Run MPS tests
69+
id: test
6970
env:
7071
ENV_NAME: conda-test-env-${{ github.run_id }}
7172
shell: arch -arch arm64 bash {0}
@@ -74,5 +75,18 @@ jobs:
7475
set -ex
7576
# TODO(https://github.com/pytorch/pytorch/issues/79293)
7677
77-
${CONDA_RUN} --cwd test python3 test_mps.py -v
78-
${CONDA_RUN} --cwd test python3 test_metal.py -v
78+
${CONDA_RUN} python3 test/run_test.py --mps --verbose
79+
80+
- name: Get workflow job id
81+
id: get-job-id
82+
uses: ./.github/actions/get-workflow-job-id
83+
if: always()
84+
with:
85+
github-token: ${{ secrets.GITHUB_TOKEN }}
86+
87+
- name: Upload test artifacts
88+
uses: ./.github/actions/upload-test-artifacts
89+
if: always() && (steps.test.conclusion == 'success' || steps.test.conclusion == 'failure')
90+
with:
91+
use-gha: true
92+
file-suffix: ${{ github.job }}-mps-1-1-macos-m1-12_${{ steps.get-job-id.outputs.job-id }}

.github/workflows/_mac-test.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ jobs:
129129
- name: Test
130130
id: test
131131
env:
132-
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
132+
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
133+
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
133134
run: |
134135
COMMIT_MESSAGES=$(git cherry -v "origin/${GIT_DEFAULT_BRANCH:-master}")
135136

.github/workflows/_rocm-test.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ jobs:
9797
DOCKER_IMAGE: ${{ inputs.docker-image }}
9898
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
9999
PYTORCH_JIT_ENABLE_NVFUSER: 1
100-
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
100+
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
101+
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
101102
timeout-minutes: 270
102103
run: |
103104
set -x
@@ -148,6 +149,7 @@ jobs:
148149
-e SCCACHE_BUCKET \
149150
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
150151
-e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
152+
-e PYTORCH_TEST_RERUN_DISABLED_TESTS \
151153
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
152154
--ulimit stack=10485760:83886080 \
153155
--security-opt seccomp=unconfined \

.github/workflows/_win-test.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ jobs:
124124
TEST_CONFIG: ${{ matrix.config }}
125125
PR_BODY: ${{ github.event.pull_request.body }}
126126
TORCH_CUDA_ARCH_LIST: "7.0"
127-
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0'}}
127+
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
128+
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
128129
run: |
129130
COMMIT_MESSAGES=$(git cherry -v "origin/${GIT_DEFAULT_BRANCH:-master}")
130131

.github/workflows/docker-release.yml

+14
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,20 @@ jobs:
9191
# WITH_PUSH is used here to determine whether or not to add the --push flag
9292
run: |
9393
make -f docker.Makefile "${BUILD_IMAGE_TYPE}-image"
94+
- name: Push nightly tags
95+
if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' }}
96+
run: |
97+
PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-runtime"
98+
CUDA_VERSION=$(python3 -c "import re;print(re.search('CUDA_VERSION\s+=\s+([0-9\.]+)',open('docker.Makefile').read())[1],end='')")
99+
PYTORCH_NIGHTLY_COMMIT=$(docker run ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
100+
python -c 'import torch; print(torch.version.git_version[:7],end="")')
101+
docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
102+
ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
103+
docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
104+
105+
docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}" \
106+
ghcr.io/pytorch/pytorch-nightly:latest
107+
docker push ghcr.io/pytorch/pytorch-nightly:latest
94108
- name: Teardown Linux
95109
uses: pytorch/test-infra/.github/actions/teardown-linux@main
96110
if: always()

0 commit comments

Comments
 (0)
0