Update on "[pytree] hardcode values for `none_is_leaf` and `namespace… · pytorch/pytorch@5e0601d · GitHub
[go: up one dir, main page]

Skip to content

Commit 5e0601d

Browse files
committed
Update on "[pytree] hardcode values for none_is_leaf and namespace in C++ pytree"
[ghstack-poisoned]
2 parents feede94 + 6e11709 commit 5e0601d

File tree

91 files changed

+1852
-1744
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

91 files changed

+1852
-1744
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
bed91223f660685325147a5027348356f11cdd17
1+
3a4bb06b3a3a36863ff9d7fca3cfee9d8f7b6613

.github/scripts/generate_binary_build_matrix.py

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,34 @@
2424

2525
CPU_AARCH64_ARCH = ["cpu-aarch64"]
2626

27-
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = (
28-
"nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
29-
"nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
30-
"nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
31-
"nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | "
32-
"nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
33-
"nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | "
34-
"nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
35-
"nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | "
36-
"nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
37-
"nvidia-nccl-cu12==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
38-
"nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'"
39-
)
27+
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
28+
"11.8": (
29+
"nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
30+
"nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | "
31+
"nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | "
32+
"nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | "
33+
"nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | "
34+
"nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | "
35+
"nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
36+
"nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | "
37+
"nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
38+
"nvidia-nccl-cu11==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
39+
"nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'"
40+
),
41+
"12.1": (
42+
"nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
43+
"nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
44+
"nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
45+
"nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | "
46+
"nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
47+
"nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | "
48+
"nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
49+
"nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | "
50+
"nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
51+
"nvidia-nccl-cu12==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
52+
"nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'"
53+
),
54+
}
4055

4156

4257
def get_nccl_submodule_version() -> str:
@@ -65,15 +80,17 @@ def get_nccl_submodule_version() -> str:
6580
return f"{d['NCCL_MAJOR']}.{d['NCCL_MINOR']}.{d['NCCL_PATCH']}"
6681

6782

68-
def get_nccl_wheel_version() -> str:
83+
def get_nccl_wheel_version(arch_version: str) -> str:
6984
import re
7085

71-
requrements = map(str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS))
72-
return [x for x in requrements if x.startswith("nvidia-nccl-cu")][0].split("==")[1]
86+
requirements = map(
87+
str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version])
88+
)
89+
return [x for x in requirements if x.startswith("nvidia-nccl-cu")][0].split("==")[1]
7390

7491

75-
def validate_nccl_dep_consistency() -> None:
76-
wheel_ver = get_nccl_wheel_version()
92+
def validate_nccl_dep_consistency(arch_version: str) -> None:
93+
wheel_ver = get_nccl_wheel_version(arch_version)
7794
submodule_ver = get_nccl_submodule_version()
7895
if wheel_ver != submodule_ver:
7996
raise RuntimeError(
@@ -298,7 +315,7 @@ def generate_wheels_matrix(
298315
)
299316

300317
# 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
301-
if arch_version == "12.1" and os == "linux":
318+
if arch_version in ["12.1", "11.8"] and os == "linux":
302319
ret.append(
303320
{
304321
"python_version": python_version,
@@ -310,7 +327,7 @@ def generate_wheels_matrix(
310327
"devtoolset": "",
311328
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
312329
"package_type": package_type,
313-
"pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS,
330+
"pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version], # fmt: skip
314331
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace( # noqa: B950
315332
".", "_"
316333
),
@@ -333,12 +350,13 @@ def generate_wheels_matrix(
333350
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
334351
".", "_"
335352
),
336-
"pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS
337-
if os != "linux"
338-
else "",
353+
"pytorch_extra_install_requirements":
354+
PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"] # fmt: skip
355+
if os != "linux" else "",
339356
}
340357
)
341358
return ret
342359

343360

344-
validate_nccl_dep_consistency()
361+
validate_nccl_dep_consistency("12.1")
362+
validate_nccl_dep_consistency("11.8")

.github/workflows/generated-linux-binary-manywheel-main.yml

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.github/workflows/generated-linux-binary-manywheel-nightly.yml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

aten/src/ATen/mps/MPSStream.mm

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,9 @@ @interface MPSGraphExecutionDescriptor ()
146146
}
147147

148148
void MPSStream::fill(id<MTLBuffer> buffer, uint8_t value, size_t length, size_t offset, SyncType syncType) {
149-
TORCH_INTERNAL_ASSERT(length >= offset);
150-
if (length == 0)
149+
if (length == 0) {
151150
return;
151+
}
152152
dispatch_sync(_serialQueue, ^() {
153153
@autoreleasepool {
154154
endKernelCoalescing();

aten/src/ATen/native/mps/operations/ConstantOps.mm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ static bool fill_mps_tensor_(Tensor& self, uint8_t value) {
7979
if (self.is_contiguous()) {
8080
MPSStream* stream = getCurrentMPSStream();
8181
auto storage_byte_offset = self.storage_offset() * self.itemsize();
82-
stream->fill(mps::getMTLBufferStorage(self), 0, self.storage().nbytes(), storage_byte_offset);
82+
stream->fill(mps::getMTLBufferStorage(self), value, self.nbytes(), storage_byte_offset);
8383
return true;
8484
}
8585
return false;

aten/src/ATen/native/transformers/attention.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -548,13 +548,6 @@ at::Tensor preprocess_mask(
548548
constexpr int mem_eff_alignment = 8;
549549
at::Tensor result_mask = mask;
550550
if (!aligned_tensor<mem_eff_alignment>(mask)) {
551-
TORCH_WARN_ONCE(
552-
"Memory Efficient Attention requires the attn_mask to be aligned to, ",
553-
mem_eff_alignment,
554-
" elements. "
555-
"Prior to calling SDPA, pad the last dimension of the attn_mask "
556-
"to be at least a multiple of ", mem_eff_alignment,
557-
" and then slice the attn_mask to the original size.");
558551
result_mask = pad_bias<mem_eff_alignment>(mask);
559552
}
560553
return result_mask.expand_symint(

benchmarks/dynamo/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ build-deps: clone-deps
3535
(cd ../../../torchdata && python setup.py install)
3636
(cd ../../../torchtext && python setup.py clean && python setup.py develop)
3737
(cd ../../../torchaudio && python setup.py clean && python setup.py develop)
38-
(cd ../../../FBGEMM/fbgemm_gpu && python setup.py clean && pip install -r requirements.txt && python setup.py develop)
38+
(cd ../../../FBGEMM/fbgemm_gpu && pip install -r requirements.txt && python setup.py clean && python setup.py develop)
3939
(cd ../../../torchrec && python setup.py clean && python setup.py develop)
4040
(cd ../../../detectron2 && python setup.py clean && python setup.py develop)
4141
(cd ../../../torchbenchmark && python install.py --continue_on_fail)

benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Background_Matting,pass_due_to_skip,0
1010

1111

1212

13-
DALLE2_pytorch,fail_to_run,21
13+
DALLE2_pytorch,timeout,0
1414

1515

1616

benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ opacus_cifar10,pass,0
186186

187187

188188

189-
phi_1_5,pass,74
189+
phi_1_5,pass,0
190190

191191

192192

benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_inference.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Background_Matting,pass_due_to_skip,0
1010

1111

1212

13-
DALLE2_pytorch,fail_to_run,21
13+
DALLE2_pytorch,timeout,0
1414

1515

1616

benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ opacus_cifar10,pass,0
186186

187187

188188

189-
phi_1_5,pass,74
189+
phi_1_5,pass,0
190190

191191

192192

benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_inference.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Background_Matting,pass_due_to_skip,0
1010

1111

1212

13-
DALLE2_pytorch,fail_to_run,21
13+
DALLE2_pytorch,timeout,0
1414

1515

1616

benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Background_Matting,pass_due_to_skip,0
1010

1111

1212

13-
DALLE2_pytorch,fail_to_run,21
13+
DALLE2_pytorch,timeout,0
1414

1515

1616

benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Background_Matting,pass_due_to_skip,0
1010

1111

1212

13-
DALLE2_pytorch,fail_to_run,21
13+
DALLE2_pytorch,timeout,0
1414

1515

1616

benchmarks/dynamo/common.py

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,39 @@ def onnxrt_model_iter_fn(model, inputs, collect_outputs=True):
760760

761761
return onnxrt_model_iter_fn
762762

763+
def timed_onnx(model, onnx_model: OnnxModelFromTorchScript, inputs):
764+
if current_device == "cpu" or onnx_model.is_cpu():
765+
onnxrt_model_iter_fn = create_onnx_fn(onnx_model, inputs)
766+
else:
767+
onnxrt_model_iter_fn = create_onnx_input_binded_fn(
768+
onnx_model, inputs, expected_output
769+
)
770+
return timed(
771+
model,
772+
onnxrt_model_iter_fn,
773+
inputs,
774+
return_result=True,
775+
times=times,
776+
collect_outputs=args.collect_outputs,
777+
)
778+
779+
# Insert ONNX warm-up
780+
inputs = (
781+
randomize_input(copy.deepcopy(example_inputs))
782+
if should_randomize_input
783+
else example_inputs
784+
)
785+
_, expected_output = timed(
786+
model,
787+
model_iter_fn,
788+
inputs,
789+
return_result=True,
790+
times=times,
791+
collect_outputs=args.collect_outputs,
792+
)
793+
for _ in range(2):
794+
timed_onnx(model, onnx_model, inputs)
795+
763796
for rep in range(args.repeat):
764797
inputs = (
765798
randomize_input(copy.deepcopy(example_inputs))
@@ -775,21 +808,7 @@ def onnxrt_model_iter_fn(model, inputs, collect_outputs=True):
775808
collect_outputs=args.collect_outputs,
776809
)
777810

778-
if current_device == "cpu" or onnx_model.is_cpu():
779-
onnxrt_model_iter_fn = create_onnx_fn(onnx_model, inputs)
780-
else:
781-
onnxrt_model_iter_fn = create_onnx_input_binded_fn(
782-
onnx_model, inputs, expected_output
783-
)
784-
785-
timings[rep, 1], actual_output = timed(
786-
model,
787-
onnxrt_model_iter_fn,
788-
inputs,
789-
return_result=True,
790-
times=times,
791-
collect_outputs=args.collect_outputs,
792-
)
811+
timings[rep, 1], actual_output = timed_onnx(model, onnx_model, inputs)
793812

794813
pvalue = ttest_ind(timings[:, 0], timings[:, 1]).pvalue
795814
median = np.median(timings, axis=0)

benchmarks/dynamo/timm_models.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,13 @@ def pip_install(package):
6767
"xcit_large_24_p8_224": 4,
6868
}
6969

70-
REQUIRE_HIGHER_TOLERANCE = set("sebotnet33ts_256")
70+
REQUIRE_HIGHER_TOLERANCE = {
71+
"fbnetv3_b",
72+
"hrnet_w18",
73+
"inception_v3",
74+
"sebotnet33ts_256",
75+
"selecsls42b",
76+
}
7177

7278
SCALED_COMPUTE_LOSS = {
7379
"ese_vovnet19b_dw",
@@ -304,8 +310,8 @@ def get_tolerance_and_cosine_flag(self, is_training, current_device, name):
304310
cosine = self.args.cosine
305311
tolerance = 1e-3
306312
if is_training:
307-
if REQUIRE_HIGHER_TOLERANCE:
308-
tolerance = 2 * 1e-2
313+
if name in REQUIRE_HIGHER_TOLERANCE:
314+
tolerance = 4 * 1e-2
309315
else:
310316
tolerance = 1e-2
311317
return tolerance, cosine

0 commit comments

Comments (0)