🐛 Describe the bug
I'm trying out things like triton and torchao on Windows.
In torch/_inductor/codecache.py, in the function write_atomic, when updating a cache entry, tmp_path.rename fails on Windows because rename does not allow overwriting an existing file. It should be tmp_path.replace, which overwrites the destination and is cross-platform.
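For context, a minimal standalone sketch of the difference between the two calls (file names are hypothetical, not from the PyTorch source):
from pathlib import Path

src = Path("entry.tmp")
dst = Path("entry.cache")
src.write_text("new contents")
dst.write_text("old contents")

# On Windows, Path.rename raises FileExistsError when dst already exists;
# Path.replace (os.replace) overwrites dst atomically on all platforms.
src.replace(dst)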
Script to reproduce:
import torch
from diffusers import DiffusionPipeline
pipeline = DiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-pipe")
pipeline = pipeline.to("cuda")
pipeline.text_encoder = torch.compile(pipeline.text_encoder, mode="max-autotune")
out = pipeline("Image of a cat")
It happens with mode="max-autotune", as long as the model is complicated enough to trigger should_pad_bench. Compiling for CPU on Windows has other issues for now, so I compile for CUDA here.
Error traceback:
Traceback (most recent call last):
File "C:\tmp\test_torch_compile_diffusion.py", line 6, in <module>
out = pipeline("Image of a cat")
File "C:\Python310\lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "C:\Python310\lib\site-packages\diffusers\pipelines\stable_diffusion\pipeline_stable_diffusion.py", line 924, in __call__
prompt_embeds, negative_prompt_embeds = self.encode_prompt(
File "C:\Python310\lib\site-packages\diffusers\pipelines\stable_diffusion\pipeline_stable_diffusion.py", line 393, in encode_prompt
prompt_embeds = self.text_encoder(text_input_ids.to(device), attention_mask=attention_mask)
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Python310\lib\site-packages\torch\_dynamo\eval_frame.py", line 433, in _fn
return fn(*args, **kwargs)
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Python310\lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Python310\lib\site-packages\torch\_dynamo\convert_frame.py", line 1116, in __call__
return self._torchdynamo_orig_callable(
File "C:\Python310\lib\site-packages\torch\_dynamo\convert_frame.py", line 948, in __call__
result = self._inner_convert(
File "C:\Python310\lib\site-packages\torch\_dynamo\convert_frame.py", line 472, in __call__
return _compile(
File "C:\Python310\lib\site-packages\torch\_utils_internal.py", line 84, in wrapper_function
return StrobelightCompileTimeProfiler.profile_compile_time(
File "C:\Python310\lib\site-packages\torch\_strobelight\compile_time_profiler.py", line 129, in profile_compile_time
return func(*args, **kwargs)
File "C:\Python310\lib\contextlib.py", line 79, in inner
return func(*args, **kwds)
File "C:\Python310\lib\site-packages\torch\_dynamo\convert_frame.py", line 817, in _compile
guarded_code = compile_inner(code, one_graph, hooks, transform)
File "C:\Python310\lib\site-packages\torch\_dynamo\utils.py", line 231, in time_wrapper
r = func(*args, **kwargs)
File "C:\Python310\lib\site-packages\torch\_dynamo\convert_frame.py", line 636, in compile_inner
out_code = transform_code_object(code, transform)
File "C:\Python310\lib\site-packages\torch\_dynamo\bytecode_transformation.py", line 1185, in transform_code_object
transformations(instructions, code_options)
File "C:\Python310\lib\site-packages\torch\_dynamo\convert_frame.py", line 178, in _fn
return fn(*args, **kwargs)
File "C:\Python310\lib\site-packages\torch\_dynamo\convert_frame.py", line 582, in transform
tracer.run()
File "C:\Python310\lib\site-packages\torch\_dynamo\symbolic_convert.py", line 2451, in run
super().run()
File "C:\Python310\lib\site-packages\torch\_dynamo\symbolic_convert.py", line 893, in run
while self.step():
File "C:\Python310\lib\site-packages\torch\_dynamo\symbolic_convert.py", line 805, in step
self.dispatch_table[inst.opcode](self, inst)
File "C:\Python310\lib\site-packages\torch\_dynamo\symbolic_convert.py", line 2642, in RETURN_VALUE
self._return(inst)
File "C:\Python310\lib\site-packages\torch\_dynamo\symbolic_convert.py", line 2627, in _return
self.output.compile_subgraph(
File "C:\Python310\lib\site-packages\torch\_dynamo\output_graph.py", line 1123, in compile_subgraph
self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
File "C:\Python310\lib\contextlib.py", line 79, in inner
return func(*args, **kwds)
File "C:\Python310\lib\site-packages\torch\_dynamo\output_graph.py", line 1318, in compile_and_call_fx_graph
compiled_fn = self.call_user_compiler(gm)
File "C:\Python310\lib\site-packages\torch\_dynamo\utils.py", line 231, in time_wrapper
r = func(*args, **kwargs)
File "C:\Python310\lib\site-packages\torch\_dynamo\output_graph.py", line 1409, in call_user_compiler
raise BackendCompilerFailed(self.compiler_fn, e).with_traceback(
File "C:\Python310\lib\site-packages\torch\_dynamo\output_graph.py", line 1390, in call_user_compiler
compiled_fn = compiler_fn(gm, self.example_inputs())
File "C:\Python310\lib\site-packages\torch\_dynamo\repro\after_dynamo.py", line 129, in __call__
compiled_gm = compiler_fn(gm, example_inputs)
File "C:\Python310\lib\site-packages\torch\__init__.py", line 1951, in __call__
return compile_fx(model_, inputs_, config_patches=self.config)
File "C:\Python310\lib\contextlib.py", line 79, in inner
return func(*args, **kwds)
File "C:\Python310\lib\site-packages\torch\_inductor\compile_fx.py", line 1261, in compile_fx
return compile_fx(
File "C:\Python310\lib\contextlib.py", line 79, in inner
return func(*args, **kwds)
File "C:\Python310\lib\site-packages\torch\_inductor\compile_fx.py", line 1505, in compile_fx
return aot_autograd(
File "C:\Python310\lib\site-packages\torch\_dynamo\backends\common.py", line 69, in __call__
cg = aot_module_simplified(gm, example_inputs, **self.kwargs)
File "C:\Python310\lib\site-packages\torch\_functorch\aot_autograd.py", line 954, in aot_module_simplified
compiled_fn, _ = create_aot_dispatcher_function(
File "C:\Python310\lib\site-packages\torch\_dynamo\utils.py", line 231, in time_wrapper
r = func(*args, **kwargs)
File "C:\Python310\lib\site-packages\torch\_functorch\aot_autograd.py", line 687, in create_aot_dispatcher_function
compiled_fn, fw_metadata = compiler_fn(
File "C:\Python310\lib\site-packages\torch\_functorch\_aot_autograd\jit_compile_runtime_wrappers.py", line 168, in aot_dispatch_base
compiled_fw = compiler(fw_module, updated_flat_args)
File "C:\Python310\lib\site-packages\torch\_dynamo\utils.py", line 231, in time_wrapper
r = func(*args, **kwargs)
File "C:\Python310\lib\site-packages\torch\_inductor\compile_fx.py", line 1352, in fw_compiler_base
_recursive_joint_graph_passes(model)
File "C:\Python310\lib\site-packages\torch\_inductor\compile_fx.py", line 256, in _recursive_joint_graph_passes
joint_graph_passes(gm)
File "C:\Python310\lib\site-packages\torch\_inductor\fx_passes\joint_graph.py", line 326, in joint_graph_passes
count += patterns.apply(graph.graph) # type: ignore[arg-type]
File "C:\Python310\lib\site-packages\torch\_inductor\pattern_matcher.py", line 1698, in apply
if is_match(m) and entry.extra_check(m):
File "C:\Python310\lib\site-packages\torch\_inductor\pattern_matcher.py", line 1314, in check_fn
if is_match(specific_pattern_match) and extra_check(specific_pattern_match):
File "C:\Python310\lib\site-packages\torch\_inductor\fx_passes\pad_mm.py", line 129, in should_pad_addmm
return should_pad_common(mat1, mat2, input) and should_pad_bench(
File "C:\Python310\lib\site-packages\torch\_inductor\fx_passes\pad_mm.py", line 404, in should_pad_bench
set_cached_base_mm_benchmark_time(ori_time_key, ori_time)
File "C:\Python310\lib\site-package
78F0
s\torch\_inductor\fx_passes\pad_mm.py", line 245, in set_cached_base_mm_benchmark_time
return get_pad_cache().set_value(key, value=value)
File "C:\Python310\lib\site-packages\torch\_inductor\codecache.py", line 230, in set_value
self.update_local_cache(cache)
File "C:\Python310\lib\site-packages\torch\_inductor\codecache.py", line 201, in update_local_cache
write_atomic(
File "C:\Python310\lib\site-packages\torch\_inductor\codecache.py", line 404, in write_atomic
tmp_path.rename(path)
File "C:\Python310\lib\pathlib.py", line 1234, in rename
self._accessor.rename(self, target)
torch._dynamo.exc.BackendCompilerFailed: backend='inductor' raised:
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\wocto\\AppData\\Local\\Temp\\torchinductor_wocto\\cache\\.45836.73820.tmp' -> 'C:\\Users\\wocto\\AppData\\Local\\Temp\\torchinductor_wocto\\cache\\d9b74eed9a7bcf764916564747f715832b7eed03eb03ed2f5f9daeaf282c1d29'
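For reference, a minimal sketch of the proposed fix, assuming write_atomic keeps a write-to-temp-file-then-move structure (the function name and body below are paraphrased for illustration; only the failing call at codecache.py:404 is known from the traceback):
import os
import tempfile
from pathlib import Path

# Hypothetical stand-in for write_atomic; the real implementation in
# torch/_inductor/codecache.py differs in detail.
def write_atomic_sketch(path: Path, content: str) -> None:
    # Write to a temp file in the same directory, then move it into place.
    fd, tmp_name = tempfile.mkstemp(dir=path.parent)
    tmp_path = Path(tmp_name)
    try:
        with os.fdopen(fd, "w") as f:
            f.write(content)
        # Path.replace overwrites an existing destination atomically on both
        # POSIX and Windows; Path.rename raises FileExistsError on Windows
        # when the destination exists, which is the bug reported here.
        tmp_path.replace(path)
    except BaseException:
        tmp_path.unlink()
        raise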
Versions
Collecting environment information...
PyTorch version: 2.4.1+cu124
Is debug build: False
CUDA used to build PyTorch: 12.4
ROCM used to build PyTorch: N/A
OS: Microsoft Windows 11 Enterprise
GCC version: (Rev1, Built by MSYS2 project) 14.2.0
Clang version: Could not collect
CMake version: version 3.29.5-msvc4
Libc version: N/A
Python version: 3.10.10 (tags/v3.10.10:aad5f6a, Feb 7 2023, 17:20:36) [MSC v.1929 64 bit (AMD64)] (64-bit runtime)
Python platform: Windows-10-10.0.22631-SP0
Is CUDA available: True
CUDA runtime version: 12.5.82
CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: GPU 0: NVIDIA GeForce RTX 3080 Ti Laptop GPU
Nvidia driver version: 561.09
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True
CPU:
Architecture=9
CurrentClockSpeed=2500
DeviceID=CPU0
Family=207
L2CacheSize=11776
L2CacheSpeed=
Manufacturer=GenuineIntel
MaxClockSpeed=2500
Name=12th Gen Intel(R) Core(TM) i9-12900H
ProcessorType=3
Revision=
Versions of relevant libraries:
[pip3] clip-anytorch==2.6.0
[pip3] dctorch==0.1.2
[pip3] flake8==7.1.1
[pip3] flake8-2020==1.8.1
[pip3] flake8-bandit==4.1.1
[pip3] flake8-broken-line==1.0.0
[pip3] flake8-bugbear==24.8.19
[pip3] flake8-builtins==2.5.0
[pip3] flake8-comprehensions==3.15.0
[pip3] flake8-datetimez==20.10.0
[pip3] flake8-eradicate==1.5.0
[pip3] flake8-isort==6.1.1
[pip3] flake8-logging==1.6.0
[pip3] flake8-noqa==1.4.0
[pip3] flake8-pie==0.16.0
[pip3] flake8-plugin-utils==1.3.3
[pip3] flake8-pytest-style==2.0.0
[pip3] flake8-quotes==3.4.0
[pip3] flake8-return==1.2.0
[pip3] flake8_simplify==0.21.0
[pip3] lion-pytorch==0.2.2
[pip3] mypy-extensions==1.0.0
[pip3] numpy==1.26.4
[pip3] onnx==1.17.0
[pip3] onnxoptimizer==0.3.13
[pip3] onnxruntime==1.19.2
[pip3] onnxruntime-gpu==1.19.2
[pip3] onnxsim==0.4.36
[pip3] open_clip_torch==2.27.1
[pip3] optree==0.13.0
[pip3] pytorch-lightning==2.4.0
[pip3] torch==2.4.1+cu124
[pip3] torchao==0.5.0+git
[pip3] torchaudio==2.4.1+cu124
[pip3] torchcrepe==0.0.23
[pip3] torchdiffeq==0.2.4
[pip3] torchlibrosa==0.1.0
[pip3] torchmetrics==1.4.3
[pip3] torchsde==0.2.6
[pip3] torchvision==0.19.1+cu124
[pip3] triton==3.1.0
[conda] No relevant packages
cc @peterjc123 @mszhanyi @skyline75489 @nbcsm @iremyux @Blackhex @ezyang @chauhang @penguinwu