-
Notifications
You must be signed in to change notification settings - Fork 64
Closed
Labels
bug: Something isn't working
Description
repro script
import torch
from nvfuser import FusionDefinition, DataType
def nvfuser_fusion_id0(fd: FusionDefinition) -> None:
    """Populate ``fd`` with the fusion that triggers the reported error.

    Declares two rank-3 float inputs, reduces each of them over axis 1,
    multiplies the two reduced tensors, and registers the product as the
    only output of the fusion.

    Args:
        fd: The fusion definition that the tensors and ops are recorded on.
    """
    # First input: last dimension is declared with extent 1 (contiguity None).
    T0 = fd.define_tensor(shape=[-1, -1, 1], contiguity=[True, True, None], dtype=DataType.Float, is_cpu=False, stride_order=[2, 1, 0])
    # Second input: middle dimension is declared with extent 1 (contiguity None).
    T1 = fd.define_tensor(shape=[-1, 1, -1], contiguity=[True, None, True], dtype=DataType.Float, is_cpu=False, stride_order=[2, 1, 0])
    # Both reductions run over axis 1 and drop it; for T1 that is the
    # extent-1 axis, for T0 it is not.
    T2 = fd.ops.sum(T0, axes=[1], keepdim=False, dtype=DataType.Null)
    T3 = fd.ops.sum(T1, axes=[1], keepdim=False, dtype=DataType.Null)
    T4 = fd.ops.mul(T2, T3)
    fd.add_output(T4)
# Record the fusion on a fresh definition.
with FusionDefinition() as fd:
    nvfuser_fusion_id0(fd)

# Concrete CUDA inputs: strided views of shape (1024, 512, 1) and
# (1024, 1, 512), each laid over a flat 524288-element random buffer.
inputs = [
    torch.randn((524288,), dtype=torch.float32, device='cuda:0').as_strided((1024, 512, 1), (512, 1, 1)),
    torch.randn((524288,), dtype=torch.float32, device='cuda:0').as_strided((1024, 1, 512), (512, 512, 1)),
]

# This call is where the RuntimeError in the traceback below is raised.
fd.execute(inputs)
Running the script above fails with the following error:
Traceback (most recent call last):
File "/opt/pytorch/nvfuser/nvfuser/__init__.py", line 137, in execute
result = self._execute(
RuntimeError: Merging IterDomains requires that their iteration types match. Outer: iS69{32}, Inner: rS7{i1}
Exception raised from merge at /opt/pytorch/nvfuser/csrc/ir/nodes.cpp:2692 (most recent call first):
frame #0: nvfuser::nvfCheckFail(char const*, char const*, unsigned int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0xf3 (0x7fd2d0278a43 in /opt/pytorch/nvfuser/nvfuser/lib/libnvfuser_codegen.so)
frame #1: nvfuser::IterDomain::merge(nvfuser::IterDomain*, nvfuser::IterDomain*, bool) + 0x39b (0x7fd2d04fd27b in /opt/pytorch/nvfuser/nvfuser/lib/libnvfuser_codegen.so)
frame #2: nvfuser::TensorDomain::merge(int, int) + 0xc9 (0x7fd2d04fd3d9 in /opt/pytorch/nvfuser/nvfuser/lib/libnvfuser_codegen.so)
frame #3: nvfuser::TensorView::merge(int, int) + 0xdb (0x7fd2d07cffcb in /opt/pytorch/nvfuser/nvfuser/lib/libnvfuser_codegen.so)
frame #4: nvfuser::scheduleTranspose(nvfuser::Fusion*, nvfuser::TransposeParams) + 0x109b (0x7fd2d071fb5b in /opt/pytorch/nvfuser/nvfuser/lib/libnvfuser_codegen.so)
frame #5: nvfuser::TransposeScheduler::schedule(nvfuser::Fusion*) + 0xe0 (0x7fd2d0721db0 in /opt/pytorch/nvfuser/nvfuser/lib/libnvfuser_codegen.so)
frame #6: nvfuser::FusionKernelRuntime::compileKernel(nvfuser::KernelArgumentHolder const&, nvfuser::SegmentedGroup*) + 0x151 (0x7fd2d0533ab1 in /opt/pytorch/nvfuser/nvfuser/lib/libnvfuser_codegen.so)
frame #7: nvfuser::FusionKernelRuntime::compileFusionParallel(nvfuser::KernelArgumentHolder) + 0x41c (0x7fd2d0539b9c in /opt/pytorch/nvfuser/nvfuser/lib/libnvfuser_codegen.so)
frame #8: nvfuser::FusionExecutorCache::runFusionWithInputs(c10::ArrayRef<c10::IValue> const&, std::optional<nvfuser::PrimDataType>, std::optional<signed char>) + 0xa43 (0x7fd2d0545523 in /opt/pytorch/nvfuser/nvfuser/lib/libnvfuser_codegen.so)
frame #9: nvfuser::python_frontend::FusionDefinition::execute(c10::ArrayRef<c10::IValue> const&, bool, bool, std::optional<signed char>) const + 0x16c (0x7fd2d0854a2c in /opt/pytorch/nvfuser/nvfuser/lib/libnvfuser_codegen.so)
frame #10: <unknown function> + 0xf152e (0x7fd2d0ba252e in /opt/pytorch/nvfuser/nvfuser/_C.cpython-310-x86_64-linux-gnu.so)
frame #11: <unknown function> + 0x177488 (0x7fd2d0c28488 in /opt/pytorch/nvfuser/nvfuser/_C.cpython-310-x86_64-linux-gnu.so)
<omitting python frames>
frame #27: <unknown function> + 0x29d90 (0x7fd4f73bbd90 in /usr/lib/x86_64-linux-gnu/libc.so.6)
frame #28: __libc_start_main + 0x80 (0x7fd4f73bbe40 in /usr/lib/x86_64-linux-gnu/libc.so.6)
Metadata
Metadata
Assignees
Labels
bug: Something isn't working