[cutlass backend] Reduce log level for cutlass runtime error by henrylhtsang · Pull Request #153457 · pytorch/pytorch · GitHub
[cutlass backend] Reduce log level for cutlass runtime error #153457

Open. Wants to merge 3 commits into base branch gh/henrylhtsang/84/base.
Changes from all commits
torch/_inductor/autotune_process.py (16 changes: 15 additions & 1 deletion)
@@ -718,7 +718,7 @@ def make_run_fn(
             workspace_ptr = c_void_p(self.workspace.data_ptr())

         # Generate partial function.
-        return functools.partial(
+        ret = functools.partial(
             run_method,
             *args,
             *self.extra_args,
@@ -727,6 +727,20 @@ def make_run_fn(
             stream_ptr,
         )

+        # sanity check to make sure we cleanup run fn properly
+        try:
+            ret()
+        except RuntimeError as e:
+            err_msg = str(e)
+
+            def raise_runtime_error():
+                raise RuntimeError(err_msg)
+
+            self.cleanup_run_fn()
+            return raise_runtime_error
+
+        return ret
+
     def update_workspace_size(self) -> None:
         if self._workspace_size_updated:
             return
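The autotune_process.py change makes make_run_fn invoke the freshly built run function once before returning it: if that probe raises a RuntimeError, the workspace is cleaned up right away and a small stub is returned that re-raises the captured message, so the failure still surfaces through the normal benchmarking path (the except RuntimeError handler in select_algorithm.py below). A minimal standalone sketch of that pattern, with hypothetical run_method and cleanup arguments standing in for the request object's internals, not the actual PyTorch API:

# Sketch only: a hypothetical free function mirroring the make_run_fn change above.
import functools
from typing import Callable


def make_run_fn(run_method: Callable[..., None], *args: object,
                cleanup: Callable[[], None] = lambda: None) -> Callable[[], None]:
    ret = functools.partial(run_method, *args)

    # Probe the run fn once so a broken kernel is detected here, where its
    # resources can still be released, rather than after the caller has moved on.
    try:
        ret()
    except RuntimeError as e:
        err_msg = str(e)

        def raise_runtime_error() -> None:
            # Re-raise the captured error when the benchmark harness calls us.
            raise RuntimeError(err_msg)

        cleanup()
        return raise_runtime_error

    return ret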
torch/_inductor/select_algorithm.py (23 changes: 19 additions & 4 deletions)
@@ -2265,16 +2265,31 @@ def benchmark_choices(
                 log.warning("Not yet implemented: %s", e)
                 timing = float("inf")
             except RuntimeError as e:
+                from torch._inductor.codegen.cuda.cuda_kernel import CUDATemplateCaller
+
+                if not isinstance(choice, CUDATemplateCaller):
+                    log.error(
+                        "CUDA runtime error during autotuning: \n%s. \nIgnoring this choice.",
+                        e,
+                    )
                 msg = str(e)
                 if "invalid argument" in msg:
                     msg += "\n\nThis may mean this GPU is too small for max_autotune mode.\n\n"
                 else:
                     if "illegal memory access" in msg:
                         msg += "\n\nEither error in template or triton bug.\n"
-                log.error(
-                    "Runtime error during autotuning: \n%s. \nIgnoring this choice.",
-                    msg,
-                )
+
+                if isinstance(choice, CUDATemplateCaller):
+                    log.debug(
+                        "Runtime error during autotuning: \n%s. \nIgnoring this choice.",
+                        msg,
+                        exc_info=True,
+                    )
+                else:
+                    log.error(
+                        "Runtime error during autotuning: \n%s. \nIgnoring this choice.",
+                        msg,
+                    )
                 timing = float("inf")
             except AssertionError as e:
                 raise AssertionError(  # noqa: B904
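The select_algorithm.py change routes the same autotuning RuntimeError to different log levels depending on the choice type: CUTLASS template choices (CUDATemplateCaller) are demoted to debug with exc_info attached, while all other choices keep the error level. A standalone sketch of that routing under assumed names (the CUDATemplateCaller stub and report_autotune_error helper are illustrative, not the real module):

# Sketch only: a stand-in class and helper mirroring the log-level routing above.
import logging

log = logging.getLogger("autotune_sketch")


class CUDATemplateCaller:
    """Illustrative stand-in for torch._inductor.codegen.cuda.cuda_kernel.CUDATemplateCaller."""


def report_autotune_error(choice: object, msg: str) -> None:
    if isinstance(choice, CUDATemplateCaller):
        # Demoted to debug: only visible when debug logging is enabled;
        # exc_info=True attaches the active traceback if one exists.
        log.debug(
            "Runtime error during autotuning: \n%s. \nIgnoring this choice.",
            msg,
            exc_info=True,
        )
    else:
        log.error(
            "Runtime error during autotuning: \n%s. \nIgnoring this choice.",
            msg,
        )


if __name__ == "__main__":
    logging.basicConfig(level=logging.ERROR)
    # CUTLASS template choice: nothing is printed at the default level.
    report_autotune_error(CUDATemplateCaller(), "CUDA error: invalid argument")
    # Any other choice: still logged at error level.
    report_autotune_error(object(), "CUDA error: invalid argument")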