@@ -2076,14 +2076,15 @@ def precompile_with_captured_stdout(choice) -> tuple[None, int]:
2076
2076
return None , elapsed_ns // 1000
2077
2077
2078
2078
def on_complete(future):
    """Record how long a finished precompilation future took.

    Presumably registered as a done-callback on each precompile future
    (NOTE(review): registration site is not visible here -- confirm).

    On success the future's result is a ``(None, elapsed_us)`` pair; the
    elapsed time is converted from microseconds to seconds, stored in
    ``elapsed_times`` keyed by the future, and reported at debug level.
    Futures that were cancelled or that finished with an exception are
    ignored here, so nothing is recorded for them.

    Args:
        future: a completed ``concurrent.futures.Future``.
    """
    # Done-callbacks also fire for cancelled futures, and
    # Future.exception() raises CancelledError on those, so check for
    # cancellation before asking for the exception.
    if future.cancelled():
        return

    # Compare against None explicitly rather than relying on truthiness:
    # an exception object may define __len__/__bool__ and evaluate falsy,
    # in which case future.result() below would re-raise it.
    if future.exception() is not None:
        return

    _, precompile_elapsed_us = future.result()
    elapsed_seconds = precompile_elapsed_us / 1e6
    elapsed_times[future] = elapsed_seconds
    log.debug(
        "Precompilation complete for future: %s, elapsed time: %.02fs",
        future,
        elapsed_seconds,
    )
2087
2088
2088
2089
executor = ThreadPoolExecutor (max_workers = num_workers )
2089
2090
async_compile = torch ._inductor .async_compile .AsyncCompile ()
@@ -2130,9 +2131,23 @@ def wait_on_futures():
2130
2131
timeout = precompilation_timeout_seconds ,
2131
2132
):
2132
2133
if e := future .exception ():
2133
- log . error (
2134
- "Exception %s for benchmark choice %s" , e , futures [ future ]
2134
+ from torch . _inductor . codegen . cuda . cuda_kernel import (
2135
+ CUDATemplateCaller ,
2135
2136
)
2137
+
2138
+ if isinstance (e , CUDACompileError ) and isinstance (
2139
+ futures [future ], CUDATemplateCaller
2140
+ ):
2141
+ log .debug (
2142
+ "Exception %s for benchmark choice %s" ,
2143
+ e ,
2144
+ futures [future ],
2145
+ exc_info = True ,
2146
+ )
2147
+ else :
2148
+ log .error (
2149
+ "Exception %s for benchmark choice %s" , e , futures [future ]
2150
+ )
2136
2151
else :
2137
2152
counters ["inductor" ]["select_algorithm_num_precompiles" ] += 1
2138
2153
log .info (
@@ -2238,10 +2253,13 @@ def benchmark_choices(
2238
2253
try :
2239
2254
timing = cls .benchmark_choice (choice , autotune_args )
2240
2255
except CUDACompileError as e :
2241
- log .error (
2242
- "CUDA compilation error during autotuning: \n %s. \n Ignoring this choice." ,
2243
- str (e ),
2244
- )
2256
+ from torch ._inductor .codegen .cuda .cuda_kernel import CUDATemplateCaller
2257
+
2258
+ if not isinstance (choice , CUDATemplateCaller ):
2259
+ log .error (
2260
+ "CUDA compilation error during autotuning: \n %s. \n Ignoring this choice." ,
2261
+ e ,
2262
+ )
2245
2263
timing = float ("inf" )
2246
2264
except NotImplementedError as e :
2247
2265
log .warning ("Not yet implemented: %s" , e )
0 commit comments