File tree Expand file tree Collapse file tree 2 files changed +26
-0
lines changed Expand file tree Collapse file tree 2 files changed +26
-0
lines changed Original file line number Diff line number Diff line change @@ -349,6 +349,8 @@ cmake_dependent_option(
349
349
"NOT INTERN_BUILD_MOBILE" OFF )
350
350
cmake_dependent_option (
351
351
BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF )
352
# Opt-in switch (default OFF) that is only offered when USE_CUDA is enabled;
# when USE_CUDA is off the option is forced to OFF. Per its help text it
# requests bundling the ptxas binary into the torch/bin folder.
cmake_dependent_option(
  BUILD_BUNDLE_PTXAS "Bundle PTXAS into torch/bin folder" OFF "USE_CUDA" OFF)
352
354
353
355
option (USE_MIMALLOC "Use mimalloc" OFF )
354
356
# Enable third party mimalloc library to improve memory allocation performance on Windows.
@@ -1230,3 +1232,12 @@ if(DEFINED USE_CUSTOM_DEBINFO)
1230
1232
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -g" )
1231
1233
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -g" )
1232
1234
endif ()
1235
+
1236
# Bundle PTXAS if needed: stage the CUDA toolkit's ptxas in the build tree and
# install it as an executable into ${CMAKE_INSTALL_BINDIR}.
# NOTE(review): CUDAToolkit_BIN_DIR is not set in this section; presumably it
# comes from find_package(CUDAToolkit) elsewhere -- confirm.
if(BUILD_BUNDLE_PTXAS AND USE_CUDA)
  # Test the exact path that file(COPY) produces and install(PROGRAMS) consumes,
  # so the guard and the staged file cannot drift apart.
  if(NOT EXISTS "${PROJECT_BINARY_DIR}/ptxas")
    message(STATUS "Copying PTXAS into the bin folder")
    file(COPY "${CUDAToolkit_BIN_DIR}/ptxas" DESTINATION "${PROJECT_BINARY_DIR}")
  endif()
  install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas" DESTINATION "${CMAKE_INSTALL_BINDIR}")
endif()
Original file line number Diff line number Diff line change @@ -2277,6 +2277,20 @@ def caching_device_properties():
2277
2277
device_interface .Worker .get_device_properties ()
2278
2278
2279
2279
2280
+ def _set_triton_ptxas_path () -> None :
2281
+ if os .environ .get ("TRITON_PTXAS_PATH" ) is not None :
2282
+ return
2283
+ ptxas_path = os .path .abspath (
2284
+ os .path .join (os .path .dirname (__file__ ), ".." , "bin" , "ptxas" )
2285
+ )
2286
+ if not os .path .exists (ptxas_path ):
2287
+ return
2288
+ if os .path .isfile (ptxas_path ) and os .access (ptxas_path , os .X_OK ):
2289
+ os .environ ["TRITON_PTXAS_PATH" ] = ptxas_path
2290
+ else :
2291
+ warnings .warn (f"{ ptxas_path } exists but is not an executable" )
2292
+
2293
+
2280
2294
def _worker_compile (
2281
2295
kernel_name : str , source_code : str , cc : int , device : torch .device
2282
2296
) -> None :
@@ -2287,6 +2301,7 @@ def _worker_compile(
2287
2301
2288
2302
2289
2303
def _load_kernel(kernel_name: str, source_code: str) -> ModuleType:
    """Load a kernel through ``TritonCodeCache`` and precompile it.

    ``_set_triton_ptxas_path()`` is invoked first, before the kernel is
    loaded. The value returned is the object produced by
    ``TritonCodeCache.load(kernel_name, source_code)`` after its
    ``precompile()`` method has been called.
    """
    _set_triton_ptxas_path()
    loaded = TritonCodeCache.load(kernel_name, source_code)
    loaded.precompile()
    return loaded
You can’t perform that action at this time.
0 commit comments