11 files changed
+11
-11
lines changed- .bazelrc-1
- .bazelversion-1
- .github/workflows/build.yml-68
- BUILD.bazel-416
- CMakeLists.txt+93-146
- CODE_OF_CONDUCT.md-76
- CONTRIBUTING.md-31
- README.md+3-117
- WORKSPACE.bazel-1
- cmake/DownloadGoogleBenchmark.cmake+2-2
- cmake/DownloadGoogleTest.cmake+2-2
- cmake/cpuinfo-config.cmake.in-12
- deps/clog/BUILD.bazel-58
- deps/clog/CMakeLists.txt+1-4
- deps/clog/src/clog.c+5-18
- include/cpuinfo.h+5-63
- libcpuinfo.pc.in-12
- scripts/local-build.sh-38
- src/arm/cache.c-61
- src/arm/linux/aarch32-isa.c+1-8
- src/arm/linux/aarch64-isa.c-18
- src/arm/linux/chipset.c+8-86
- src/arm/linux/clusters.c+10-10
- src/arm/linux/cpuinfo.c-8
- src/arm/linux/init.c+1-1
- src/arm/linux/midr.c+3-3
- src/arm/mach/init.c+101-118
- src/arm/midr.h+14-17
- src/arm/uarch.c-17
- src/arm/windows/init-by-logical-sys-info.c10000-885
- src/arm/windows/init.c-253
- src/arm/windows/windows-arm-init.h-32
- src/cpuinfo/internal-api.h+1-5
- src/init.c-2
- src/x86/name.c+1-1
- test/arm-cache.cc+4-4
- tools/cpu-info.c-16
- tools/isa-info.c+2-6
Submodule cudnn_frontend updated 28 files
- CMakeLists.txt+2-7
- Doxyfile+4-4
- README.md+1-4
- contrib/catch2/catch.hpp+267-438
- include/cudnn_frontend.h-3
- include/cudnn_frontend_ConvDesc.h+14-51
- include/cudnn_frontend_ExecutionPlan.h+11-46
- include/cudnn_frontend_ExecutionPlanCache.h-5
- include/cudnn_frontend_Filters.h-2
- include/cudnn_frontend_Heuristics.h-3
- include/cudnn_frontend_MatMulDesc.h+6-12
- include/cudnn_frontend_Operation.h+18-481
- include/cudnn_frontend_OperationGraph.h+1-2
- include/cudnn_frontend_PointWiseDesc.h+7-21
- include/cudnn_frontend_ReductionDesc.h+6-12
- include/cudnn_frontend_Resample.h-459
- include/cudnn_frontend_Tensor.h+4-63
- include/cudnn_frontend_utils.h-221
- samples/conv_sample.cpp+63-98
- samples/conv_sample.h+1-2
- samples/cpu_references.h-91
- samples/fp16_emu.cpp+7-21
- samples/fp16_emu.h+13-41
- samples/fusion_sample.cpp+205-1.2k
- samples/fusion_sample.h+2-68
- samples/helpers.h+6-21
- samples/mha_sample.cpp+34-34
- samples/test_list.cpp+6-504
Submodule gloo updated from 5b14351 to c22a5cf
- libkineto/CMakeLists.txt+4-3
- libkineto/include/ActivityType.h+1-1
- libkineto/include/Config.h+9-8
- libkineto/include/ILoggerObserver.h-2
- libkineto/include/libkineto.h+2-18
- libkineto/libkineto_defs.bzl+1-1
- libkineto/sample_programs/kineto_stress_test.cpp-179
- libkineto/sample_programs/random_ops_stress_test.cu-587
- libkineto/sample_programs/random_ops_stress_test.cuh-67
- libkineto/src/ActivityProfilerController.cpp+47-114
- libkineto/src/ActivityProfilerController.h+7-15
- libkineto/src/ActivityType.cpp+2-2
- libkineto/src/Config.cpp+4-5
- libkineto/src/ConfigLoader.h+5
- libkineto/src/CudaDeviceProperties.cpp-30
- libkineto/src/CudaDeviceProperties.h-5
- libkineto/src/CuptiActivity.h-1
- libkineto/src/CuptiActivity.tpp+8-90
- libkineto/src/CuptiActivityApi.cpp+35-4
- libkineto/src/CuptiActivityApi.h+1
- libkineto/src/CuptiActivityProfiler.cpp+105-165
- libkineto/src/CuptiActivityProfiler.h+22-74
- libkineto/src/CuptiNvPerfMetric.cpp+12-27
- libkineto/src/CuptiRangeProfiler.cpp+7-6
- libkineto/src/CuptiRangeProfilerApi.cpp+5-9
- libkineto/src/CuptiRangeProfilerApi.h+2-2
- libkineto/src/Logger.cpp-14
- libkineto/src/Logger.h-15
- libkineto/src/LoggerCollector.h-2
- libkineto/src/RoctracerActivityApi.cpp+5-5
- libkineto/src/RoctracerActivityApi.h-1
- libkineto/src/cuda_call.h-25
- libkineto/src/init.cpp+6-4
- libkineto/src/output_base.h+17
- libkineto/src/output_json.cpp+191-27
- libkineto/src/output_json.h+11-7
- libkineto/src/output_membuf.h+23
- libkineto/test/ConfigTest.cpp-6
- libkineto/test/CuptiActivityProfilerTest.cpp+5-1
- libkineto/test/MockActivitySubProfiler.cpp+2-4
- tb_plugin/README.md+1-1
- tb_plugin/fe/yarn.lock+40-60
- tb_plugin/torch_tb_profiler/profiler/trace.py+9-9
- ext-net/google-fastsocket/Makefile-22
- makefiles/common.mk+3-2
- makefiles/version.mk+1-1
- src/Makefile+9-10
- src/bootstrap.cc+252-156
- src/channel.cc+49-31
- src/collectives/device/Makefile+3-8
- src/collectives/device/all_gather.h+13-13
- src/collectives/device/all_reduce.h+74-153
- src/collectives/device/broadcast.h+15-15
- src/collectives/device/common.h+120-181
- src/collectives/device/common_kernel.h+31-51
- src/collectives/device/functions.cu+65-71
- src/collectives/device/gen_rules.sh+3-11
- src/collectives/device/onerank_reduce.cu-60
- src/collectives/device/primitives.h+2-2
- src/collectives/device/prims_ll.h+14-21
- src/collectives/device/prims_ll128.h+14-16
- src/collectives/device/prims_simple.h+84-230
- src/collectives/device/reduce.h+14-14
- src/collectives/device/reduce_kernel.h+53-100
- src/collectives/device/reduce_scatter.h+13-13
- src/collectives/device/sendrecv.h+72-73
- src/collectives/sendrecv.cc+4-4
- src/debug.cc+40-76
- src/enhcompat.cc-28
- src/enqueue.cc+778-1.3k
- src/graph/connect.cc+17-23
- src/graph/paths.cc+40-231
- src/graph/search.cc+161-238
- src/graph/topo.cc+49-92
- src/graph/topo.h+35-64
- src/graph/tuning.cc+25-46
- src/graph/xml.cc+14-8
- src/graph/xml.h+1-14
- src/group.cc+344-387
- src/include/alloc.h+18-120
- src/include/bootstrap.h+5-5
- src/include/channel.h-32
- src/include/checks.h+4-82
- src/include/coll_net.h+17-18
- src/include/collectives.h+54-86
- src/include/comm.h+56-266
- src/include/core.h-1
- src/include/cudawrap.h-88
- src/include/debug.h+8-12
- src/include/devcomm.h+74-130
- src/include/enqueue.h+54-8
- src/include/gdrwrap.h-1
- src/include/graph.h+5-12
- src/include/group.h+5-97
- src/include/ibvwrap.h+2-7
- src/include/info.h+9-71
- src/include/nccl_net.h+14-203
- src/include/net.h+58-25
- src/include/nvmlwrap.h+68-57
- src/include/p2p.h+15
- src/include/param.h+67-15
- src/include/profiler.h-37
- src/include/proxy.h+57-147
- src/include/shm.h+61-5
- src/include/socket.h+429-38
- src/include/strongstream.h-142
- src/include/timer.h-60
- src/include/transport.h+8-22
- src/include/utils.h+56-431
- src/init.cc+427-936
- src/misc/argcheck.cc+15-13
- src/misc/cudawrap.cc-154
- src/misc/gdrwrap.cc+17-12
- src/misc/ibvwrap.cc+24-55
- src/misc/nvmlwrap.cc+177-220
- src/misc/param.cc-81
- src/misc/profiler.cc-115
- src/misc/shmutils.cc-93
- src/misc/socket.cc-571
- src/misc/strongstream.cc-272
- src/misc/utils.cc+5-103
- src/nccl.h.in+6-91
- src/net.cc-352
- src/proxy.cc+354-998
- src/transport.cc+43-53
- src/transport/coll_net.cc+249-526
- src/transport/net.cc+275-889
- src/transport/net_ib.cc+217-582
- src/transport/net_socket.cc+68-151
- src/transport/p2p.cc+95-360
- src/transport/shm.cc+72-323
Submodule onnx updated 1761 files
0 commit comments