From 9ad2e7dfd2215cb89c80b8e91a1514b3e8353391 Mon Sep 17 00:00:00 2001 From: Nikita Sarychev <42014488+sARY77@users.noreply.github.com> Date: Sat, 4 Jan 2025 16:38:35 -0800 Subject: [PATCH 1/5] Remove obsolete HIP workaround --- ggml/src/ggml-cuda/ggml-cuda.cu | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index c180adc84b861..feafb93a84e3b 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -119,13 +119,6 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) } static ggml_cuda_device_info ggml_cuda_init() { -#ifdef __HIP_PLATFORM_AMD__ - // Workaround for a rocBLAS bug when using multiple graphics cards: - // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346 - rocblas_initialize(); - CUDA_CHECK(cudaDeviceSynchronize()); -#endif - ggml_cuda_device_info info = {}; cudaError_t err = cudaGetDeviceCount(&info.device_count); From 7aba1f9cf3f0f2b6f6bc8c6a9fda69c5b1b45b97 Mon Sep 17 00:00:00 2001 From: Nikita Sarychev <42014488+sARY77@users.noreply.github.com> Date: Sun, 5 Jan 2025 19:51:00 -0800 Subject: [PATCH 2/5] Remove more references to rocBLAS --- .devops/nix/package.nix | 1 - .devops/rocm.Dockerfile | 2 +- .github/workflows/build.yml | 5 +---- Makefile | 2 +- cmake/llama-config.cmake.in | 3 +-- ggml/src/ggml-cuda/vendors/hip.h | 4 ---- ggml/src/ggml-hip/CMakeLists.txt | 3 +-- 7 files changed, 5 insertions(+), 15 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 043c4364b956a..8558ef9d813a5 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -93,7 +93,6 @@ let rocmBuildInputs = with rocmPackages; [ clr hipblas - rocblas ]; vulkanBuildInputs = [ diff --git a/.devops/rocm.Dockerfile b/.devops/rocm.Dockerfile index a8088ea00da5b..2be58f3d6e11b 100644 --- a/.devops/rocm.Dockerfile +++ b/.devops/rocm.Dockerfile @@ -12,7 +12,7 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build # Unless otherwise specified, we make a fat build. # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 -# This is mostly tied to rocBLAS supported archs. +# This is mostly tied to HIP supported archs. # gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported # gfx906 is deprecated #check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 602cf5220e483..2128eab4fb200 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -344,7 +344,7 @@ jobs: id: depends run: | sudo apt-get update - sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev + sudo apt-get install -y build-essential git cmake hipblas-dev - name: Build with native CMake HIP support id: cmake_build @@ -1105,10 +1105,7 @@ jobs: $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON cmake --build build -j ${env:NUMBER_OF_PROCESSORS} - md "build\bin\rocblas\library\" cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" - cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\" - cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\" - name: Determine tag name id: tag diff --git a/Makefile b/Makefile index 19ae0d5f1c87b..435241ea886c5 100644 --- a/Makefile +++ b/Makefile @@ -781,7 +781,7 @@ endif # GGML_HIP_UMA MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64 - MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas + MK_LDFLAGS += -lhipblas -lamdhip64 HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc diff --git a/cmake/llama-config.cmake.in b/cmake/llama-config.cmake.in index 5c55bc6b822a6..72b263ed60f55 100644 --- a/cmake/llama-config.cmake.in +++ b/cmake/llama-config.cmake.in @@ -140,8 +140,7 @@ if (NOT LLAMA_SHARED_LIB) if (GGML_HIP) find_package(hip REQUIRED) find_package(hipblas REQUIRED) - find_package(rocblas REQUIRED) - list(APPEND _llama_link_deps hip::host roc::rocblas roc::hipblas) + list(APPEND _llama_link_deps hip::host roc::hipblas) endif() if (GGML_SYCL) diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h index 3205534d66f10..5a5d1eeb16ae6 100644 --- a/ggml/src/ggml-cuda/vendors/hip.h +++ b/ggml/src/ggml-cuda/vendors/hip.h @@ -3,10 +3,6 @@ #include #include #include -#ifdef __HIP_PLATFORM_AMD__ -// for rocblas_initialize() -#include "rocblas/rocblas.h" -#endif // __HIP_PLATFORM_AMD__ #define CUBLAS_COMPUTE_16F HIPBLAS_R_16F #define CUBLAS_COMPUTE_32F HIPBLAS_R_32F #define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt index b15fbd24d6b36..736c0049cf9b0 100644 --- a/ggml/src/ggml-hip/CMakeLists.txt +++ b/ggml/src/ggml-hip/CMakeLists.txt @@ -38,7 +38,6 @@ endif() find_package(hip REQUIRED) find_package(hipblas REQUIRED) -find_package(rocblas REQUIRED) message(STATUS "HIP and hipBLAS found") @@ -101,4 +100,4 @@ if (GGML_STATIC) message(FATAL_ERROR "Static linking not supported for HIP/ROCm") endif() -target_link_libraries(ggml-hip PRIVATE ggml-base hip::host roc::rocblas roc::hipblas) +target_link_libraries(ggml-hip PRIVATE ggml-base hip::host roc::hipblas) From cbf779c45032b8e67827fa8333b72b6b0267c8d1 Mon Sep 17 00:00:00 2001 From: Nikita Sarychev <42014488+sARY77@users.noreply.github.com> Date: Sat, 25 Jan 2025 17:17:21 -0800 Subject: [PATCH 3/5] Temporarily add logging of free device memory at the end of main --- examples/main/main.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index da2a03ab9ba10..e96f827f7f3c7 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -3,6 +3,10 @@ #include "console.h" #include "log.h" #include "sampling.h" +// vvv REMOVE BEFORE MERGING +#include "llama-model.h" +#include "llama-impl.h" +// ^^^ REMOVE BEFORE MERGING #include "llama.h" #include "chat-template.hpp" @@ -912,6 +916,13 @@ int main(int argc, char ** argv) { } LOG("\n\n"); + // vvv REMOVE BEFORE MERGING + for (auto * dev : model->devices) { + size_t free, total; // NOLINT + ggml_backend_dev_memory(dev, &free, &total); + LLAMA_LOG_INFO("%s: using device %s (%s) - %zu MiB free\n", __func__, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), free/1024/1024); + } + // ^^^ REMOVE BEFORE MERGING common_perf_print(ctx, smpl); common_sampler_free(smpl); From bb37819954c472297673ce834513890aad024e55 Mon Sep 17 00:00:00 2001 From: Nikita Sarychev <42014488+sARY77@users.noreply.github.com> Date: Sun, 26 Jan 2025 14:44:04 -0800 Subject: [PATCH 4/5] Address PR feedback --- .devops/nix/package.nix | 1 + .devops/rocm.Dockerfile | 2 +- .github/workflows/build.yml | 5 ++++- Makefile | 2 +- examples/main/main.cpp | 11 ----------- ggml/src/ggml-cuda/ggml-cuda.cu | 14 ++++++++++++++ ggml/src/ggml-cuda/vendors/hip.h | 4 ++++ ggml/src/ggml-hip/CMakeLists.txt | 3 ++- 8 files changed, 27 insertions(+), 15 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 8558ef9d813a5..043c4364b956a 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -93,6 +93,7 @@ let rocmBuildInputs = with rocmPackages; [ clr hipblas + rocblas ]; vulkanBuildInputs = [ diff --git a/.devops/rocm.Dockerfile b/.devops/rocm.Dockerfile index 2be58f3d6e11b..a8088ea00da5b 100644 --- a/.devops/rocm.Dockerfile +++ b/.devops/rocm.Dockerfile @@ -12,7 +12,7 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build # Unless otherwise specified, we make a fat build. # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 -# This is mostly tied to HIP supported archs. +# This is mostly tied to rocBLAS supported archs. # gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported # gfx906 is deprecated #check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7dd4111aef0ba..cd8422f8a266c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -361,7 +361,7 @@ jobs: id: depends run: | sudo apt-get update - sudo apt-get install -y build-essential git cmake hipblas-dev + sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev - name: Build with native CMake HIP support id: cmake_build @@ -1125,7 +1125,10 @@ jobs: -DGGML_HIP=ON ` -DGGML_RPC=ON cmake --build build -j ${env:NUMBER_OF_PROCESSORS} + md "build\bin\rocblas\library\" cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" + cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\" + cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\" - name: Determine tag name id: tag diff --git a/Makefile b/Makefile index 1c9b92bf3832c..295522ba356b4 100644 --- a/Makefile +++ b/Makefile @@ -781,7 +781,7 @@ endif # GGML_HIP_UMA MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64 - MK_LDFLAGS += -lhipblas -lamdhip64 + MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc diff --git a/examples/main/main.cpp b/examples/main/main.cpp index e96f827f7f3c7..da2a03ab9ba10 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -3,10 +3,6 @@ #include "console.h" #include "log.h" #include "sampling.h" -// vvv REMOVE BEFORE MERGING -#include "llama-model.h" -#include "llama-impl.h" -// ^^^ REMOVE BEFORE MERGING #include "llama.h" #include "chat-template.hpp" @@ -916,13 +912,6 @@ int main(int argc, char ** argv) { } LOG("\n\n"); - // vvv REMOVE BEFORE MERGING - for (auto * dev : model->devices) { - size_t free, total; // NOLINT - ggml_backend_dev_memory(dev, &free, &total); - LLAMA_LOG_INFO("%s: using device %s (%s) - %zu MiB free\n", __func__, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), free/1024/1024); - } - // ^^^ REMOVE BEFORE MERGING common_perf_print(ctx, smpl); common_sampler_free(smpl); diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 29d0177c8bc7c..c895381ecae11 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -120,6 +120,20 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) } static ggml_cuda_device_info ggml_cuda_init() { +#ifdef __HIP_PLATFORM_AMD__ + // Workaround for a rocBLAS bug when using multiple graphics cards: + // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346 + { + char version_string[64]; + version_string[0] = '\0'; + const rocblas_status status = rocblas_get_version_string(version_string, sizeof(version_string)); + if (status != rocblas_status_success || version_string[0] < '4') { + rocblas_initialize(); + CUDA_CHECK(cudaDeviceSynchronize()); + } + } +#endif + ggml_cuda_device_info info = {}; cudaError_t err = cudaGetDeviceCount(&info.device_count); diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h index ef7cd58208b86..8594093f052ef 100644 --- a/ggml/src/ggml-cuda/vendors/hip.h +++ b/ggml/src/ggml-cuda/vendors/hip.h @@ -4,6 +4,10 @@ #include #include #include +#ifdef __HIP_PLATFORM_AMD__ +// for rocblas_initialize() +#include "rocblas/rocblas.h" +#endif // __HIP_PLATFORM_AMD__ #define CUBLAS_COMPUTE_16F HIPBLAS_R_16F #define CUBLAS_COMPUTE_32F HIPBLAS_R_32F #define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt index 46434f79091c8..ecc3bc66d44c0 100644 --- a/ggml/src/ggml-hip/CMakeLists.txt +++ b/ggml/src/ggml-hip/CMakeLists.txt @@ -38,6 +38,7 @@ endif() find_package(hip REQUIRED) find_package(hipblas REQUIRED) +find_package(rocblas REQUIRED) message(STATUS "HIP and hipBLAS found") @@ -110,4 +111,4 @@ if (GGML_STATIC) message(FATAL_ERROR "Static linking not supported for HIP/ROCm") endif() -target_link_libraries(ggml-hip PRIVATE ggml-base hip::host roc::hipblas) +target_link_libraries(ggml-hip PRIVATE ggml-base hip::host roc::rocblas roc::hipblas) From 61d341f8180071177595de39bf9bb23edc85c75b Mon Sep 17 00:00:00 2001 From: Nikita Sarychev <42014488+sARY77@users.noreply.github.com> Date: Mon, 27 Jan 2025 21:16:54 -0800 Subject: [PATCH 5/5] Address code review feedback --- ggml/src/ggml-cuda/ggml-cuda.cu | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index c895381ecae11..a0d6a54969c3d 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -124,10 +125,20 @@ static ggml_cuda_device_info ggml_cuda_init() { // Workaround for a rocBLAS bug when using multiple graphics cards: // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346 { - char version_string[64]; - version_string[0] = '\0'; - const rocblas_status status = rocblas_get_version_string(version_string, sizeof(version_string)); - if (status != rocblas_status_success || version_string[0] < '4') { + int major_version = 0; + size_t version_length = 0; + if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) { + std::string version(version_length, '\0'); + if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) { + version.resize(::strlen(version.c_str())); + int parsed_value = 0; + if (std::from_chars(version.c_str(), version.c_str() + version.length(), parsed_value).ec == std::errc()) { + major_version = parsed_value; + } + } + } + if (major_version < 4) { + GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n"); rocblas_initialize(); CUDA_CHECK(cudaDeviceSynchronize()); }