Merge 'origin/master' into hipblas

ggml-org · SlyEcho · Aug 25, 2023 · Apr 19, 2023 · Apr 20, 2023 · Apr 20, 2023
commit db7a01297e691caaf670a3afd197d2802af78d67
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
@@ -1,11 +1,11 @@
 #include <stdint.h>
-#if defined(__HIP_PLATFORM_AMD__)
-#include "hip/hip_runtime.h"
-#define cudaStream_t hipStream_t
-#define __half _Float16
+#include <stdio.h>
+#if defined(GGML_USE_HIPBLAS)
+#include "hip/hip_fp16.h"
 #else
 #include <cuda_fp16.h>
 #endif
+#include <atomic>
 #include "ggml-cuda.h"
 
 typedef uint16_t ggml_fp16_t;

diff --git a/ggml-cuda.h b/ggml-cuda.h
@@ -1,5 +1,37 @@
+#if defined(GGML_USE_HIPBLAS)
+#include "hipblas/hipblas.h"
+#include "hip/hip_runtime.h"
+#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
+#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
+#define CUBLAS_OP_N HIPBLAS_OP_N
+#define CUBLAS_OP_T HIPBLAS_OP_T
+#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
+#define cublasCreate hipblasCreate
+#define cublasGemmEx hipblasGemmEx
+#define cublasHandle_t hipblasHandle_t
+#define cublasSetStream hipblasSetStream
+#define cublasSgemm hipblasSgemm
+#define cublasStatus_t hipblasStatus_t
+#define CUDA_R_16F  HIPBLAS_R_16F
+#define CUDA_R_32F  HIPBLAS_R_32F
+#define cudaError_t hipError_t
+#define cudaFree hipFree
+#define cudaGetErrorString hipGetErrorString
+#define cudaGetLastError hipGetLastError
+#define cudaMalloc hipMalloc
+#define cudaMemcpyAsync hipMemcpyAsync
+#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
+#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
+#define cudaStream_t hipStream_t
+#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
+#define cudaStreamNonBlocking hipStreamNonBlocking
+#define cudaStreamSynchronize hipStreamSynchronize
+#define cudaSuccess hipSuccess
+#define GGML_USE_CUBLAS
+#else
 #include <cublas_v2.h>
 #include <cuda_runtime.h>
+#endif
 
 #ifdef  __cplusplus
 extern "C" {

diff --git a/ggml.c b/ggml.c
@@ -147,41 +147,7 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #include <Accelerate/Accelerate.h>
 #elif defined(GGML_USE_OPENBLAS)
 #include <cblas.h>
-#elif defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
-
-#if defined(GGML_USE_HIPBLAS)
-#include "hipblas/hipblas.h"
-#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
-#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
-#define CUBLAS_OP_N HIPBLAS_OP_N
-#define CUBLAS_OP_T HIPBLAS_OP_T
-#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
-#define cublasCreate hipblasCreate
-#define cublasGemmEx hipblasGemmEx
-#define cublasHandle_t hipblasHandle_t
-#define cublasSetStream hipblasSetStream
-#define cublasSgemm hipblasSgemm
-#define cublasStatus_t hipblasStatus_t
-#define CUDA_R_16F  HIPBLAS_R_16F
-#define CUDA_R_32F  HIPBLAS_R_32F
-#define cudaError_t hipError_t
-#define cudaFree hipFree
-#define cudaGetErrorString hipGetErrorString
-#define cudaGetLastError hipGetLastError
-#define cudaMalloc hipMalloc
-#define cudaMemcpyAsync hipMemcpyAsync
-#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
-#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
-#define cudaStream_t hipStream_t
-#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
-#define cudaStreamNonBlocking hipStreamNonBlocking
-#define cudaStreamSynchronize hipStreamSynchronize
-#define cudaSuccess hipSuccess
-#define GGML_USE_CUBLAS
-#else
-#include <cublas_v2.h>
-#include <cuda_runtime.h>
-#endif
+#elif defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
 #include "ggml-cuda.h"
 #endif