
cuda : add batched cuBLAS GEMM for faster attention #3749


Merged
merged 10 commits on Oct 24, 2023
cuda : add TODO for calling cublas from kernel + using mem pool
ggerganov committed Oct 24, 2023
commit d798a17c34f2326093d0cf2c0ea90b8fded15dc6
1 change: 1 addition & 0 deletions ggml-cuda.cu
@@ -7149,6 +7149,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
                 CUBLAS_GEMM_DEFAULT_TENSOR_OP));
     } else {
         // use cublasGemmBatchedEx
+        // TODO: https://github.com/ggerganov/llama.cpp/pull/3749#discussion_r1369997000
         const int ne23 = ne12*ne13;
 
         // TODO: avoid this alloc
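The new TODO points at a review discussion about serving these temporaries from a memory pool (and, longer term, invoking cuBLAS from a kernel). For context, below is a minimal sketch, not the PR's code, of how a cublasGemmBatchedEx call is typically set up: the per-batch pointer arrays must live in device memory, so each call needs a temporary allocation for them, which is what the "TODO: avoid this alloc" refers to. All names in the sketch are illustrative; in the actual code the batch count is ne23 = ne12*ne13.

// Sketch only: batched f16 GEMM via cublasGemmBatchedEx, column-major layout.
// C_i = A_i * B_i for i in [0, batch); A_i is m x k, B_i is k x n, C_i is m x n.

#include <cublas_v2.h>
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <vector>

static void gemm_batched_f16(cublasHandle_t handle,
                             const half * A, const half * B, half * C,
                             int m, int n, int k, int batch) {
    // build host-side arrays of per-batch matrix pointers
    std::vector<const void *> a_ptrs(batch), b_ptrs(batch);
    std::vector<void *>       c_ptrs(batch);
    for (int i = 0; i < batch; ++i) {
        a_ptrs[i] = A + (size_t) i * m * k;
        b_ptrs[i] = B + (size_t) i * k * n;
        c_ptrs[i] = C + (size_t) i * m * n;
    }

    // the pointer arrays themselves must be in device memory; this per-call
    // cudaMalloc is the kind of allocation a memory pool would amortize
    const void ** d_a = nullptr;
    const void ** d_b = nullptr;
    void       ** d_c = nullptr;
    cudaMalloc((void **) &d_a, batch * sizeof(void *));
    cudaMalloc((void **) &d_b, batch * sizeof(void *));
    cudaMalloc((void **) &d_c, batch * sizeof(void *));
    cudaMemcpy(d_a, a_ptrs.data(), batch * sizeof(void *), cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, b_ptrs.data(), batch * sizeof(void *), cudaMemcpyHostToDevice);
    cudaMemcpy(d_c, c_ptrs.data(), batch * sizeof(void *), cudaMemcpyHostToDevice);

    // with CUBLAS_COMPUTE_16F, alpha/beta are given in half precision
    const half alpha = __float2half(1.0f);
    const half beta  = __float2half(0.0f);

    cublasGemmBatchedEx(handle,
            CUBLAS_OP_N, CUBLAS_OP_N,
            m, n, k,
            &alpha,
            d_a, CUDA_R_16F, m,   // lda = m (column-major)
            d_b, CUDA_R_16F, k,   // ldb = k
            &beta,
            d_c, CUDA_R_16F, m,   // ldc = m
            batch,
            CUBLAS_COMPUTE_16F,
            CUBLAS_GEMM_DEFAULT_TENSOR_OP);

    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
}

Note the contrast with the cublasGemmStridedBatchedEx path taken in the if branch above: the strided variant takes a single base pointer plus a stride per matrix and needs no device-side pointer arrays, which is why the pointer-array allocation only shows up on this else path.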