Add assert to make sure we only allocate temp buffer for non-CPU backend tensor

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
ggml-org · JohannesGaessler · Jun 12, 2023 · Jun 10, 2023 · Jun 10, 2023 · Jun 12, 2023
commit 61726bd9421e2b9c2720cd1349d2d4119b151eaa
diff --git a/llama.cpp b/llama.cpp
@@ -762,6 +762,7 @@ struct llama_model_loader {
 
             // allocate temp buffer if not using mmap
             if (!use_mmap && lt.data == NULL) {
+                GGML_ASSERT(lt.ggml_tensor->backend != GGML_BACKEND_CPU);
                 lt.data = (uint8_t*)malloc(ggml_nbytes(lt.ggml_tensor));
             }