10000
We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ab77d76 commit 4640eff — Copy full SHA for 4640eff
llama.cpp
@@ -852,8 +852,11 @@ static bool llama_eval_internal(
852
};
853
854
struct ggml_context * ctx0 = ggml_init(params);
855
+
856
+ // for big prompts, if BLAS is enabled, it is better to use only one thread
857
+ // otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance
858
ggml_cgraph gf = {};
- gf.n_threads = n_threads;
859
+ gf.n_threads = N > 255 && ggml_cpu_has_blas() ? 1 : n_threads;
860
861
struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
862
memcpy(embd->data, tokens, N*ggml_element_size(embd));
0 commit comments