kompute : llama-bench support and ggml_cpu_has_kompute() (#5226) · ggml-org/llama.cpp@e8dc55d · GitHub
Commit e8dc55d

kompute : llama-bench support and ggml_cpu_has_kompute() (#5226)

1 parent e0085fd commit e8dc55d
5 files changed, +23 -10 lines changed

common/common.cpp
Lines changed: 1 addition & 0 deletions

@@ -1521,6 +1521,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
     fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
     fprintf(stream, "cpu_has_cublas: %s\n", ggml_cpu_has_cublas() ? "true" : "false");
     fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
+    fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false");
     fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
     fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");
     fprintf(stream, "cpu_has_neon: %s\n", ggml_cpu_has_neon() ? "true" : "false");
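
With Kompute compiled in, the YAML dump gains one backend flag alongside the existing ones. An illustrative excerpt of the resulting output (a sketch; actual values depend on the build flags and host CPU):

    cpu_has_clblast: false
    cpu_has_kompute: true
    cpu_has_fma: true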

examples/llama-bench/llama-bench.cpp
Lines changed: 11 additions & 4 deletions

@@ -563,6 +563,7 @@ struct test {
     static const bool cuda;
     static const bool opencl;
     static const bool vulkan;
+    static const bool kompute;
     static const bool metal;
     static const bool gpu_blas;
     static const bool blas;
@@ -647,6 +648,9 @@ struct test {
         if (vulkan) {
             return "Vulkan";
         }
+        if (kompute) {
+            return "Kompute";
+        }
         if (metal) {
             return "Metal";
         }
@@ -662,7 +666,7 @@ struct test {
     static const std::vector<std::string> & get_fields() {
         static const std::vector<std::string> fields = {
             "build_commit", "build_number",
-            "cuda", "opencl", "vulkan", "metal", "gpu_blas", "blas",
+            "cuda", "opencl", "vulkan", "kompute", "metal", "gpu_blas", "blas",
             "cpu_info", "gpu_info",
             "model_filename", "model_type", "model_size", "model_n_params",
             "n_batch", "n_threads", "type_k", "type_v",
@@ -686,8 +690,9 @@ struct test {
             field == "avg_ns" || field == "stddev_ns") {
             return INT;
         }
-        if (field == "cuda" || field == "opencl" || field == "vulkan"|| field == "metal" || field == "gpu_blas" || field == "blas" ||
-            field == "f16_kv" || field == "no_kv_offload" || field == "mul_mat_q") {
+        if (field == "cuda" || field == "opencl" || field == "vulkan" || field == "kompute" || field == "metal" ||
+            field == "gpu_blas" || field == "blas" || field == "f16_kv" || field == "no_kv_offload" ||
+            field == "mul_mat_q") {
             return BOOL;
         }
         if (field == "avg_ts" || field == "stddev_ts") {
@@ -714,7 +719,8 @@ struct test {
         }
         std::vector<std::string> values = {
             build_commit, std::to_string(build_number),
-            std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
+            std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(kompute),
+            std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
             cpu_info, gpu_info,
             model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
             std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
@@ -743,6 +749,7 @@ const int test::build_number = LLAMA_BUILD_NUMBER;
 const bool test::cuda = !!ggml_cpu_has_cublas();
 const bool test::opencl = !!ggml_cpu_has_clblast();
 const bool test::vulkan = !!ggml_cpu_has_vulkan();
+const bool test::kompute = !!ggml_cpu_has_kompute();
 const bool test::metal = !!ggml_cpu_has_metal();
 const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
 const bool test::blas = !!ggml_cpu_has_blas();
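
For reference, llama-bench resolves the backend it reports from these compile-time flags in priority order. Below is a minimal standalone sketch of that pattern (not part of the commit; it assumes ggml.h is on the include path and uses only the detection functions shown in this diff):

    // backend_probe.cpp: mirrors test::get_backend() above, for illustration only.
    #include <cstdio>
    #include "ggml.h"

    static const char * backend_name() {
        // Same priority order as llama-bench: the first compiled-in backend wins.
        if (ggml_cpu_has_cublas())  { return "CUDA";    }
        if (ggml_cpu_has_clblast()) { return "OpenCL";  }
        if (ggml_cpu_has_vulkan())  { return "Vulkan";  }
        if (ggml_cpu_has_kompute()) { return "Kompute"; }
        if (ggml_cpu_has_metal())   { return "Metal";   }
        return "CPU";
    }

    int main() {
        std::printf("backend: %s\n", backend_name());
        return 0;
    }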

ggml.c
Lines changed: 10 additions & 1 deletion

@@ -20473,6 +20473,14 @@ int ggml_cpu_has_vulkan(void) {
 #endif
 }

+int ggml_cpu_has_kompute(void) {
+#if defined(GGML_USE_KOMPUTE)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_sycl(void) {
 #if defined(GGML_USE_SYCL)
     return 1;
@@ -20482,7 +20490,8 @@ int ggml_cpu_has_sycl(void) {
 }

 int ggml_cpu_has_gpublas(void) {
-    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_sycl();
+    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() ||
+           ggml_cpu_has_sycl();
 }

 int ggml_cpu_has_sse3(void) {
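
Note that the new function is a preprocessor gate, so its result is fixed when ggml.c is compiled (depending on whether GGML_USE_KOMPUTE is defined), not probed at runtime. A small hedged example of querying it, assuming a ggml build is linked in:

    // feature_dump.cpp: prints whether the Kompute path was compiled in.
    #include <cstdio>
    #include "ggml.h"

    int main() {
        // Returns 1 only if ggml.c was built with GGML_USE_KOMPUTE defined.
        std::printf("kompute: %d\n", ggml_cpu_has_kompute());
        // After this commit, gpublas also reflects Kompute availability.
        std::printf("gpublas: %d\n", ggml_cpu_has_gpublas());
        return 0;
    }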

ggml.h
Lines changed: 1 addition & 0 deletions

@@ -2266,6 +2266,7 @@ extern "C" {
     GGML_API int ggml_cpu_has_cublas     (void);
     GGML_API int ggml_cpu_has_clblast    (void);
     GGML_API int ggml_cpu_has_vulkan     (void);
+    GGML_API int ggml_cpu_has_kompute    (void);
     GGML_API int ggml_cpu_has_gpublas    (void);
     GGML_API int ggml_cpu_has_sse3       (void);
     GGML_API int ggml_cpu_has_ssse3      (void);

llama.cpp
Lines changed: 0 additions & 5 deletions

@@ -6878,11 +6878,6 @@ static int llama_decode_internal(
         n_threads = std::min(4, n_threads);
     }

-    const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 1;
-    if ((ggml_cpu_has_cublas() || ggml_cpu_has_vulkan()) && fully_offloaded) {
-        n_threads = 1;
-    }
-
 #ifdef GGML_USE_MPI
     const int64_t n_layer = hparams.n_layer;
     ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
