ggml-vulkan: fix VULKAN_CHECK_RESULTS flag, which was previously broken · TabbyML/llama.cpp@c2224f0 · GitHub

Commit c2224f0

ggml-vulkan: fix VULKAN_CHECK_RESULTS flag, which was previously broken (ggml-org#5813)
1 parent e743386 commit c2224f0
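
What broke: the VULKAN_CHECK_RESULTS path is only compiled when that flag is defined, so it kept treating extra->buffer_gpu as if it were the buffer itself after the field had, judging from this diff, become a weak handle. The patch locks that handle into a vk_buffer before every ggml_vk_buffer_read call and size check. The sketch below shows the same lock-before-use pattern in isolation; the fake_* types and functions are stand-ins invented for the example, not llama.cpp APIs.

// Minimal standalone sketch of the weak-handle pattern (assumed types; not the
// llama.cpp source). The point: a weak reference must be lock()ed into a shared
// handle before it can be dereferenced or passed to a read helper.
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <memory>
#include <vector>

// Stand-in for the device buffer object that a vk_buffer handle points to.
struct fake_buffer_struct {
    std::vector<char> data;
};
// Stand-in for vk_buffer: a shared handle to the buffer object.
using fake_buffer = std::shared_ptr<fake_buffer_struct>;

// Stand-in for ggml_tensor_extra_gpu: keeps only a weak reference to the buffer.
struct fake_tensor_extra {
    std::weak_ptr<fake_buffer_struct> buffer_gpu;
    size_t offset = 0;
};

// Stand-in for ggml_vk_buffer_read: copy `size` bytes starting at `offset`.
static void fake_buffer_read(const fake_buffer & buf, size_t offset, void * dst, size_t size) {
    std::memcpy(dst, buf->data.data() + offset, size);
}

int main() {
    fake_buffer device_buf = std::make_shared<fake_buffer_struct>();
    device_buf->data.assign(64, 'x');

    fake_tensor_extra extra;
    extra.buffer_gpu = device_buf;   // weak reference, like extra->buffer_gpu
    extra.offset     = 16;

    char host_copy[32];
    // The fix in this commit is exactly this step: lock() first, then read.
    if (fake_buffer buffer_gpu = extra.buffer_gpu.lock()) {
        fake_buffer_read(buffer_gpu, extra.offset, host_copy, sizeof(host_copy));
        std::printf("read %zu bytes at offset %zu\n", sizeof(host_copy), extra.offset);
    } else {
        std::printf("buffer already released\n");
    }
    return 0;
}

A general property of this arrangement: because the extra holds only a weak reference, the debug path cannot accidentally extend the lifetime of a device allocation; lock() either yields a live handle or an empty one that can be checked.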

File tree

1 file changed: +18 -16 lines changed

ggml-vulkan.cpp

Lines changed: 18 additions & 16 deletions
@@ -5428,7 +5428,8 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
 
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
-        ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
+        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
     }
 
     std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -5540,7 +5541,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
                 for (int i3 = 0; i3 < src0->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src0->ne[2]; i2++) {
                         const int idx = i3*src0->ne[2] + i2;
-                        ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
+                        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                        ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
                     }
                 }
 
@@ -5550,10 +5552,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
                     src0_clone->nb[i] = src0_clone->nb[i - 1]*src0_clone->ne[i - 1];
                 }
             } else {
-                if (offset + src0_size >= extra->buffer_gpu->size) {
-                    src0_size = extra->buffer_gpu->size - offset;
+                vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                if (offset + src0_size >= buffer_gpu->size) {
+                    src0_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src0_clone->data, src0_size);
+                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src0_clone->data, src0_size);
                 memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -5583,7 +5586,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
                 for (int i3 = 0; i3 < src1->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src1->ne[2]; i2++) {
                         const int idx = i3*src1->ne[2] + i2;
-                        ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
+                        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                        ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
                     }
                 }
 
@@ -5593,10 +5597,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
                     src1_clone->nb[i] = src1_clone->nb[i - 1]*src1_clone->ne[i - 1];
                 }
             } else {
-                if (offset + src1_size >= extra->buffer_gpu->size) {
-                    src1_size = extra->buffer_gpu->size - offset;
+                vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                if (offset + src1_size >= buffer_gpu->size) {
+                    src1_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src1_clone->data, src1_size);
+                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src1_clone->data, src1_size);
                 memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -5643,11 +5648,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
     } else if (tensor->op == GGML_OP_RMS_NORM) {
         tensor_clone = ggml_rms_norm(ggml_ctx, src0_clone, *(float *)tensor->op_params);
     } else if (tensor->op == GGML_OP_SOFT_MAX) {
-        if (src1 != nullptr) {
-            tensor_clone = ggml_soft_max_ext(ggml_ctx, src0_clone, src1_clone, *(float *)tensor->op_params);
-        } else {
         tensor_clone = ggml_soft_max(ggml_ctx, src0_clone);
-        }
     } else if (tensor->op == GGML_OP_DIAG_MASK_INF) {
         tensor_clone = ggml_diag_mask_inf(ggml_ctx, src0_clone, *(float *)tensor->op_params);
     } else if (tensor->op == GGML_OP_ROPE) {
@@ -5753,11 +5754,12 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
 
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
-        if (extra->offset + tensor_size >= extra->buffer_gpu->size) {
-            tensor_size = extra->buffer_gpu->size - (extra->offset);
+        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+        if (extra->offset + tensor_size >= buffer_gpu->size) {
+            tensor_size = buffer_gpu->size - (extra->offset);
         }
 
-        ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
+        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
     }
 
     float first_error_result = -1.0f;
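
For reference, the last hunk keeps the pre-existing size clamp in ggml_vk_check_results_1, now applied through the locked handle: if reading the full tensor would run past the end of the buffer, the read size is trimmed to whatever remains after extra->offset. A tiny sketch of that check, with an illustrative helper name (not a backend API):

// Clamp a requested read so it stays inside a buffer of `buffer_size` bytes when
// the read starts at `offset`; mirrors the `offset + size >= buffer->size` check.
#include <cstddef>
#include <cstdio>

static size_t clamp_read_size(size_t offset, size_t requested, size_t buffer_size) {
    if (offset + requested >= buffer_size) {
        return buffer_size - offset;
    }
    return requested;
}

int main() {
    // A 4096-byte buffer with the tensor starting 3584 bytes in: a 1024-byte read
    // would overrun, so the check trims it to the 512 bytes that actually remain.
    std::printf("%zu\n", clamp_read_size(3584, 1024, 4096)); // prints 512
    return 0;
}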

0 commit comments
