@@ -5428,7 +5428,8 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
 
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
-        ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
+        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
     }
 
     std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -5540,7 +5541,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
                 for (int i3 = 0; i3 < src0->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src0->ne[2]; i2++) {
                         const int idx = i3*src0->ne[2] + i2;
-                        ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
+                        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                        ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
                     }
                 }
 
@@ -5550,10 +5552,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
                     src0_clone->nb[i] = src0_clone->nb[i - 1]*src0_clone->ne[i - 1];
                 }
             } else {
-                if (offset + src0_size >= extra->buffer_gpu->size) {
-                    src0_size = extra->buffer_gpu->size - offset;
+                vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                if (offset + src0_size >= buffer_gpu->size) {
+                    src0_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src0_clone->data, src0_size);
+                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src0_clone->data, src0_size);
                 memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -5583,7 +5586,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
                 for (int i3 = 0; i3 < src1->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src1->ne[2]; i2++) {
                         const int idx = i3*src1->ne[2] + i2;
-                        ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
+                        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                        ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
                     }
                 }
 
@@ -5593,10 +5597,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
                     src1_clone->nb[i] = src1_clone->nb[i - 1]*src1_clone->ne[i - 1];
                 }
             } else {
-                if (offset + src1_size >= extra->buffer_gpu->size) {
-                    src1_size = extra->buffer_gpu->size - offset;
+                vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                if (offset + src1_size >= buffer_gpu->size) {
+                    src1_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src1_clone->data, src1_size);
+                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src1_clone->data, src1_size);
                 memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -5643,11 +5648,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
     } else if (tensor->op == GGML_OP_RMS_NORM) {
         tensor_clone = ggml_rms_norm(ggml_ctx, src0_clone, *(float *)tensor->op_params);
     } else if (tensor->op == GGML_OP_SOFT_MAX) {
-        if (src1 != nullptr) {
-            tensor_clone = ggml_soft_max_ext(ggml_ctx, src0_clone, src1_clone, *(float *)tensor->op_params);
-        } else {
             tensor_clone = ggml_soft_max(ggml_ctx, src0_clone);
-        }
     } else if (tensor->op == GGML_OP_DIAG_MASK_INF) {
         tensor_clone = ggml_diag_mask_inf(ggml_ctx, src0_clone, *(float *)tensor->op_params);
     } else if (tensor->op == GGML_OP_ROPE) {
@@ -5753,11 +5754,12 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
 
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
-        if (extra->offset + tensor_size >= extra->buffer_gpu->size) {
-            tensor_size = extra->buffer_gpu->size - (extra->offset);
+        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+        if (extra->offset + tensor_size >= buffer_gpu->size) {
+            tensor_size = buffer_gpu->size - (extra->offset);
         }
 
-        ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
+        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
     }
 
     float first_error_result = -1.0f;
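Note: every hunk above makes the same kind of change, replacing a direct use of extra->buffer_gpu with a vk_buffer obtained via extra->buffer_gpu.lock() before calling ggml_vk_buffer_read, and clamping the read size against the locked buffer's size. This suggests the tensor extra now holds a weak (non-owning) reference to the device buffer that must be promoted to an owning handle for the duration of the readback. Below is a minimal, self-contained C++ sketch of that pattern using std::weak_ptr; the types and helpers (fake_device_buffer, fake_buffer_read, read_back) are illustrative placeholders, not the actual ggml-vulkan API.

// Sketch of the lock-before-read pattern, assuming the tensor extra stores a
// std::weak_ptr to the device buffer. All names here are hypothetical.
#include <cstddef>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct fake_device_buffer {
    std::vector<char> data;  // stand-in for device memory
    size_t size = 0;
};

using buffer_ptr = std::shared_ptr<fake_device_buffer>;
using buffer_ref = std::weak_ptr<fake_device_buffer>;  // non-owning handle kept in the extra

struct fake_tensor_extra {
    buffer_ref buffer_gpu;  // does not keep the buffer alive by itself
    size_t offset = 0;
};

// Stand-in for ggml_vk_buffer_read(): copy a range out of the "device" buffer.
static void fake_buffer_read(const buffer_ptr & buf, size_t offset, void * dst, size_t size) {
    std::memcpy(dst, buf->data.data() + offset, size);
}

static void read_back(const fake_tensor_extra & extra, void * dst, size_t size) {
    buffer_ptr buffer_gpu = extra.buffer_gpu.lock();  // promote weak -> owning for this call
    if (!buffer_gpu) {
        std::cerr << "buffer already freed" << std::endl;
        return;
    }
    // Clamp the read to the end of the buffer, mirroring the size checks in the diff.
    if (extra.offset + size >= buffer_gpu->size) {
        size = buffer_gpu->size - extra.offset;
    }
    fake_buffer_read(buffer_gpu, extra.offset, dst, size);
}

int main() {
    auto buf = std::make_shared<fake_device_buffer>();
    buf->data.assign(16, 'x');
    buf->size = buf->data.size();

    fake_tensor_extra extra;
    extra.buffer_gpu = buf;  // store only a weak reference
    extra.offset = 4;

    char out[8] = {};
    read_back(extra, out, sizeof(out));
    std::cout << std::string(out, sizeof(out)) << std::endl;  // prints "xxxxxxxx"
    return 0;
}

The benefit of locking per call is that the extras cannot dangle or keep a freed buffer alive: if the owning side has released the buffer, lock() returns an empty shared_ptr and the debug readback can bail out instead of reading through a stale pointer.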