LaunchFastllmGemmFp32Int4NoZero优化 · ztxz16/fastllm@8278ef0 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8278ef0

Browse files
author
黄宇扬
committed
LaunchFastllmGemmFp32Int4NoZero优化
1 parent df48569 commit 8278ef0

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

src/devices/cuda/fastllm-cuda.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -2754,7 +2754,7 @@ bool FastllmCudaMatMulFloatInt4Group(const fastllm::Data &input, fastllm::Data &
27542754
}
27552755

27562756
void LaunchFastllmGemmFp32Int4NoZero(float *input, uint8_t *weight, float *output, float *bias, float *scales, float *mins, int n, int m, int k) {
2757-
for (int i = 0; i < n; i++) {
2757+
/* for (int i = 0; i < n; i++) {
27582758
FastllmGemvInt4NoZeroKernel1<64, 1> <<< k, 64 >>>(input + i * m, weight, output + i * k, bias, scales, mins, m, k);
27592759
}
27602760
return;

0 commit comments

Comments
 (0)
0