Merge branch 'master' into gg/flash-attn · ggml-org/llama.cpp@3d03bcb · GitHub
[go: up one dir, main page]

Skip to content

Commit 3d03bcb

Browse files
committed
Merge branch 'master' into gg/flash-attn
2 parents 78df552 + e0085fd commit 3d03bcb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+6348
-435
lines changed

.ecrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
2+
"Exclude": ["^\\.gitmodules$"],
23
"Disable": {
34
"IndentSize": true
45
}

.github/workflows/build.yml

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ jobs:
337337
OPENCL_VERSION: 2023.04.17
338338
CLBLAST_VERSION: 1.6.0
339339
SDE_VERSION: 9.33.0-2024-01-07
340+
VULKAN_VERSION: 1.3.261.1
340341

341342
strategy:
342343
matrix:
@@ -353,6 +354,8 @@ jobs:
353354
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
354355
- build: 'openblas'
355356
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
357+
- build: 'kompute'
358+
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
356359

357360
steps:
358361
- name: Clone
@@ -361,6 +364,12 @@ jobs:
361364
with:
362365
fetch-depth: 0
363366

367+
- name: Clone Kompute submodule
368+
id: clone_kompute
369+
if: ${{ matrix.build == 'kompute' }}
370+
run: |
371+
git submodule update --init kompute
372+
364373
- name: Download OpenCL SDK
365374
id: get_opencl
366375
if: ${{ matrix.build == 'clblast' }}
@@ -395,6 +404,15 @@ jobs:
395404
$lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
396405
& $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
397406
407+
- name: Install Vulkan SDK
408+
id: get_vulkan
409+
if: ${{ matrix.build == 'kompute' }}
410+
run: |
411+
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
412+
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
413+
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
414+
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
415+
398416
- name: Build
399417
id: cmake_build
400418
run: |
@@ -432,7 +450,8 @@ jobs:
432450
433451
- name: Test
434452
id: cmake_test
435-
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
453+
# not all machines have native AVX-512
454+
if: ${{ matrix.build != 'clblast' && matrix.build != 'kompute' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }}
436455
run: |
437456
cd build
438457
ctest -L main -C Release --verbose --timeout 900

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "kompute"]
2+
path = kompute
3+
url = https://github.com/nomic-ai/kompute.git

CMakeLists.txt

Lines changed: 170 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ option(LLAMA_VULKAN "llama: use Vulkan"
103103
option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
104104
option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
105105
option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
106+
option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
106107
option(LLAMA_MPI "llama: use MPI" OFF)
107108
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
108109
option(LLAMA_SYCL "llama: use SYCL" OFF)
@@ -422,7 +423,13 @@ if (LLAMA_VULKAN)
422423
if (Vulkan_FOUND)
423424
message(STATUS "Vulkan found")
424425

426+
set(GGML_HEADERS_VULKAN ggml-vulkan.h)
427+
set(GGML_SOURCES_VULKAN ggml-vulkan.cpp)
428+
425429
add_library(ggml-vulkan STATIC ggml-vulkan.cpp ggml-vulkan.h)
430+
if (BUILD_SHARED_LIBS)
431+
set_target_properties(ggml-vulkan PROPERTIES POSITION_INDEPENDENT_CODE ON)
432+
endif()
426433
target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
427434

428435
add_compile_definitions(GGML_USE_VULKAN)
@@ -478,7 +485,6 @@ if (LLAMA_HIPBLAS)
478485
endif()
479486
endif()
480487

481-
482488
if (LLAMA_SYCL)
483489
if ( NOT DEFINED ENV{ONEAPI_ROOT})
484490
message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
@@ -504,6 +510,160 @@ if (LLAMA_SYCL)
504510
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
505511
endif()
506512

513+
if (LLAMA_KOMPUTE)
514+
add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
515+
find_package(Vulkan COMPONENTS glslc REQUIRED)
516+
find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
517+
if (NOT glslc_executable)
518+
message(FATAL_ERROR "glslc not found")
519+
endif()
520+
521+
function(compile_shader)
522+
set(options)
523+
set(oneValueArgs)
524+
set(multiValueArgs SOURCES)
525+
cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
526+
foreach(source ${compile_shader_SOURCES})
527+
get_filename_component(filename ${source} NAME)
528+
set(spv_file ${filename}.spv)
529+
add_custom_command(
530+
OUTPUT ${spv_file}
531+
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
532+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
533+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
534+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
535+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
536+
COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
537+
COMMENT "Compiling ${source} to ${spv_file}"
538+
)
539+
540+
get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
541+
set(FILE_NAME "shader${RAW_FILE_NAME}")
542+
string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
543+
string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
544+
string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
545+
set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
546+
message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
547+
if(CMAKE_GENERATOR MATCHES "Visual Studio")
548+
add_custom_command(
549+
OUTPUT ${OUTPUT_HEADER_FILE}
550+
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
551+
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
552+
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
553+
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
554+
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
555+
COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
556+
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
557+
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
558+
DEPENDS ${spv_file} xxd
559+
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
560+
)
561+
else()
562+
add_custom_command(
563+
OUTPUT ${OUTPUT_HEADER_FILE}
564+
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
565+
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
566+
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
567+
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
568+
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
569+
COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
570+
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
571+
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
572+
DEPENDS ${spv_file} xxd
573+
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
574+
)
575+
endif()
576+
endforeach()
577+
endfunction()
578+
579+
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
580+
message(STATUS "Kompute found")
581+
set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
582+
add_subdirectory(kompute)
583+
584+
# Compile our shaders
585+
compile_shader(SOURCES
586+
kompute-shaders/op_scale.comp
587+
kompute-shaders/op_scale_8.comp
588+
kompute-shaders/op_add.comp
589+
kompute-shaders/op_addrow.comp
590+
kompute-shaders/op_mul.comp
591+
kompute-shaders/op_silu.comp
592+
kompute-shaders/op_relu.comp
593+
kompute-shaders/op_gelu.comp
594+
kompute-shaders/op_softmax.comp
595+
kompute-shaders/op_norm.comp
596+
kompute-shaders/op_rmsnorm.comp
597+
kompute-shaders/op_diagmask.comp
598+
kompute-shaders/op_mul_mat_mat_f32.comp
599+
kompute-shaders/op_mul_mat_f16.comp
600+
kompute-shaders/op_mul_mat_q8_0.comp
601+
kompute-shaders/op_mul_mat_q4_0.comp
602+
kompute-shaders/op_mul_mat_q4_1.comp
603+
kompute-shaders/op_mul_mat_q6_k.comp
604+
kompute-shaders/op_getrows_f16.comp
605+
kompute-shaders/op_getrows_q4_0.comp
606+
kompute-shaders/op_getrows_q4_1.comp
607+
kompute-shaders/op_getrows_q6_k.comp
608+
kompute-shaders/op_rope_f16.comp
609+
kompute-shaders/op_rope_f32.comp
610+
kompute-shaders/op_cpy_f16_f16.comp
611+
kompute-shaders/op_cpy_f16_f32.comp
612+
kompute-shaders/op_cpy_f32_f16.comp
613+
kompute-shaders/op_cpy_f32_f32.comp
614+
)
615+
616+
# Create a custom target for our generated shaders
617+
add_custom_target(generated_shaders DEPENDS
618+
shaderop_scale.h
619+
shaderop_scale_8.h
620+
shaderop_add.h
621+
shaderop_addrow.h
622+
shaderop_mul.h
623+
shaderop_silu.h
624+
shaderop_relu.h
625+
shaderop_gelu.h
626+
shaderop_softmax.h
627+
shaderop_norm.h
628+
shaderop_rmsnorm.h
629+
shaderop_diagmask.h
630+
shaderop_mul_mat_mat_f32.h
631+
shaderop_mul_mat_f16.h
632+
shaderop_mul_mat_q8_0.h
633+
shaderop_mul_mat_q4_0.h
634+
shaderop_mul_mat_q4_1.h
635+
shaderop_mul_mat_q6_k.h
636+
shaderop_getrows_f16.h
637+
shaderop_getrows_q4_0.h
638+
shaderop_getrows_q4_1.h
639+
shaderop_getrows_q6_k.h
640+
shaderop_rope_f16.h
641+
shaderop_rope_f32.h
642+
shaderop_cpy_f16_f16.h
643+
shaderop_cpy_f16_f32.h
644+
shaderop_cpy_f32_f16.h
645+
shaderop_cpy_f32_f32.h
646+
)
647+
648+
# Create a custom command that depends on the generated_shaders
649+
add_custom_command(
650+
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
651+
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
652+
DEPENDS generated_shaders
653+
COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
654+
)
655+
656+
# Add the stamp to the main sources to ensure dependency tracking
657+
set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
658+
set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
659+
add_compile_definitions(GGML_USE_KOMPUTE)
660+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
661+
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
662+
else()
663+
message(WARNING "Kompute not found")
664+
endif()
665+
endif()
666+
507667
function(get_flags CCID CCVER)
508668
set(C_FLAGS "")
509669
set(CXX_FLAGS "")
@@ -846,12 +1006,14 @@ add_library(ggml OBJECT
8461006
ggml-backend.h
8471007
ggml-quants.c
8481008
ggml-quants.h
849-
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
850-
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
851-
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
852-
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
853-
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
854-
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1009+
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1010+
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1011+
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1012+
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1013+
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1014+
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1015+
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1016+
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
8551017
)
8561018
8571019
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
@@ -928,7 +1090,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
9281090
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
9291091
9301092
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
931-
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
1093+
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}" "${GGML_HEADERS_VULKAN}"
9321094
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")
9331095
9341096
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")

README.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
1010

1111
### Hot topics
1212

13-
- ⚠️ Incoming backends: https://github.com/ggerganov/llama.cpp/discussions/5138
1413
- New SOTA quantized models, including pure 2-bits: https://huggingface.co/ikawrakow
1514
- Collecting Apple Silicon performance stats:
1615
- M-series: https://github.com/ggerganov/llama.cpp/discussions/4167
@@ -291,7 +290,7 @@ In order to build llama.cpp you have three different options.
291290
sudo pkg install gmake automake autoconf pkgconf llvm15 clinfo clover \
292291
opencl clblast openblas
293292

294-
gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j4
293+
gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j4
295294
```
296295

297296
**Notes:** With this packages you can build llama.cpp with OPENBLAS and
@@ -614,18 +613,18 @@ Building the program with BLAS support may lead to some performance improvements
614613
# obtain the original LLaMA model weights and place them in ./models
615614
ls ./models
616615
65B 30B 13B 7B tokenizer_checklist.chk tokenizer.model
617-
# [Optional] for models using BPE tokenizers
618-
ls ./models
619-
65B 30B 13B 7B vocab.json
616+
# [Optional] for models using BPE tokenizers
617+
ls ./models
618+
65B 30B 13B 7B vocab.json
620619

621620
# install Python dependencies
622621
python3 -m pip install -r requirements.txt
623622

624623
# convert the 7B model to ggml FP16 format
625624
python3 convert.py models/7B/
626625

627-
# [Optional] for models using BPE tokenizers
628-
python convert.py models/7B/ --vocabtype bpe
626+
# [Optional] for models using BPE tokenizers
627+
python convert.py models/7B/ --vocabtype bpe
629628

630629
# quantize the model to 4-bits (using q4_0 method)
631630
./quantize ./models/7B/ggml-model-f16.gguf ./models/7B/ggml-model-q4_0.gguf q4_0

convert.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,10 @@ def load(model_plus: ModelPlus) -> Params:
334334
class BpeVocab:
335335
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None:
336336
self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read())
337-
self.vocab = self.bpe_tokenizer["model"]["vocab"]
337+
try:
338+
self.vocab = self.bpe_tokenizer["model"]["vocab"]
339+
except KeyError:
340+
self.vocab = self.bpe_tokenizer
338341
added_tokens: dict[str, int]
339342
if fname_added_tokens is not None:
340343
# FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.

examples/main/main.cpp

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,17 @@ static std::ostringstream * g_output_ss;
3939
static std::vector<llama_token> * g_output_tokens;
4040
static bool is_interacting = false;
4141

42+
static bool file_exists(const std::string &path) {
43+
std::ifstream f(path.c_str());
44+
return f.good();
45+
}
46+
47+
static bool file_is_empty(const std::string &path) {
48+
std::ifstream f;
49+
f.exceptions(std::ifstream::failbit | std::ifstream::badbit);
50+
f.open(path.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
51+
return f.tellg() == 0;
52+
}
4253

4354
static void write_logfile(
4455
const llama_context * ctx, const gpt_params & params, const llama_model * model,
@@ -215,12 +226,12 @@ int main(int argc, char ** argv) {
215226

216227
if (!path_session.empty()) {
217228
LOG_TEE("%s: attempting to load saved session from '%s'\n", __func__, path_session.c_str());
218-
219-
// fopen to check for existing session
220-
FILE * fp = std::fopen(path_session.c_str(), "rb");
221-
if (fp != NULL) {
222-
std::fclose(fp);
223-
229+
if (!file_exists(path_session)) {
230+
LOG_TEE("%s: session file does not exist, will create.\n", __func__);
231+
} else if (file_is_empty(path_session)) {
232+
LOG_TEE("%s: The session file is empty. A new session will be initialized.\n", __func__);
233+
} else {
234+
// The file exists and is not empty
224235
session_tokens.resize(n_ctx);
225236
size_t n_token_count_out = 0;
226237
if (!llama_load_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) {
@@ -229,10 +240,7 @@ int main(int argc, char ** argv) {
229240
}
230241
session_tokens.resize(n_token_count_out);
231242
llama_set_rng_seed(ctx, params.seed);
232-
233-
LOG_TEE("%s: loaded a session with prompt size of %d tokens\n", __func__, (int) session_tokens.size());
234-
} else {
235-
LOG_TEE("%s: session file does not exist, will create\n", __func__);
243+
LOG_TEE("%s: loaded a session with prompt size of %d tokens\n", __func__, (int)session_tokens.size());
236244
}
237245
}
238246

examples/quantize-stats/quantize-stats.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,8 @@ int main(int argc, char ** argv) {
378378
printf("testing %s ...\n", ggml_type_name(type));
379379
}
380380

381+
ggml_quantize_init(type);
382+
381383
error_stats global_stats {};
382384

383385
for (const auto& kv_tensor : tensors) {

0 commit comments

Comments (0)