mtmd : move helpers to dedicated library (⚠️ breaking change) (#13866) · ggml-org/llama.cpp@1096133 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1096133

Browse files
authored
mtmd : move helpers to dedicated library (⚠️ breaking change) (#13866)
* mtmd : move helpers to dedicated library * fix server build * rm leftover cmakelist code
1 parent d98f2a3 commit 1096133

File tree

15 files changed

+277
-269
lines changed

15 files changed

+277
-269
lines changed

.editorconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,6 @@ charset = unset
4949
trim_trailing_whitespace = unset
5050
insert_final_newline = unset
5151

52-
[tools/mtmd/miniaudio.h]
52+
[tools/mtmd/vendor/miniaudio.h]
5353
trim_trailing_whitespace = unset
5454
insert_final_newline = unset

tools/mtmd/CMakeLists.txt

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,54 @@
11
# mtmd
22

3-
# compile mtmd-audio separately to avoid long compile times with miniaudio.h
4-
# TODO @ngxson : move miniaudio.h and stb_image.h to mtmd-helper.cpp, then compile the helper as a separate library
5-
add_library(mtmd_audio STATIC mtmd-audio.cpp mtmd-audio.h)
6-
if (BUILD_SHARED_LIBS)
7-
set_target_properties(mtmd_audio PROPERTIES POSITION_INDEPENDENT_CODE ON)
8-
endif()
9-
target_link_libraries(mtmd_audio PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})
10-
target_compile_features(mtmd_audio PRIVATE cxx_std_17)
11-
target_include_directories(mtmd_audio PRIVATE .)
12-
133
add_library(mtmd OBJECT
144
mtmd.cpp
15-
mtmd-helper.cpp
5+
mtmd-audio.cpp
166
mtmd.h
177
clip.cpp
188
clip.h
199
clip-impl.h
2010
)
2111

22-
target_link_libraries(mtmd PRIVATE ggml llama mtmd_audio ${CMAKE_THREAD_LIBS_INIT})
23-
12+
target_link_libraries(mtmd PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
2413
target_include_directories(mtmd PUBLIC .)
2514
target_include_directories(mtmd PRIVATE ../..)
26-
target_include_directories(mtmd PRIVATE ../../common) # for stb_image.h
27-
2815
target_compile_features(mtmd PRIVATE cxx_std_17)
2916

30-
add_library(mtmd_static STATIC $<TARGET_OBJECTS:mtmd>)
17+
# compile the helper separately, to avoid long compile times with miniaudio.h and stb_image.h
18+
19+
add_library(mtmd_helper OBJECT
20+
mtmd-helper.cpp
21+
mtmd-helper.h
22+
)
23+
24+
target_link_libraries(mtmd_helper PRIVATE ggml llama mtmd ${CMAKE_THREAD_LIBS_INIT})
25+
target_include_directories(mtmd_helper PUBLIC .)
26+
target_include_directories(mtmd_helper PRIVATE ./vendor)
27+
target_include_directories(mtmd_helper PRIVATE ../..)
28+
target_compile_features(mtmd_helper PRIVATE cxx_std_17)
29+
3130
if (BUILD_SHARED_LIBS)
3231
set_target_properties(mtmd PROPERTIES POSITION_INDEPENDENT_CODE ON)
3332
target_compile_definitions(mtmd PRIVATE LLAMA_SHARED LLAMA_BUILD)
3433
add_library(mtmd_shared SHARED $<TARGET_OBJECTS:mtmd>)
35-
target_link_libraries(mtmd_shared PRIVATE ggml llama mtmd_audio ${CMAKE_THREAD_LIBS_INIT})
34+
target_link_libraries(mtmd_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
3635
install(TARGETS mtmd_shared LIBRARY)
36+
37+
set_target_properties(mtmd_helper PROPERTIES POSITION_INDEPENDENT_CODE ON)
38+
target_compile_definitions(mtmd_helper PRIVATE LLAMA_SHARED LLAMA_BUILD)
39+
add_library(mtmd_helper_shared SHARED $<TARGET_OBJECTS:mtmd>)
40+
target_link_libraries(mtmd_helper_shared PRIVATE ggml llama mtmd ${CMAKE_THREAD_LIBS_INIT})
41+
install(TARGETS mtmd_helper_shared LIBRARY)
3742
endif()
3843

3944
if (NOT MSVC)
40-
target_compile_options(mtmd PRIVATE -Wno-cast-qual) # stb_image.h
41-
target_compile_options(mtmd_audio PRIVATE -Wno-cast-qual) # miniaudio.h
45+
# for stb_image.h and miniaudio.h
46+
target_compile_options(mtmd_helper PRIVATE -Wno-cast-qual)
4247
endif()
4348

4449
if(TARGET BUILD_INFO)
4550
add_dependencies(mtmd BUILD_INFO)
51+
add_dependencies(mtmd_helper BUILD_INFO)
4652
endif()
4753

4854
add_executable(llama-llava-cli deprecation-warning.cpp)
@@ -54,5 +60,5 @@ set(TARGET llama-mtmd-cli)
5460
add_executable(${TARGET} mtmd-cli.cpp)
5561
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-mtmd-cli)
5662
install(TARGETS ${TARGET} RUNTIME)
57-
target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT})
63+
target_link_libraries(${TARGET} PRIVATE common mtmd mtmd_helper ${CMAKE_THREAD_LIBS_INIT})
5864
target_compile_features(${TARGET} PRIVATE cxx_std_17)

tools/mtmd/clip.cpp

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@
1111
#include "ggml-backend.h"
1212
#include "gguf.h"
1313

14-
#define STB_IMAGE_IMPLEMENTATION
15-
#include "stb_image.h"
16-
1714
#include <cassert>
1815
#include <cmath>
1916
#include <cstdlib>
@@ -2786,30 +2783,6 @@ void clip_build_img_from_pixels(const unsigned char * rgb_pixels, int nx, int ny
27862783
memcpy(img->buf.data(), rgb_pixels, img->buf.size());
27872784
}
27882785

2789-
bool clip_image_load_from_file(const char * fname, clip_image_u8 * img) {
2790-
int nx, ny, nc;
2791-
auto * data = stbi_load(fname, &nx, &ny, &nc, 3);
2792-
if (!data) {
2793-
LOG_ERR("%s: failed to load image '%s'\n", __func__, fname);
2794-
return false;
2795-
}
2796-
clip_build_img_from_pixels(data, nx, ny, img);
2797-
stbi_image_free(data);
2798-
return true;
2799-
}
2800-
2801-
bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img) {
2802-
int nx, ny, nc;
2803-
auto * data = stbi_load_from_memory(bytes, bytes_length, &nx, &ny, &nc, 3);
2804-
if (!data) {
2805-
LOG_ERR("%s: failed to decode image bytes\n", __func__);
2806-
return false;
2807-
}
2808-
clip_build_img_from_pixels(data, nx, ny, img);
2809-
stbi_image_free(data);
2810-
return true;
2811-
}
2812-
28132786
// Normalize image to float32 - careful with pytorch .to(model.device, dtype=torch.float16) - this sometimes reduces precision (32>16>32), sometimes not
28142787
static void normalize_image_u8_to_f32(const clip_image_u8 & src, clip_image_f32 & dst, const float mean[3], const float std[3]) {
28152788
dst.nx = src.nx;

tools/mtmd/mtmd-audio.cpp

Lines changed: 0 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,5 @@
1-
// fix problem with std::min and std::max
2-
#if defined(_WIN32)
3-
#define WIN32_LEAN_AND_MEAN
4-
#ifndef NOMINMAX
5-
# define NOMINMAX
6-
#endif
7-
#include <windows.h>
8-
#endif
9-
101
#include "mtmd-audio.h"
112

12-
//#define MTMD_AUDIO_DEBUG
13-
14-
#define MINIAUDIO_IMPLEMENTATION
15-
#ifndef MTMD_AUDIO_DEBUG
16-
# define MA_NO_ENCODING
17-
#endif
18-
#define MA_NO_DEVICE_IO
19-
#define MA_NO_RESOURCE_MANAGER
20-
#define MA_NO_NODE_GRAPH
21-
#define MA_NO_ENGINE
22-
#define MA_NO_GENERATION
23-
#define MA_API static
24-
#include "miniaudio.h"
25-
263
#define _USE_MATH_DEFINES // for M_PI
274
#include <cmath>
285
#include <cstdint>
@@ -359,69 +336,6 @@ bool preprocess_audio(
359336
} // namespace whisper_preprocessor
360337

361338

362-
namespace audio_helpers {
363-
364-
bool is_audio_file(const char * buf, size_t len) {
365-
if (len < 12) {
366-
return false;
367-
}
368-
369-
// RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
370-
// WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
371-
bool is_wav = memcmp(buf, "RIFF", 4) == 0 && memcmp(buf + 8, "WAVE", 4) == 0;
372-
bool is_mp3 = len >= 3 && (
373-
memcmp(buf, "ID3", 3) == 0 ||
374-
// Check for MPEG sync word (simplified check)
375-
((unsigned char)buf[0] == 0xFF && ((unsigned char)buf[1] & 0xE0) == 0xE0)
376-
);
377-
bool is_flac = memcmp(buf, "fLaC", 4) == 0;
378-
379-
return is_wav || is_mp3 || is_flac;
380-
}
381-
382-
// returns true if the buffer is a valid audio file
383-
bool decode_audio_from_buf(const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float> & pcmf32_mono) {
384-
ma_result result;
385-
const int channels = 1;
386-
ma_decoder_config decoder_config = ma_decoder_config_init(ma_format_f32, channels, target_sampler_rate);
387-
ma_decoder decoder;
388-
389-
result = ma_decoder_init_memory(buf_in, len, &decoder_config, &decoder);
390-
if (result != MA_SUCCESS) {
391-
return false;
392-
}
393-
394-
ma_uint64 frame_count;
395-
ma_uint64 frames_read;
396-
result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count);
397-
if (result != MA_SUCCESS) {
398-
ma_decoder_uninit(&decoder);
399-
return false;
400-
}
401-
402-
pcmf32_mono.resize(frame_count);
403-
result = ma_decoder_read_pcm_frames(&decoder, pcmf32_mono.data(), frame_count, &frames_read);
404-
if (result != MA_SUCCESS) {
405-
ma_decoder_uninit(&decoder);
406-
return false;
407-
}
408-
409-
#ifdef MTMD_AUDIO_DEBUG
410-
// save audio to wav file
411-
ma_encoder_config config = ma_encoder_config_init(ma_encoding_format_wav, ma_format_f32, 1, target_sampler_rate);
412-
ma_encoder encoder;
413-
ma_encoder_init_file("output.wav", &config, &encoder);
414-
ma_encoder_write_pcm_frames(&encoder, pcmf32_mono.data(), pcmf32_mono.size(), &frames_read);
415-
ma_encoder_uninit(&encoder);
416-
#endif
417-
418-
ma_decoder_uninit(&decoder);
419-
return true;
420-
}
421-
422-
} // namespace wav_utils
423-
424-
425339
// precalculated mel filter banks
426340
// values are multiplied by 1000.0 to save space, and will be divided by 1000.0 in the end of the function
427341
//

tools/mtmd/mtmd-audio.h

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,31 +32,16 @@ struct whisper_filters {
3232
std::vector<float> data;
3333
};
3434

35-
extern bool preprocess_audio(
35+
bool preprocess_audio(
3636
const float * samples,
3737
size_t n_samples,
3838
const whisper_filters & filters,
3939
std::vector<whisper_mel> & output);
4040

4141
} // namespace whisper_preprocessor
4242

43-
44-
// TODO @ngxson : move this helper to mtmd-helpers.cpp
45-
namespace audio_helpers {
46-
47-
extern bool is_audio_file(const char * buf, size_t len);
48-
49-
extern bool decode_audio_from_buf(
50-
const unsigned char * buf_in,
51-
size_t len,
52-
int target_sampler_rate,
53-
std::vector<float> & pcmf32_mono);
54-
55-
} // namespace audio_helpers
56-
57-
5843
namespace whisper_precalc_filters {
5944

60-
extern whisper_preprocessor::whisper_filters get_128_bins();
45+
whisper_preprocessor::whisper_filters get_128_bins();
6146

6247
} // namespace whisper_precalc_filters

tools/mtmd/mtmd-cli.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "console.h"
88
#include "chat.h"
99
#include "mtmd.h"
10+
#include "mtmd-helper.h"
1011

1112
#include <vector>
1213
#include <limits.h>
@@ -143,7 +144,7 @@ struct mtmd_cli_context {
143144
}
144145

145146
bool load_media(const std::string & fname) {
146-
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(fname.c_str()));
147+
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(ctx_vision.get(), fname.c_str()));
147148
if (!bmp.ptr) {
148149
return false;
149150
}

0 commit comments

Comments
 (0)
0