Expose Llava as a shared library for downstream projects by damian0815 · Pull Request #3613 · ggml-org/llama.cpp

Expose Llava as a shared library for downstream projects #3613


Merged — 34 commits, merged on Nov 6, 2023. The diff below shows the changes from 1 commit out of the 34.

Commits (34):
0209d39  wip llava python bindings compatibility (damian0815, Oct 13, 2023)
3c10d9f  add external llava API (damian0815, Oct 13, 2023)
770dc9d  add base64 in-prompt image support (damian0815, Oct 13, 2023)
8224ca5  wip refactor image loading (damian0815, Oct 14, 2023)
c693208  refactor image load out of llava init (damian0815, Oct 14, 2023)
0889117  cleanup (damian0815, Oct 14, 2023)
f83c060  further cleanup; move llava-cli into its own file and rename (damian0815, Oct 14, 2023)
e2cd07c  move base64.hpp into common/ (damian0815, Oct 14, 2023)
f8eddcf  collapse clip and llava libraries (damian0815, Oct 14, 2023)
b9f533b  move llava into its own subdir (damian0815, Oct 14, 2023)
f21af51  wip (damian0815, Oct 14, 2023)
708928c  fix bug where base64 string was not removed from the prompt (damian0815, Oct 14, 2023)
09edb7e  get libllava to output in the right place (damian0815, Oct 14, 2023)
2847ecf  expose llava methods in libllama.dylib (damian0815, Oct 14, 2023)
e3261ff  cleanup memory usage around clip_image_* (damian0815, Oct 14, 2023)
d64891b  cleanup and refactor *again* (damian0815, Oct 15, 2023)
5a91551  update headerdoc (damian0815, Oct 15, 2023)
e84003b  Move llava back to examples (monatis, Nov 2, 2023)
8037034  build with cmake, not tested (WIP) (monatis, Nov 2, 2023)
52143f7  Editorconfig (monatis, Nov 5, 2023)
c6b8844  Merge branch 'master' into llava-lib (monatis, Nov 5, 2023)
32bf7bf  Editorconfig (monatis, Nov 5, 2023)
53dca51  Build with make (monatis, Nov 5, 2023)
b927772  Build with make (monatis, Nov 5, 2023)
01f06e2  Fix cyclical depts on Windows (monatis, Nov 5, 2023)
ad97e0e  attempt to fix build on Windows (monatis, Nov 5, 2023)
71ea278  Merge branch 'master' into llava-lib (monatis, Nov 5, 2023)
1f8c866  attempt to fix build on Windows (monatis, Nov 6, 2023)
d6be69f  Upd TODOs (monatis, Nov 6, 2023)
5b8b9ef  attempt to fix build on Windows+CUDA (monatis, Nov 6, 2023)
b9bacc7  Revert changes in cmake (monatis, Nov 6, 2023)
9f03ac7  Fix according to review comments (monatis, Nov 6, 2023)
22f43fc  Support building as a shared library (monatis, Nov 6, 2023)
3548029  address review comments (cebtenzzre, Nov 6, 2023)
Viewing commit f21af512cd8176b5ea8eb81b722054d47d8b2c0a — "wip" (committed by damian0815, Oct 14, 2023)
examples/llava/llava-cli.cpp (5 changes: 3 additions & 2 deletions)

@@ -13,7 +13,7 @@ static void show_additional_info(int /*argc*/, char ** argv) {
     printf("  note: a lower temperature value like 0.1 is recommended for better quality.\n");
 }
 
-static bool load_image(llava_context * ctx_llava, gpt_params * params, float **image_embd, int * n_image_pos) {
+static bool load_image(llava_context * ctx_llava, gpt_params * params, float **image_embd, int * n_img_pos) {
     // load and preprocess the image
     clip_image_u8 img;
     auto prompt = params->prompt;
@@ -32,7 +32,7 @@ static bool load_image(llava_context * ctx_llava, gpt_params * params, float **i
             return false;
         }
     }
-    bool image_embed_result = llava_build_img_embed(ctx_llava->ctx_llama, ctx_llava->ctx_clip, params->n_threads, &img, image_embd, n_image_pos);
+    bool image_embed_result = llava_build_img_embed(ctx_llava->ctx_llama, ctx_llava->ctx_clip, params->n_threads, &img, image_embd, n_img_pos);
     if (!image_embed_result) {
         fprintf(stderr, "%s: coulnd't embed the image\n", __func__);
         return false;
@@ -49,6 +49,7 @@ static void process_prompt(struct llava_context * ctx_llava, float * image_embd,
     // llava chat format is "<system_prompt>USER: <image_embeddings>\n<textual_prompt>\nASSISTANT:"
     // GG: are we sure that the should be a trailing whitespace at the end of this string?
     eval_string(ctx_llava->ctx_llama, "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER: ", params->n_batch, &n_past);
+    printf("embedding image, n_img_pos is %d\n", n_img_pos);
    eval_image_embd(ctx_llava->ctx_llama, image_embd, n_img_pos, params->n_batch, &n_past);
     eval_string(ctx_llava->ctx_llama, prompt, params->n_batch, &n_past);
     eval_string(ctx_llava->ctx_llama, "\nASSISTANT:", params->n_batch, &n_past);
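
The hunks above show only load_image and process_prompt; main() is outside this diff. As a rough sketch (the control flow and error handling below are assumptions inferred from the visible signatures, not part of this commit), a caller wires them together like this:

    // Hypothetical caller-side wiring, inferred from the signatures visible above.
    // ctx_llava comes from llava_init(&params) and owns the llama and clip contexts.
    float * image_embd = nullptr;
    int n_img_pos = 0;
    if (!load_image(ctx_llava, &params, &image_embd, &n_img_pos)) {
        fprintf(stderr, "failed to load or embed the image\n");
        return 1;
    }
    // process_prompt's tail parameters are truncated in the hunk above; they are
    // inferred here from the body's use of params->n_batch and `prompt`.
    process_prompt(ctx_llava, image_embd, &params, params.prompt.c_str());
    free(image_embd);   // the embedding buffer is malloc()'d inside llava_build_img_embed
    llava_free(ctx_llava);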
llava/clip.h (5 changes: 3 additions & 2 deletions)

@@ -2,6 +2,7 @@
 #define CLIP_H
 
 #include "ggml.h"
+#include "llama.h"
 
 struct clip_ctx;
 
@@ -57,8 +58,8 @@ struct clip_image_f32_batch {
 
 struct clip_image_u8 * make_clip_image_u8();
 struct clip_image_f32 * make_clip_image_f32();
-bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
-bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, clip_image_u8 * img);
+LLAMA_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
+LLAMA_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, clip_image_u8 * img);
 bool clip_image_preprocess(const struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32 * res, const bool pad2square);
 bool clip_image_encode(const struct clip_ctx * ctx, const int n_threads, struct clip_image_f32 * img, float * vec);
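
Marking the two loaders LLAMA_API is what makes them visible across the shared-library boundary. With those exports, a downstream project can drive the CLIP stage directly; here is a minimal sketch of the load → preprocess → encode pipeline, mirroring encode_image_with_clip in llava.cpp below (the ctx_clip setup and clip_embd_nbytes come from surrounding code, not from this hunk):

    // Sketch only: assumes an already-loaded clip_ctx * ctx_clip.
    clip_image_u8 img;
    if (!clip_image_load_from_file("input.jpg", &img)) {
        fprintf(stderr, "failed to load image\n");
        return false;
    }

    clip_image_f32 img_res;
    if (!clip_image_preprocess(ctx_clip, &img, &img_res, /*pad2square =*/ true)) {
        fprintf(stderr, "failed to preprocess image\n");
        return false;
    }

    // clip_embd_nbytes(ctx_clip) sizes the output: one embedding vector per image patch.
    float * vec = (float *)malloc(clip_embd_nbytes(ctx_clip));
    if (!clip_image_encode(ctx_clip, /*n_threads =*/ 4, &img_res, vec)) {
        fprintf(stderr, "failed to encode image\n");
        free(vec);
        return false;
    }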
llava/llava.cpp (18 changes: 9 additions & 9 deletions)

@@ -10,7 +10,7 @@
 
 #include "base64.hpp"
 
-static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float * image_embd, int * n_img_embd, int * n_img_pos) {
+static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float * image_embd, int * n_image_embd, int * n_img_pos) {
     clip_image_f32 img_res;
     if (!clip_image_preprocess(ctx_clip, img, &img_res, /*pad2square =*/ true)) {
         fprintf(stderr, "%s: unable to preprocess image\n", __func__);
@@ -19,7 +19,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
     }
 
     *n_img_pos = clip_n_patches(ctx_clip);
-    *n_img_embd = clip_n_mmproj_embd(ctx_clip);
+    *n_image_embd = clip_n_mmproj_embd(ctx_clip);
 
     const int64_t t_img_enc_start_us = ggml_time_us();
     if (!clip_image_encode(ctx_clip, n_threads, &img_res, image_embd)) {
@@ -37,7 +37,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
     return true;
 }
 
-bool llava_build_img_embed(const llama_context * ctx_llama, clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_image_pos_out) {
+bool llava_build_img_embed(const llama_context * ctx_llama, clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
 
     float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip));
     if (!image_embd) {
@@ -46,23 +46,23 @@ bool llava_build_img_embed(const llama_context * ctx_llama, clip_ctx * ctx_clip,
         return false;
     }
 
-    int n_image_pos;
-    int n_img_embd;
-    if (!encode_image_with_clip(ctx_clip, n_threads, img, image_embd, &n_img_embd, &n_image_pos)) {
+    int n_img_pos;
+    int n_image_embd;
+    if (!encode_image_with_clip(ctx_clip, n_threads, img, image_embd, &n_image_embd, &n_img_pos)) {
         fprintf(stderr, "%s: cannot encode image, aborting\n", __func__);
         free(image_embd);
         return false;
     }
     // make sure that the correct mmproj was used, i.e., compare apples to apples
     int n_llama_embd = llama_n_embd(llama_get_model(ctx_llama));
-    if (n_img_embd != n_llama_embd) {
-        printf("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_img_embd, n_llama_embd);
+    if (n_image_embd != n_llama_embd) {
+        printf("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_llama_embd);
         free(image_embd);
         return false;
     }
 
     *image_embd_out = image_embd;
-    *n_image_pos_out = n_image_pos;
+    *n_img_pos_out = n_img_pos;
 
     return true;
 }
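
Two details in llava_build_img_embed matter for downstream callers: the embedding buffer is malloc()'d here and handed back through image_embd_out, so ownership (and the eventual free()) transfers to the caller; and the clip_n_mmproj_embd vs. llama_n_embd comparison makes a mismatched mmproj file fail fast instead of corrupting the context. The sizing presumably works out as below — this is an assumption about clip_embd_nbytes in clip.cpp, which this commit does not touch:

    // Assumed invariant, stated as a sanity check (needs <cassert>), not verified
    // in this diff: the buffer holds one n_image_embd-wide float vector per image position.
    assert(clip_embd_nbytes(ctx_clip) ==
           (size_t) clip_n_patches(ctx_clip) * clip_n_mmproj_embd(ctx_clip) * sizeof(float));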
llava/llava.h (2 changes: 1 addition & 1 deletion)

@@ -20,7 +20,7 @@ struct llava_context * llava_init(gpt_params * params);
 void llava_free(struct llava_context * ctx_llava);
 
 /** build a llava image embedding from the passed-in clip image `img`. result is returned as image_embd_out, size n_image_pos_out */
-bool llava_build_img_embed(const struct llama_context * ctx_llama, struct clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_image_pos_out);
+LLAMA_API bool llava_build_img_embed(const struct llama_context * ctx_llama, struct clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_image_pos_out);
 
 
 #ifdef __cplusplus
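
Taken together, these exports form the public surface a downstream project links against: llava_init / llava_free for lifecycle, and llava_build_img_embed for turning an image into llama-ready embeddings. An end-to-end sketch against the API as declared at this commit (gpt_params setup is elided, the eval step is left to the caller, and the field accesses on ctx_llava match their use in llava-cli.cpp above):

    #include "clip.h"
    #include "llava.h"

    // Sketch of a downstream consumer; `params` is assumed to be a populated gpt_params.
    struct llava_context * ctx_llava = llava_init(&params);

    clip_image_u8 img;
    if (clip_image_load_from_file("input.jpg", &img)) {
        float * image_embd = nullptr;
        int n_img_pos = 0;
        if (llava_build_img_embed(ctx_llava->ctx_llama, ctx_llava->ctx_clip,
                                  params.n_threads, &img, &image_embd, &n_img_pos)) {
            // feed image_embd (occupying n_img_pos context positions) into the
            // llama context, then decode the reply as usual
            free(image_embd);
        }
    }
    llava_free(ctx_llava);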