 import llama_cpp.llama_types as llama_types
 import llama_cpp.llama_grammar as llama_grammar
 
+from ._utils import suppress_stdout_stderr
+
 
 class LlamaChatCompletionHandler(Protocol):
     def __call__(
@@ -775,20 +777,26 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
 
 
 class Llava15ChatHandler:
-    def __init__(self, clip_model_path: str):
+    _clip_free = None
+
+    def __init__(self, clip_model_path: str, verbose: bool = False):
         import llama_cpp.llava_cpp as llava_cpp
 
         self._llava_cpp = llava_cpp
         self.clip_model_path = clip_model_path
+        self.verbose = verbose
+        self._clip_free = self._llava_cpp._libllava.clip_free  # type: ignore
 
-        self.clip_ctx = self._llava_cpp.clip_model_load(
-            self.clip_model_path.encode(), 0
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            self.clip_ctx = self._llava_cpp.clip_model_load(
+                self.clip_model_path.encode(), 0
+            )
 
     def __del__(self):
-        if self.clip_ctx is not None:
-            self._llava_cpp.clip_free(self.clip_ctx)
-            self.clip_ctx = None
+        with suppress_stdout_stderr(disable=self.verbose):
+            if self.clip_ctx is not None and self._clip_free is not None:
+                self._clip_free(self.clip_ctx)
+                self.clip_ctx = None
 
     def load_image(self, image_url: str) -> bytes:
         if image_url.startswith("data:"):
@@ -881,27 +889,28 @@ def __call__(
                             c_ubyte_ptr = (
                                 ctypes.c_ubyte * len(data_array)
                             ).from_buffer(data_array)
-                            embed = self._llava_cpp.llava_image_embed_make_with_bytes(
-                                ctx_clip=self.clip_ctx,
-                                n_threads=llama.context_params.n_threads,
-                                image_bytes=c_ubyte_ptr,
-                                image_bytes_length=len(image_bytes),
-                            )
-                            # image_bytes_p = (ctypes.c_uint8 * len(image_bytes)).from_buffer_copy(image_bytes)
-                            # embed = self._llava_cpp.llava_image_embed_make_with_bytes(ctx_clip=self.clip_ctx, n_threads=1, image_bytes=image_bytes_p, image_bytes_length=len(image_bytes))
+                            with suppress_stdout_stderr(disable=self.verbose):
+                                embed = self._llava_cpp.llava_image_embed_make_with_bytes(
+                                    ctx_clip=self.clip_ctx,
+                                    n_threads=llama.context_params.n_threads,
+                                    image_bytes=c_ubyte_ptr,
+                                    image_bytes_length=len(image_bytes),
+                                )
                             try:
                                 n_past = ctypes.c_int(llama.n_tokens)
                                 n_past_p = ctypes.pointer(n_past)
-                                self._llava_cpp.llava_eval_image_embed(
-                                    ctx_llama=llama.ctx,
-                                    embed=embed,
-                                    n_batch=llama.n_batch,
-                                    n_past=n_past_p,
-                                )
+                                with suppress_stdout_stderr(disable=self.verbose):
+                                    self._llava_cpp.llava_eval_image_embed(
+                                        ctx_llama=llama.ctx,
+                                        embed=embed,
+                                        n_batch=llama.n_batch,
+                                        n_past=n_past_p,
+                                    )
                                 assert llama.n_ctx() >= n_past.value
                                 llama.n_tokens = n_past.value
                             finally:
-                                self._llava_cpp.llava_image_embed_free(embed)
+                                with suppress_stdout_stderr(disable=self.verbose):
+                                    self._llava_cpp.llava_image_embed_free(embed)
             if message["role"] == "assistant" and message["content"] is not None:
                 llama.eval(
                     llama.tokenize(
@@ -910,7 +919,7 @@ def __call__(
                 )
         llama.eval(llama.tokenize(f"{assistant_role}".encode("utf8"), add_bos=False))
 
-        prompt = llama._input_ids.tolist()
+        prompt = llama.input_ids[: llama.n_tokens].tolist()
 
         return _convert_completion_to_chat(
            llama.create_completion(
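For reviewers who want to try this out, below is a minimal usage sketch of the new `verbose` flag on `Llava15ChatHandler`. It assumes the usual llama-cpp-python LLaVA setup; the model paths, image URL, and generation settings are placeholders and are not part of this commit.

    from llama_cpp import Llama
    from llama_cpp.llama_chat_format import Llava15ChatHandler

    # With verbose=False (the new default), clip_model_load and the image-embed
    # calls run inside suppress_stdout_stderr, so the native library stays quiet.
    chat_handler = Llava15ChatHandler(
        clip_model_path="./mmproj-model-f16.gguf",  # placeholder path
        verbose=False,
    )

    llm = Llama(
        model_path="./llava-v1.5-7b.Q4_K_M.gguf",  # placeholder path
        chat_handler=chat_handler,
        n_ctx=2048,       # room for the image embedding plus the reply
        logits_all=True,  # typically required when using the LLaVA chat handler
    )

    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
                    {"type": "text", "text": "What is in this image?"},
                ],
            }
        ]
    )
    print(response["choices"][0]["message"]["content"])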