From 586f8c34736957a54541108c93c72aaefcf1e732 Mon Sep 17 00:00:00 2001
From: Junpei Kawamoto
Date: Wed, 14 Aug 2024 00:29:53 -0600
Subject: [PATCH] fix: Llama.close didn't free lora adapter

---
 llama_cpp/llama.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index b780ef6c6..a556bfb8c 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -198,6 +198,7 @@ def __init__(
             A Llama instance.
         """
         self.verbose = verbose
+        self._stack = contextlib.ExitStack()
 
         set_verbose(verbose)
 
@@ -365,8 +366,6 @@ def __init__(
         if not os.path.exists(model_path):
             raise ValueError(f"Model path does not exist: {model_path}")
 
-        self._stack = contextlib.ExitStack()
-
         self._model = self._stack.enter_context(
             contextlib.closing(
                 _LlamaModel(
@@ -420,6 +419,15 @@ def __init__(
                 raise RuntimeError(
                     f"Failed to initialize LoRA adapter from lora path: {self.lora_path}"
                 )
+
+            def free_lora_adapter():
+                if self._lora_adapter is None:
+                    return
+                llama_cpp.llama_lora_adapter_free(self._lora_adapter)
+                self._lora_adapter = None
+
+            self._stack.callback(free_lora_adapter)
+
             assert self._ctx.ctx is not None
             if llama_cpp.llama_lora_adapter_set(
                 self._ctx.ctx, self._lora_adapter, self.lora_scale
@@ -2085,14 +2093,9 @@ def pooling_type(self) -> str:
 
     def close(self) -> None:
         """Explicitly free the model from memory."""
-        if hasattr(self,'_stack'):
-            if self._stack is not None:
-                self._stack.close()
+        self._stack.close()
 
     def __del__(self) -> None:
-        if hasattr(self,'_lora_adapter'):
-            if self._lora_adapter is not None:
-                llama_cpp.llama_lora_adapter_free(self._lora_adapter)
         self.close()
 
     @staticmethod
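
Note (reviewer context, not part of the patch): the fix registers the adapter
teardown on the same contextlib.ExitStack that already owns the model and
context, so close() and __del__ share a single idempotent cleanup path, and the
stack is created at the top of __init__ so close() is safe even if construction
fails partway. Below is a minimal, self-contained sketch of that
ExitStack.callback pattern; the Resource class and free_handle callback are
illustrative names, not llama-cpp-python API.

import contextlib

class Resource:
    """Toy stand-in for an object owning a native handle (e.g. a LoRA adapter)."""

    def __init__(self) -> None:
        # Create the stack first so close() is always safe to call,
        # mirroring the patch moving the ExitStack to the top of Llama.__init__.
        self._stack = contextlib.ExitStack()
        self._handle = object()  # pretend native allocation

        def free_handle() -> None:
            # Guarded and idempotent, like free_lora_adapter() in the patch.
            if self._handle is None:
                return
            print("freeing handle")
            self._handle = None

        self._stack.callback(free_handle)

    def close(self) -> None:
        """Run every registered cleanup exactly once."""
        self._stack.close()

    def __del__(self) -> None:
        self.close()

r = Resource()
r.close()  # prints "freeing handle"
r.close()  # no-op: the ExitStack has already unwound its callbacks

Because ExitStack unwinds in LIFO order, a callback registered after the model
and context are entered (as the adapter callback is in the patch) runs before
they are freed, so the adapter is released while its owning context still exists.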