@@ -18,8 +18,6 @@
 
 import llama_cpp.llama_cpp as llama_cpp
 
-from ._utils import suppress_stdout_stderr
-
 
 # Python wrappers over llama.h structs
 
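For context on what this commit removes: `suppress_stdout_stderr` is a context manager that silences llama.cpp's native logging around C calls. Below is a minimal sketch of the general technique (file-descriptor redirection to /dev/null), written from scratch for illustration; the actual helper in `llama_cpp/_utils.py` may differ in details.

```python
import os
import sys
from contextlib import contextmanager

@contextmanager
def suppress_stdout_stderr(disable: bool = False):
    """Temporarily point fd 1 and fd 2 at /dev/null.

    Reassigning sys.stdout alone is not enough: llama.cpp writes through
    the C-level file descriptors, so those are what must be redirected.
    """
    if disable:
        yield
        return
    sys.stdout.flush()
    sys.stderr.flush()
    devnull = os.open(os.devnull, os.O_WRONLY)
    saved_stdout, saved_stderr = os.dup(1), os.dup(2)
    try:
        os.dup2(devnull, 1)
        os.dup2(devnull, 2)
        yield
    finally:
        os.dup2(saved_stdout, 1)  # restore the original descriptors
        os.dup2(saved_stderr, 2)
        for fd in (devnull, saved_stdout, saved_stderr):
            os.close(fd)
```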
@@ -30,7 +28,6 @@ class _LlamaModel:
 
     _llama_free_model = None
     # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self,
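The surviving NOTE refers to a teardown-order pitfall: during interpreter shutdown, module globals can already be cleared by the time `__del__` runs, so any C function a finalizer needs is saved as a class attribute. A self-contained sketch of the idiom, using libc as a hypothetical stand-in (not part of this diff):

```python
import ctypes
import ctypes.util

libc = ctypes.CDLL(ctypes.util.find_library("c"))
libc.malloc.restype = ctypes.c_void_p
libc.malloc.argtypes = [ctypes.c_size_t]
libc.free.argtypes = [ctypes.c_void_p]

class _Buffer:
    # Saved at class-definition time: at shutdown the module global `libc`
    # may already be None when __del__ fires, but this class attribute
    # remains reachable through the instance.
    _libc_free = libc.free

    def __init__(self, size: int):
        self.ptr = libc.malloc(size)

    def __del__(self):
        if self.ptr is not None and self._libc_free is not None:
            self._libc_free(self.ptr)
            self.ptr = None
```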
@@ -48,16 +45,14 @@ def __init__(
         if not os.path.exists(path_model):
             raise ValueError(f"Model path does not exist: {path_model}")
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.model = llama_cpp.llama_load_model_from_file(
-                self.path_model.encode("utf-8"), self.params
-            )
+        self.model = llama_cpp.llama_load_model_from_file(
+            self.path_model.encode("utf-8"), self.params
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.model is not None and self._llama_free_model is not None:
-                self._llama_free_model(self.model)
-                self.model = None
+        if self.model is not None and self._llama_free_model is not None:
+            self._llama_free_model(self.model)
+            self.model = None
 
     def vocab_type(self) -> int:
         assert self.model is not None
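A hedged usage sketch of the low-level calls this wrapper now makes directly. Function names follow the llama.cpp C API as bound in `llama_cpp.llama_cpp`; the model path and parameter values are placeholders.

```python
import llama_cpp.llama_cpp as llama_cpp

params = llama_cpp.llama_model_default_params()
params.n_gpu_layers = 0  # placeholder: CPU-only
model = llama_cpp.llama_load_model_from_file(b"./model.gguf", params)
if model is None:
    raise RuntimeError("llama_load_model_from_file failed")
# ... use the model ...
llama_cpp.llama_free_model(model)  # what _llama_free_model points at
```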
@@ -240,8 +235,6 @@ class _LlamaContext:
     NOTE: For stability it's recommended you use the Llama class instead."""
 
     _llama_free = None
-    # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self,
@@ -256,16 +249,16 @@ def __init__(
 
         self._llama_free = llama_cpp._lib.llama_free  # type: ignore
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.ctx = llama_cpp.llama_new_context_with_model(
-                self.model.model, self.params
-            )
+        assert self.model.model is not None
+
+        self.ctx = llama_cpp.llama_new_context_with_model(
+            self.model.model, self.params
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.ctx is not None and self._llama_free is not None:
-                self._llama_free(self.ctx)
-                self.ctx = None
+        if self.ctx is not None and self._llama_free is not None:
+            self._llama_free(self.ctx)
+            self.ctx = None
 
     def n_ctx(self) -> int:
         assert self.ctx is not None
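Continuing the sketch above: `_LlamaContext` pairs `llama_new_context_with_model` with `llama_free`, and the added assert guards against handing a NULL model pointer to the C call.

```python
ctx_params = llama_cpp.llama_context_default_params()
ctx_params.n_ctx = 2048  # placeholder context length
ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)
if ctx is None:
    raise RuntimeError("llama_new_context_with_model failed")
# ... evaluate tokens ...
llama_cpp.llama_free(ctx)  # what _llama_free points at
```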
@@ -493,8 +486,6 @@ def default_params():
 
 class _LlamaBatch:
     _llama_batch_free = None
-    # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True
@@ -506,16 +497,14 @@ def __init__(
 
         self._llama_batch_free = llama_cpp._lib.llama_batch_free  # type: ignore
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.batch = llama_cpp.llama_batch_init(
-                self.n_tokens, self.embd, self.n_seq_max
-            )
+        self.batch = llama_cpp.llama_batch_init(
+            self.n_tokens, self.embd, self.n_seq_max
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.batch is not None and self._llama_batch_free is not None:
-                self._llama_batch_free(self.batch)
-                self.batch = None
+        if self.batch is not None and self._llama_batch_free is not None:
+            self._llama_batch_free(self.batch)
+            self.batch = None
 
     def set_batch(self, batch: Sequence[int], n_past: int, logits_all: bool):
         assert self.batch is not None
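And the batch lifecycle that `_LlamaBatch` wraps: `llama_batch_init(n_tokens, embd, n_seq_max)` allocates the C-side buffers (with `embd=0` the batch carries token ids rather than embeddings), and `llama_batch_free` releases them.

```python
# Up to 512 tokens, token-id mode (embd=0), a single sequence.
batch = llama_cpp.llama_batch_init(512, 0, 1)
# ... fill batch.token / batch.pos / batch.logits, then decode ...
llama_cpp.llama_batch_free(batch)  # what _llama_batch_free points at
```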