Commit 7f51b60

feat(low-level-api): Improve API static type-safety and performance (abetlen#1205)
1 parent 0f8aa4a commit 7f51b60

File tree

5 files changed: +858 −743 lines


llama_cpp/_internals.py

Lines changed: 8 additions & 7 deletions
@@ -108,7 +108,7 @@ def apply_lora_from_file(
             scale,
             path_base_model.encode("utf-8")
             if path_base_model is not None
-            else llama_cpp.c_char_p(0),
+            else ctypes.c_char_p(0),
             n_threads,
         )
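
The only change in this hunk is swapping a re-exported alias for the standard-library ctypes.c_char_p; ctypes.c_char_p(0) is the usual way to pass a NULL const char * when an optional string argument is omitted. A minimal, self-contained sketch of the same pattern (the encode_optional_path helper below is hypothetical, not part of llama-cpp-python):

import ctypes
from typing import Optional

def encode_optional_path(path: Optional[str]) -> ctypes.c_char_p:
    # Encode a Python string for a `const char *` parameter,
    # or pass a NULL pointer when no path is given.
    if path is not None:
        return ctypes.c_char_p(path.encode("utf-8"))
    return ctypes.c_char_p(0)

print(encode_optional_path("base.gguf").value)  # b'base.gguf'
print(encode_optional_path(None).value)         # None (NULL pointer)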
@@ -303,8 +303,8 @@ def decode(self, batch: "_LlamaBatch"):
         assert self.ctx is not None
         assert batch.batch is not None
         return_code = llama_cpp.llama_decode(
-            ctx=self.ctx,
-            batch=batch.batch,
+            self.ctx,
+            batch.batch,
         )
         if return_code != 0:
             raise RuntimeError(f"llama_decode returned {return_code}")
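
This hunk switches the llama_decode call from keyword to positional arguments. Raw ctypes bindings are normally called positionally once argtypes/restype are declared (named parameters are a feature of prototypes built with paramflags), and positional calls avoid keyword-processing overhead, which plausibly matches the commit's type-safety and performance goal. A small illustration against the C runtime (the libc lookup and the abs example are assumptions, unrelated to llama.cpp):

import ctypes
import ctypes.util

# Load the C runtime; the lookup is platform dependent and may need adjusting.
libc = ctypes.CDLL(ctypes.util.find_library("c"))

# Declaring argtypes/restype lets ctypes check each positional argument at call time.
libc.abs.argtypes = [ctypes.c_int]
libc.abs.restype = ctypes.c_int

print(libc.abs(-7))  # 7 -- positional call, analogous to llama_decode(self.ctx, batch.batch)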
@@ -493,7 +493,7 @@ class _LlamaBatch:
     def __init__(
         self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True
     ):
-        self.n_tokens = n_tokens
+        self._n_tokens = n_tokens
         self.embd = embd
         self.n_seq_max = n_seq_max
         self.verbose = verbose
@@ -502,7 +502,7 @@ def __init__(

         self.batch = None
         self.batch = llama_cpp.llama_batch_init(
-            self.n_tokens, self.embd, self.n_seq_max
+            self._n_tokens, self.embd, self.n_seq_max
         )

     def __del__(self):
@@ -570,12 +570,13 @@ def copy_logits(self, logits: npt.NDArray[np.single]):
         self.candidates.data = self.candidates_data.ctypes.data_as(
             llama_cpp.llama_token_data_p
         )
-        self.candidates.sorted = llama_cpp.c_bool(False)
-        self.candidates.size = llama_cpp.c_size_t(self.n_vocab)
+        self.candidates.sorted = ctypes.c_bool(False)
+        self.candidates.size = ctypes.c_size_t(self.n_vocab)


 # Python wrappers over common/common
 def _tokenize(model: _LlamaModel, text: str, add_bos: bool, special: bool) -> list[int]:
+    assert model.model is not None
     n_tokens = len(text) + 1 if add_bos else len(text)
     result = (llama_cpp.llama_token * n_tokens)()
     n_tokens = llama_cpp.llama_tokenize(
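
For reference, the copy_logits part of this hunk combines two standard techniques: taking a raw C pointer into a NumPy buffer with ndarray.ctypes.data_as(...), and filling scalar struct fields with ctypes.c_bool / ctypes.c_size_t (the added assert in _tokenize narrows an Optional attribute for static type checkers). The sketch below reproduces the pattern with made-up TokenData / TokenDataArray structures standing in for llama.cpp's llama_token_data types:

import ctypes
import numpy as np

class TokenData(ctypes.Structure):
    # Hypothetical stand-in for llama_token_data: (id, logit, p).
    _fields_ = [("id", ctypes.c_int32), ("logit", ctypes.c_float), ("p", ctypes.c_float)]

class TokenDataArray(ctypes.Structure):
    # Hypothetical stand-in for llama_token_data_array.
    _fields_ = [
        ("data", ctypes.POINTER(TokenData)),
        ("size", ctypes.c_size_t),
        ("sorted", ctypes.c_bool),
    ]

n_vocab = 4
# NumPy owns the backing memory; the structured dtype mirrors TokenData's layout.
buf = np.zeros(n_vocab, dtype=np.dtype([("id", np.int32), ("logit", np.single), ("p", np.single)]))

candidates = TokenDataArray()
candidates.data = buf.ctypes.data_as(ctypes.POINTER(TokenData))  # pointer into the NumPy buffer
candidates.size = ctypes.c_size_t(n_vocab)
candidates.sorted = ctypes.c_bool(False)

print(candidates.size, candidates.sorted)  # 4 False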

llama_cpp/llama.py

Lines changed: 1 addition & 1 deletion
@@ -1818,7 +1818,7 @@ def load_state(self, state: LlamaState) -> None:
         self.input_ids = state.input_ids.copy()
         self.n_tokens = state.n_tokens
         state_size = state.llama_state_size
-        LLamaStateArrayType = llama_cpp.c_uint8 * state_size
+        LLamaStateArrayType = ctypes.c_uint8 * state_size
         llama_state = LLamaStateArrayType.from_buffer_copy(state.llama_state)

         if llama_cpp.llama_set_state_data(self._ctx.ctx, llama_state) != state_size:
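
In the + line above, multiplying a ctypes type by a runtime length produces an array type, and from_buffer_copy copies a bytes-like object into a fresh instance of it, giving llama_set_state_data a C-compatible buffer. A minimal, self-contained sketch (the four-byte blob is made up):

import ctypes

state_bytes = b"\x01\x02\x03\x04"                    # stand-in for a saved llama state blob
StateArrayType = ctypes.c_uint8 * len(state_bytes)   # array type sized at runtime
llama_state = StateArrayType.from_buffer_copy(state_bytes)  # copy the bytes into ctypes-owned memory

print(list(llama_state))           # [1, 2, 3, 4]
print(ctypes.sizeof(llama_state))  # 4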

0 commit comments
