2 files changed, +7 -3 lines changed

@@ -12,7 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - (build-system) Migrate from scikit-build to scikit-build-core
 
 ### Fixed
+
 - Truncate max_tokens in create_completion so requested tokens doesn't exceed context size.
+- Temporarily disable cache for completion requests
 
 ## [v0.1.59]
 

@@ -831,7 +831,9 @@ def _create_completion(
                 "logprobs is not supported for models created with logits_all=False"
             )
 
-        if self.cache:
+        # Temporarily disable usage of the cache
+        # See: https://github.com/abetlen/llama-cpp-python/issues/348#issuecomment-1583072408
+        if self.cache and False:
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
@@ -1069,14 +1071,14 @@ def _create_completion(
                     }
                 ],
             }
-            if self.cache:
+            if self.cache and False:
                 if self.verbose:
                     print("Llama._create_completion: cache save", file=sys.stderr)
                 self.cache[prompt_tokens + completion_tokens] = self.save_state()
                 print("Llama._create_completion: cache saved", file=sys.stderr)
             return
 
-        if self.cache:
+        if self.cache and False:
             if self.verbose:
                 print("Llama._create_completion: cache save", file=sys.stderr)
             self.cache[prompt_tokens + completion_tokens] = self.save_state()
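
For context on the change itself: `if self.cache and False:` is a deliberate kill switch. Because `and` short-circuits and the right-hand operand is the constant `False`, the condition can never be true, so the cache lookup and cache save blocks are skipped entirely while their code stays in place for an easy revert once the linked issue is resolved. Below is a minimal sketch of the same pattern; the Cache class and its lookup() method are illustrative stand-ins, not llama-cpp-python's actual cache API.

# Minimal sketch of the "and False" kill switch used in the diff above.
# The Cache class and lookup() are illustrative, not llama-cpp-python's API.

class Cache:
    def __init__(self):
        self._store = {}

    def lookup(self, key):
        print("cache consulted for", key)
        return self._store.get(key)


cache = Cache()  # in llama.py this would be self.cache, which may also be None

# Normal guard: runs whenever a cache is configured.
if cache:
    cache.lookup("prompt")

# Temporarily disabled guard: "and False" forces the condition to be falsy,
# so this block never executes; deleting "and False" restores the behavior.
if cache and False:
    cache.lookup("prompt")  # never reached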