updated submodules · masmullin2000/llama_cpp-rs@0ab877f · GitHub

Commit 0ab877f

updated submodules
1 parent 012baa2 · commit 0ab877f

5 files changed: +66, -19 lines changed

crates/llama_cpp/src/model/params.rs

Lines changed: 8 additions & 8 deletions
@@ -4,8 +4,8 @@ use std::ptr;
 
 use llama_cpp_sys::{
     llama_model_default_params, llama_model_params, llama_split_mode,
-    llama_split_mode_LLAMA_SPLIT_LAYER, llama_split_mode_LLAMA_SPLIT_NONE,
-    llama_split_mode_LLAMA_SPLIT_ROW,
+    llama_split_mode_LLAMA_SPLIT_MODE_LAYER, llama_split_mode_LLAMA_SPLIT_MODE_NONE,
+    llama_split_mode_LLAMA_SPLIT_MODE_ROW,
 };
 
 /// Parameters for llama.
@@ -65,9 +65,9 @@ pub enum SplitMode {
 impl From<SplitMode> for llama_split_mode {
     fn from(value: SplitMode) -> Self {
         match value {
-            SplitMode::None => llama_split_mode_LLAMA_SPLIT_NONE,
-            SplitMode::Layer => llama_split_mode_LLAMA_SPLIT_LAYER,
-            SplitMode::Row => llama_split_mode_LLAMA_SPLIT_ROW,
+            SplitMode::None => llama_split_mode_LLAMA_SPLIT_MODE_NONE,
+            SplitMode::Layer => llama_split_mode_LLAMA_SPLIT_MODE_LAYER,
+            SplitMode::Row => llama_split_mode_LLAMA_SPLIT_MODE_ROW,
         }
     }
 }
@@ -76,9 +76,9 @@ impl From<llama_split_mode> for SplitMode {
     fn from(value: llama_split_mode) -> Self {
         #![allow(non_upper_case_globals)]
         match value {
-            llama_split_mode_LLAMA_SPLIT_NONE => SplitMode::None,
-            llama_split_mode_LLAMA_SPLIT_LAYER => SplitMode::Layer,
-            llama_split_mode_LLAMA_SPLIT_ROW => SplitMode::Row,
+            llama_split_mode_LLAMA_SPLIT_MODE_NONE => SplitMode::None,
+            llama_split_mode_LLAMA_SPLIT_MODE_LAYER => SplitMode::Layer,
+            llama_split_mode_LLAMA_SPLIT_MODE_ROW => SplitMode::Row,
             _ => unimplemented!(),
         }
     }
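
Note: the change above only swaps the bindgen constant names (LLAMA_SPLIT_* became LLAMA_SPLIT_MODE_* upstream); the public SplitMode enum and its two From conversions keep their shape. A minimal round-trip sketch under those assumptions, with llama_cpp::SplitMode as an assumed re-export path (not part of this commit):

// Hypothetical round-trip through the raw bindings type; the function and
// `main` below are illustration only, not code from this repository.
use llama_cpp::SplitMode;            // assumed re-export of model::params::SplitMode
use llama_cpp_sys::llama_split_mode; // bindgen alias for the C enum

fn split_mode_round_trip(mode: SplitMode) -> SplitMode {
    let raw: llama_split_mode = mode.into(); // From<SplitMode> above
    SplitMode::from(raw)                     // From<llama_split_mode> above
}

fn main() {
    // Layer splitting should survive the raw round trip.
    let back = split_mode_round_trip(SplitMode::Layer);
    println!("{}", matches!(back, SplitMode::Layer));
}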

crates/llama_cpp/src/session/params.rs

Lines changed: 54 additions & 7 deletions
@@ -1,8 +1,49 @@
 //! Implements [`SessionParams`], which configures a [`crate::LlamaSession`]
 
-use std::ptr;
+use std::ptr::null_mut;
+
+use llama_cpp_sys::{
+    ggml_type, llama_context_default_params, llama_context_params, llama_pooling_type,
+    llama_pooling_type_LLAMA_POOLING_TYPE_CLS, llama_pooling_type_LLAMA_POOLING_TYPE_MEAN,
+    llama_pooling_type_LLAMA_POOLING_TYPE_NONE, llama_pooling_type_LLAMA_POOLING_TYPE_UNSPECIFIED,
+};
+
+/// whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
+#[derive(Clone, Copy, Debug)]
+pub enum PoolingType {
+    /// Unspecified.
+    Unspecified,
+    /// Don't pool.
+    None,
+    /// TODO lookup what this does
+    Mean,
+    /// TODO lookup what this does
+    Cls,
+}
 
-use llama_cpp_sys::{ggml_type, llama_context_default_params, llama_context_params};
+impl From<PoolingType> for llama_pooling_type {
+    fn from(value: PoolingType) -> Self {
+        match value {
+            PoolingType::Unspecified => llama_pooling_type_LLAMA_POOLING_TYPE_UNSPECIFIED,
+            PoolingType::None => llama_pooling_type_LLAMA_POOLING_TYPE_NONE,
+            PoolingType::Mean => llama_pooling_type_LLAMA_POOLING_TYPE_MEAN,
+            PoolingType::Cls => llama_pooling_type_LLAMA_POOLING_TYPE_CLS,
+        }
+    }
+}
+
+impl From<llama_pooling_type> for PoolingType {
+    fn from(value: llama_pooling_type) -> Self {
+        #![allow(non_upper_case_globals)]
+        match value {
+            llama_pooling_type_LLAMA_POOLING_TYPE_UNSPECIFIED => PoolingType::Unspecified,
+            llama_pooling_type_LLAMA_POOLING_TYPE_NONE => PoolingType::None,
+            llama_pooling_type_LLAMA_POOLING_TYPE_MEAN => PoolingType::Mean,
+            llama_pooling_type_LLAMA_POOLING_TYPE_CLS => PoolingType::Cls,
+            _ => unimplemented!(),
+        }
+    }
+}
 
 /// Session-specific parameters.
 #[derive(Clone)]
@@ -61,7 +102,10 @@ pub struct SessionParams {
     pub offload_kqv: bool,
 
     /// whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
-    pub pooling: bool,
+    pub pooling: PoolingType,
+
+    /// defragment the KV cache if holes/size > thold, < 0 disabled (default)
+    defrag_threshold: f32,
 }
 
 impl Default for SessionParams {
@@ -91,7 +135,8 @@ impl Default for SessionParams {
             type_v: c_defaults.type_v as u32,
             embedding: c_defaults.embedding,
             offload_kqv: c_defaults.offload_kqv,
-            pooling: c_defaults.do_pooling,
+            pooling: c_defaults.pooling_type.into(),
+            defrag_threshold: c_defaults.defrag_thold,
         }
     }
 }
@@ -112,15 +157,17 @@ impl From<SessionParams> for llama_context_params {
             yarn_beta_fast: value.yarn_beta_fast,
             yarn_beta_slow: value.yarn_beta_slow,
             yarn_orig_ctx: value.yarn_orig_ctx,
+            defrag_thold: value.defrag_threshold,
             cb_eval: None,
-            cb_eval_user_data: ptr::null_mut(),
+            cb_eval_user_data: null_mut(),
             type_k: value.type_k as ggml_type,
             type_v: value.type_v as ggml_type,
-            mul_mat_q: true, // Deprecated
             logits_all: false, // Deprecated
             embedding: value.embedding,
             offload_kqv: value.offload_kqv,
-            do_pooling: value.pooling,
+            pooling_type: value.pooling.into(),
+            abort_callback: None,
+            abort_callback_data: null_mut(),
         }
     }
 }
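
Note: for callers, the visible change in this file is that SessionParams::pooling is now a PoolingType instead of a bool, while the new defrag_threshold field stays crate-private and is only populated from the llama.cpp defaults. A minimal caller-side sketch under those assumptions, with llama_cpp::{PoolingType, SessionParams} as assumed re-export paths (not part of this commit):

// Hypothetical usage sketch; the import paths and the `embedding` field's
// visibility are assumptions, not code from this repository.
use llama_cpp::{PoolingType, SessionParams};

fn embedding_session_params() -> SessionParams {
    // Defaults now also carry pooling_type and defrag_thold
    // (see the Default impl in the diff above).
    let mut params = SessionParams::default();
    params.embedding = true;            // assumed public like the other fields
    // Previously `pooling` was a bool; now a variant is selected instead.
    params.pooling = PoolingType::Mean;
    params
}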

crates/llama_cpp_sys/include/build-info.h

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@
 #ifndef BUILD_INFO_H
 #define BUILD_INFO_H
 
-#define BUILD_NUMBER 2234
-#define BUILD_COMMIT "973053d"
+#define BUILD_NUMBER 2333
+#define BUILD_COMMIT "4ffcdce"
 
 #endif // BUILD_INFO_H

0 commit comments
