From dabcb105ccfeac309f63d760cec63d0de4ad3bbf Mon Sep 17 00:00:00 2001 From: Navid Haghighat Date: Sat, 22 Feb 2025 20:27:08 +0100 Subject: [PATCH 01/65] Add grammar_lazy sampler --- llama-cpp-2/src/sampling.rs | 43 +++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/llama-cpp-2/src/sampling.rs b/llama-cpp-2/src/sampling.rs index 1c9663bf..c0c1c84c 100644 --- a/llama-cpp-2/src/sampling.rs +++ b/llama-cpp-2/src/sampling.rs @@ -239,6 +239,49 @@ impl LlamaSampler { Self { sampler } } + /// Lazy grammar sampler, introduced in + /// + /// This sampler enforces grammar rules only when specific trigger words or tokens are encountered. + /// + /// # Panics + /// - If `grammar_str` or `grammar_root` contain null bytes + /// - If any trigger word contains null bytes + #[must_use] + pub fn grammar_lazy( + model: &LlamaModel, + grammar_str: &str, + grammar_root: &str, + trigger_words: impl IntoIterator>, + trigger_tokens: &[LlamaToken], + ) -> Self { + let grammar_str = CString::new(grammar_str).unwrap(); + let grammar_root = CString::new(grammar_root).unwrap(); + + let trigger_word_cstrings: Vec = trigger_words + .into_iter() + .map(|word| CString::new(word.as_ref()).unwrap()) + .collect(); + + let mut trigger_word_ptrs: Vec<*const c_char> = trigger_word_cstrings + .iter() + .map(|cs| cs.as_ptr()) + .collect(); + + let sampler = unsafe { + llama_cpp_sys_2::llama_sampler_init_grammar_lazy( + model.vocab_ptr(), + grammar_str.as_ptr(), + grammar_root.as_ptr(), + trigger_word_ptrs.as_mut_ptr(), + trigger_word_ptrs.len(), + trigger_tokens.as_ptr().cast(), + trigger_tokens.len(), + ) + }; + + Self { sampler } + } + /// DRY sampler, designed by p-e-w, as described in: /// , porting Koboldcpp /// implementation authored by pi6am: From c1e17d78740bc4d904b65f9c4b3588f30a41a67b Mon Sep 17 00:00:00 2001 From: Navid Haghighat Date: Sat, 22 Feb 2025 20:34:17 +0100 Subject: [PATCH 02/65] Add top_n_sigma sampler --- llama-cpp-2/src/sampling.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/llama-cpp-2/src/sampling.rs b/llama-cpp-2/src/sampling.rs index c0c1c84c..8488ed4f 100644 --- a/llama-cpp-2/src/sampling.rs +++ b/llama-cpp-2/src/sampling.rs @@ -191,6 +191,37 @@ impl LlamaSampler { Self { sampler } } + /// Top-nσ sampling as described in academic paper "Top-nσ: Not All Logits Are You Need" + /// + /// + /// This method filters logits by selecting only those within *n* standard deviations of the mean. + /// + /// # Parameters + /// - `n`: Number of standard deviations from the mean to include in sampling + /// + /// # Example + /// ```rust + /// use llama_cpp_2::sampling::LlamaSampler; + /// use llama_cpp_2::token::{ + /// LlamaToken, + /// data::LlamaTokenData, + /// data_array::LlamaTokenDataArray + /// }; + /// + /// let mut data_array = LlamaTokenDataArray::new(vec![ + /// LlamaTokenData::new(LlamaToken(0), 0.0, 0.0), + /// LlamaTokenData::new(LlamaToken(1), 1.0, 0.0), + /// LlamaTokenData::new(LlamaToken(2), 2.0, 0.0), + /// ], false); + /// + /// data_array.apply_sampler(&mut LlamaSampler::top_n_sigma(2.0)); + /// ``` + #[must_use] + pub fn top_n_sigma(n: f32) -> Self { + let sampler = unsafe { llama_cpp_sys_2::llama_sampler_init_top_n_sigma(n) }; + Self { sampler } + } + /// Locally Typical Sampling implementation described in the paper . #[must_use] pub fn typical(p: f32, min_keep: usize) -> Self { From 71a8e22a0a84091e19bdc0cdb06467ab4b36cdf4 Mon Sep 17 00:00:00 2001 From: Navid Haghighat Date: Sat, 22 Feb 2025 20:39:10 +0100 Subject: [PATCH 03/65] Add reset sampler --- llama-cpp-2/src/sampling.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llama-cpp-2/src/sampling.rs b/llama-cpp-2/src/sampling.rs index 8488ed4f..982cd524 100644 --- a/llama-cpp-2/src/sampling.rs +++ b/llama-cpp-2/src/sampling.rs @@ -62,6 +62,15 @@ impl LlamaSampler { self } + /// Resets the internal state of the sampler. + /// + /// This can be useful when you want to start fresh with a sampler without creating a new instance. + pub fn reset(&mut self) { + unsafe { + llama_cpp_sys_2::llama_sampler_reset(self.sampler); + } + } + /// Combines a list of samplers into a single sampler that applies each component sampler one /// after another. /// From 914bce3c2672c04560c496ba7d5f2438ad807b80 Mon Sep 17 00:00:00 2001 From: Navid Haghighat Date: Sat, 22 Feb 2025 20:51:20 +0100 Subject: [PATCH 04/65] Add get_seed method --- llama-cpp-2/src/sampling.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/llama-cpp-2/src/sampling.rs b/llama-cpp-2/src/sampling.rs index 982cd524..a659ab73 100644 --- a/llama-cpp-2/src/sampling.rs +++ b/llama-cpp-2/src/sampling.rs @@ -71,6 +71,17 @@ impl LlamaSampler { } } + /// Gets the random seed used by this sampler. + /// + /// Returns: + /// - For random samplers (dist, mirostat, mirostat_v2): returns their current seed + /// - For sampler chains: returns the first non-default seed found in reverse order + /// - For all other samplers: returns 0xFFFFFFFF + #[must_use] + pub fn get_seed(&self) -> u32 { + unsafe { llama_cpp_sys_2::llama_sampler_get_seed(self.sampler) } + } + /// Combines a list of samplers into a single sampler that applies each component sampler one /// after another. /// From 2031385f8b72a9a01d2809c955922897ee76cbb1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Feb 2025 16:44:10 +0000 Subject: [PATCH 05/65] chore(deps): bump anyhow from 1.0.95 to 1.0.96 Bumps [anyhow](https://github.com/dtolnay/anyhow) from 1.0.95 to 1.0.96. - [Release notes](https://github.com/dtolnay/anyhow/releases) - [Commits](https://github.com/dtolnay/anyhow/compare/1.0.95...1.0.96) --- updated-dependencies: - dependency-name: anyhow dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0b76942f..6c5851b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -68,9 +68,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.95" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" [[package]] name = "base64" diff --git a/Cargo.toml b/Cargo.toml index 2541f7cc..def0c7eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" cc = "1.2.15" -anyhow = "1.0.95" +anyhow = "1.0.96" clap = "4.5.31" encoding_rs = "0.8.35" tracing-subscriber = { version = "0.3", features = ["json"] } From 3997277b22462e39dc5a8c3389573877a5ee33cb Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 24 Feb 2025 16:48:20 +0000 Subject: [PATCH 06/65] Bump version to 0.1.100 [skip ci] --- Cargo.lock | 8 ++++---- examples/embeddings/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- llama-cpp-2/Cargo.toml | 2 +- llama-cpp-sys-2/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0b76942f..c0f04bfd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -277,7 +277,7 @@ checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "embeddings" -version = "0.1.99" +version = "0.1.100" dependencies = [ "anyhow", "clap", @@ -653,7 +653,7 @@ checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "llama-cpp-2" -version = "0.1.99" +version = "0.1.100" dependencies = [ "encoding_rs", "enumflags2", @@ -665,7 +665,7 @@ dependencies = [ [[package]] name = "llama-cpp-sys-2" -version = "0.1.99" +version = "0.1.100" dependencies = [ "bindgen", "cc", @@ -1105,7 +1105,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple" -version = "0.1.99" +version = "0.1.100" dependencies = [ "anyhow", "clap", diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml index 07a22ddc..a87a1b0e 100644 --- a/examples/embeddings/Cargo.toml +++ b/examples/embeddings/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "embeddings" -version = "0.1.99" +version = "0.1.100" edition = "2021" [dependencies] diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index a891fc7b..70e7deb0 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simple" -version = "0.1.99" +version = "0.1.100" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index cb96b4f5..c703a2be 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-2" description = "llama.cpp bindings for Rust" -version = "0.1.99" +version = "0.1.100" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index 8ab91931..3175c736 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-sys-2" description = "Low Level Bindings to llama.cpp" -version = "0.1.99" +version = "0.1.100" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" From 2946b7f9b1cd4b7a1b2e7607344e48b0e5a9bf8e Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 24 Feb 2025 19:15:59 +0000 Subject: [PATCH 07/65] Bump version to 0.1.101 [skip ci] --- Cargo.lock | 8 ++++---- examples/embeddings/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- llama-cpp-2/Cargo.toml | 2 +- llama-cpp-sys-2/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 66cf2087..3f1c9f7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -277,7 +277,7 @@ checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "embeddings" -version = "0.1.100" +version = "0.1.101" dependencies = [ "anyhow", "clap", @@ -653,7 +653,7 @@ checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "llama-cpp-2" -version = "0.1.100" +version = "0.1.101" dependencies = [ "encoding_rs", "enumflags2", @@ -665,7 +665,7 @@ dependencies = [ [[package]] name = "llama-cpp-sys-2" -version = "0.1.100" +version = "0.1.101" dependencies = [ "bindgen", "cc", @@ -1105,7 +1105,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple" -version = "0.1.100" +version = "0.1.101" dependencies = [ "anyhow", "clap", diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml index a87a1b0e..f1067b8d 100644 --- a/examples/embeddings/Cargo.toml +++ b/examples/embeddings/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "embeddings" -version = "0.1.100" +version = "0.1.101" edition = "2021" [dependencies] diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index 70e7deb0..b3173a21 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simple" -version = "0.1.100" +version = "0.1.101" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index c703a2be..27a34b2d 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-2" description = "llama.cpp bindings for Rust" -version = "0.1.100" +version = "0.1.101" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index 3175c736..fe45ce25 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-sys-2" description = "Low Level Bindings to llama.cpp" -version = "0.1.100" +version = "0.1.101" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" From b30abebcabfcdbe0a3cf9f511227710abc7707de Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Tue, 25 Feb 2025 11:45:56 +0100 Subject: [PATCH 08/65] explicitly apply optimization flags to msvc compiler also on other profiles than debug --- llama-cpp-sys-2/build.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index ec4ac7ce..5710c472 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -277,10 +277,7 @@ fn main() { config.define("GGML_BLAS", "OFF"); } - if (cfg!(debug_assertions) - || std::env::var("PROFILE").as_ref().map(String::as_str) == Ok("debug")) - && matches!(target_os, TargetOs::Windows(WindowsVariant::Msvc)) - && profile == "Release" + if (matches!(target_os, TargetOs::Windows(WindowsVariant::Msvc)) && matches!(profile.as_str(), "Release" | "RelWithDebInfo" | "MinSizeRel")) { // Debug Rust builds under MSVC turn off optimization even though we're ideally building the release profile of llama.cpp. // Looks like an upstream bug: From 1d49ebf6a784458105dede65a4f8440049623ad3 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Tue, 25 Feb 2025 12:22:03 +0100 Subject: [PATCH 09/65] use CString::new instead of Cstring::from_Str to init chat template --- llama-cpp-2/src/model.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs index 6425dc79..9c83a795 100644 --- a/llama-cpp-2/src/model.rs +++ b/llama-cpp-2/src/model.rs @@ -4,7 +4,7 @@ use std::num::NonZeroU16; use std::os::raw::c_int; use std::path::Path; use std::ptr::NonNull; -use std::str::{FromStr, Utf8Error}; +use std::str::Utf8Error; use crate::context::params::LlamaContextParams; use crate::context::LlamaContext; @@ -47,7 +47,7 @@ impl LlamaChatTemplate { /// Create a new template from a string. This can either be the name of a llama.cpp [chat template](https://github.com/ggerganov/llama.cpp/blob/8a8c4ceb6050bd9392609114ca56ae6d26f5b8f5/src/llama-chat.cpp#L27-L61) /// like "chatml" or "llama3" or an actual Jinja template for llama.cpp to interpret. pub fn new(template: &str) -> Result { - Ok(Self(CString::from_str(template)?)) + Ok(Self(CString::new(template)?)) } /// Accesses the template as a c string reference. From cfa76bd567516d897aae85d868a3b26fc8295134 Mon Sep 17 00:00:00 2001 From: Vitali Lovich Date: Fri, 21 Feb 2025 18:11:36 -0800 Subject: [PATCH 10/65] Don't imply dynamic llama.cpp just because CUDA is on Link against CUDA statically as well to maintain consistency with GGML_STATIC although technically that's our discretion. --- Cargo.lock | 10 ++++++++++ llama-cpp-2/Cargo.toml | 1 + llama-cpp-sys-2/Cargo.toml | 5 ++++- llama-cpp-sys-2/build.rs | 31 ++++++++++++++++++++++++++++++- 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48f1a2a1..85918c3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -336,6 +336,15 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +[[package]] +name = "find_cuda_helper" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9f9e65c593dd01ac77daad909ea4ad17f0d6d1776193fc8ea766356177abdad" +dependencies = [ + "glob", +] + [[package]] name = "flate2" version = "1.0.30" @@ -670,6 +679,7 @@ dependencies = [ "bindgen", "cc", "cmake", + "find_cuda_helper", "glob", "walkdir", ] diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index e836af25..97028540 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -21,6 +21,7 @@ encoding_rs = { workspace = true } [features] default = ["openmp", "android-shared-stdcxx"] cuda = ["llama-cpp-sys-2/cuda"] +cuda-no-vmm = ["cuda", "llama-cpp-sys-2/cuda-no-vmm"] metal = ["llama-cpp-sys-2/metal"] dynamic-link = ["llama-cpp-sys-2/dynamic-link"] vulkan = ["llama-cpp-sys-2/vulkan"] diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index 5cf25353..6854794d 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -74,15 +74,18 @@ include = [ bindgen = { workspace = true } cc = { workspace = true, features = ["parallel"] } cmake = "0.1" +find_cuda_helper = "0.2.0" glob = "0.3.2" walkdir = "2" [features] cuda = [] +# Disables the need to dynamically link against libcuda.so / cuda.dll +cuda-no-vmm = ["cuda"] metal = [] dynamic-link = [] vulkan = [] native = [] openmp = [] # Only has an impact on Android. -shared-stdcxx = [] +shared-stdcxx = [] \ No newline at end of file diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index ec4ac7ce..206baddf 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -179,7 +179,7 @@ fn main() { let target_dir = get_cargo_target_dir().unwrap(); let manifest_dir = env::var("CARGO_MANIFEST_DIR").expect("Failed to get CARGO_MANIFEST_DIR"); let llama_src = Path::new(&manifest_dir).join("llama.cpp"); - let build_shared_libs = cfg!(feature = "cuda") || cfg!(feature = "dynamic-link"); + let build_shared_libs = cfg!(feature = "dynamic-link"); let build_shared_libs = std::env::var("LLAMA_BUILD_SHARED_LIBS") .map(|v| v == "1") @@ -355,6 +355,10 @@ fn main() { if cfg!(feature = "cuda") { config.define("GGML_CUDA", "ON"); + + if cfg!(feature = "cuda-no-vmm") { + config.define("GGML_CUDA_NO_VMM", "ON"); + } } // Android doesn't have OpenMP support AFAICT and openmp is a default feature. Do this here @@ -394,6 +398,31 @@ fn main() { ); println!("cargo:rustc-link-search={}", build_dir.display()); + if cfg!(feature = "cuda") && !build_shared_libs { + println!("cargo:rerun-if-env-changed=CUDA_PATH"); + + for lib_dir in find_cuda_helper::find_cuda_lib_dirs() { + println!("cargo:rustc-link-search=native={}", lib_dir.display()); + } + + // Logic from ggml-cuda/CMakeLists.txt + println!("cargo:rustc-link-lib=static=cudart_static"); + if matches!(target_os, TargetOs::Windows(_)) { + println!("cargo:rustc-link-lib=static=cublas"); + println!("cargo:rustc-link-lib=static=cublasLt"); + } else { + println!("cargo:rustc-link-lib=static=cublas_static"); + println!("cargo:rustc-link-lib=static=cublasLt_static"); + } + + // Need to link against libcuda.so unless GGML_CUDA_NO_VMM is defined. + if !cfg!(feature = "cuda-no-vmm") { + println!("cargo:rustc-link-lib=cuda"); + } + + println!("cargo:rustc-link-lib=static=culibos"); + } + // Link libraries let llama_libs_kind = if build_shared_libs { "dylib" } else { "static" }; let llama_libs = extract_lib_names(&out_dir, build_shared_libs); From ef3a8a37f59245bb65e98702ba6f3bde717b1396 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 25 Feb 2025 19:56:04 +0000 Subject: [PATCH 11/65] Bump version to 0.1.102 [skip ci] --- Cargo.lock | 8 ++++---- examples/embeddings/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- llama-cpp-2/Cargo.toml | 2 +- llama-cpp-sys-2/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 37fcddd5..447bf77f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -277,7 +277,7 @@ checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "embeddings" -version = "0.1.101" +version = "0.1.102" dependencies = [ "anyhow", "clap", @@ -662,7 +662,7 @@ checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "llama-cpp-2" -version = "0.1.101" +version = "0.1.102" dependencies = [ "encoding_rs", "enumflags2", @@ -674,7 +674,7 @@ dependencies = [ [[package]] name = "llama-cpp-sys-2" -version = "0.1.101" +version = "0.1.102" dependencies = [ "bindgen", "cc", @@ -1115,7 +1115,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple" -version = "0.1.101" +version = "0.1.102" dependencies = [ "anyhow", "clap", diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml index f1067b8d..c96551ec 100644 --- a/examples/embeddings/Cargo.toml +++ b/examples/embeddings/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "embeddings" -version = "0.1.101" +version = "0.1.102" edition = "2021" [dependencies] diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index b3173a21..cb5d4e07 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simple" -version = "0.1.101" +version = "0.1.102" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index 8235ca03..d85ca5af 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-2" description = "llama.cpp bindings for Rust" -version = "0.1.101" +version = "0.1.102" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index d37a143a..0d07eefd 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-sys-2" description = "Low Level Bindings to llama.cpp" -version = "0.1.101" +version = "0.1.102" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" From 748d58d05774d049bf91ee963ef7405d8d332618 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 25 Feb 2025 20:50:58 +0000 Subject: [PATCH 12/65] Bump version to 0.1.103 [skip ci] --- Cargo.lock | 8 ++++---- examples/embeddings/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- llama-cpp-2/Cargo.toml | 2 +- llama-cpp-sys-2/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 447bf77f..f581047b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -277,7 +277,7 @@ checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "embeddings" -version = "0.1.102" +version = "0.1.103" dependencies = [ "anyhow", "clap", @@ -662,7 +662,7 @@ checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "llama-cpp-2" -version = "0.1.102" +version = "0.1.103" dependencies = [ "encoding_rs", "enumflags2", @@ -674,7 +674,7 @@ dependencies = [ [[package]] name = "llama-cpp-sys-2" -version = "0.1.102" +version = "0.1.103" dependencies = [ "bindgen", "cc", @@ -1115,7 +1115,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple" -version = "0.1.102" +version = "0.1.103" dependencies = [ "anyhow", "clap", diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml index c96551ec..c3c5b533 100644 --- a/examples/embeddings/Cargo.toml +++ b/examples/embeddings/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "embeddings" -version = "0.1.102" +version = "0.1.103" edition = "2021" [dependencies] diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index cb5d4e07..98867c57 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simple" -version = "0.1.102" +version = "0.1.103" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index d85ca5af..bf52467c 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-2" description = "llama.cpp bindings for Rust" -version = "0.1.102" +version = "0.1.103" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index 0d07eefd..6db13f98 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-sys-2" description = "Low Level Bindings to llama.cpp" -version = "0.1.102" +version = "0.1.103" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" From 727419ab4d30e7a53c482bbf44428d41c61e8e9a Mon Sep 17 00:00:00 2001 From: Kusaanko <39370373+kusaanko@users.noreply.github.com> Date: Wed, 26 Feb 2025 18:28:33 +0900 Subject: [PATCH 13/65] Fix to check should be the event recorded --- llama-cpp-2/src/log.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/llama-cpp-2/src/log.rs b/llama-cpp-2/src/log.rs index 1c324b4b..e77f94bb 100644 --- a/llama-cpp-2/src/log.rs +++ b/llama-cpp-2/src/log.rs @@ -142,16 +142,18 @@ impl State { let (meta, fields) = meta_for_level(level); tracing::dispatcher::get_default(|dispatcher| { - dispatcher.event(&tracing::Event::new( - meta, - &meta.fields().value_set(&[ - (&fields.message, Some(&text as &dyn tracing::field::Value)), - ( - &fields.target, - module.as_ref().map(|s| s as &dyn tracing::field::Value), - ), - ]), - )); + if dispatcher.enabled(meta) { + dispatcher.event(&tracing::Event::new( + meta, + &meta.fields().value_set(&[ + (&fields.message, Some(&text as &dyn tracing::field::Value)), + ( + &fields.target, + module.as_ref().map(|s| s as &dyn tracing::field::Value), + ), + ]), + )); + } }); } From 5f3a29ed9445a50a64acc4d331c8a1e914e0fe91 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Wed, 26 Feb 2025 11:58:49 +0100 Subject: [PATCH 14/65] copy src into target, not src --- llama-cpp-sys-2/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index 6179ab60..e2a56b38 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -379,7 +379,7 @@ fn main() { std::fs::rename(&build_info_src,&build_info_target).unwrap_or_else(|move_e| { // Rename may fail if the target directory is on a different filesystem/disk from the source. // Fall back to copy + delete to achieve the same effect in this case. - std::fs::copy(&build_info_src, &build_info_src).unwrap_or_else(|copy_e| { + std::fs::copy(&build_info_src, &build_info_target).unwrap_or_else(|copy_e| { panic!("Failed to rename {build_info_src:?} to {build_info_target:?}. Move failed with {move_e:?} and copy failed with {copy_e:?}"); }); std::fs::remove_file(&build_info_src).unwrap_or_else(|e| { From bde92481525aa4c21c3de74911b46f4615880e2e Mon Sep 17 00:00:00 2001 From: Vitali Lovich Date: Fri, 28 Feb 2025 22:36:03 -0800 Subject: [PATCH 15/65] Expose n_head_kv --- llama-cpp-2/src/model.rs | 7 +++++++ llama-cpp-sys-2/llama.cpp | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs index 9c83a795..69e938c5 100644 --- a/llama-cpp-2/src/model.rs +++ b/llama-cpp-2/src/model.rs @@ -484,6 +484,13 @@ impl LlamaModel { u32::try_from(unsafe { llama_cpp_sys_2::llama_model_n_head(self.model.as_ptr()) }).unwrap() } + /// Returns the number of KV attention heads. + pub fn n_head_kv(&self) -> u32 { + // It's never possible for this to panic because while the API interface is defined as an int32_t, + // the field it's accessing is a uint32_t. + u32::try_from(unsafe { llama_cpp_sys_2::llama_model_n_head_kv(self.model.as_ptr()) }).unwrap() + } + /// Returns the rope type of the model. pub fn rope_type(&self) -> Option { match unsafe { llama_cpp_sys_2::llama_model_rope_type(self.model.as_ptr()) } { diff --git a/llama-cpp-sys-2/llama.cpp b/llama-cpp-sys-2/llama.cpp index 300907b2..06c2b156 160000 --- a/llama-cpp-sys-2/llama.cpp +++ b/llama-cpp-sys-2/llama.cpp @@ -1 +1 @@ -Subproject commit 300907b2110cc17b4337334dc397e05de2d8f5e0 +Subproject commit 06c2b1561d8b882bc018554591f8c35eb04ad30e From 4809af31e29828565733138538ce8651dc92718a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Mar 2025 05:09:39 +0000 Subject: [PATCH 16/65] chore(deps): bump docker/setup-qemu-action from 3.4.0 to 3.6.0 Bumps [docker/setup-qemu-action](https://github.com/docker/setup-qemu-action) from 3.4.0 to 3.6.0. - [Release notes](https://github.com/docker/setup-qemu-action/releases) - [Commits](https://github.com/docker/setup-qemu-action/compare/4574d27a4764455b42196d70a065bc6853246a25...29109295f81e9208d7d86ff1c6c12d2833863392) --- updated-dependencies: - dependency-name: docker/setup-qemu-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/llama-cpp-rs-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llama-cpp-rs-check.yml b/.github/workflows/llama-cpp-rs-check.yml index 170a1d29..62f0a0ab 100644 --- a/.github/workflows/llama-cpp-rs-check.yml +++ b/.github/workflows/llama-cpp-rs-check.yml @@ -45,7 +45,7 @@ jobs: - name: checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Setup QEMU - uses: docker/setup-qemu-action@4574d27a4764455b42196d70a065bc6853246a25 + uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 with: platforms: arm64,amd64 - name: Set up Docker Buildx From c8a569086accd2821026dfaf840f5125c7397aaa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Mar 2025 05:58:20 +0000 Subject: [PATCH 17/65] chore(deps): bump anyhow from 1.0.96 to 1.0.97 Bumps [anyhow](https://github.com/dtolnay/anyhow) from 1.0.96 to 1.0.97. - [Release notes](https://github.com/dtolnay/anyhow/releases) - [Commits](https://github.com/dtolnay/anyhow/compare/1.0.96...1.0.97) --- updated-dependencies: - dependency-name: anyhow dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f581047b..abf819d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -68,9 +68,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.96" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" +checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" [[package]] name = "base64" diff --git a/Cargo.toml b/Cargo.toml index def0c7eb..74629883 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" cc = "1.2.15" -anyhow = "1.0.96" +anyhow = "1.0.97" clap = "4.5.31" encoding_rs = "0.8.35" tracing-subscriber = { version = "0.3", features = ["json"] } From 792828aa10caa0ad9190eb0bdefaab887add06c0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 5 Mar 2025 23:27:13 +0000 Subject: [PATCH 18/65] chore(deps): bump docker/setup-buildx-action from 3.9.0 to 3.10.0 Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 3.9.0 to 3.10.0. - [Release notes](https://github.com/docker/setup-buildx-action/releases) - [Commits](https://github.com/docker/setup-buildx-action/compare/f7ce87c1d6bead3e36075b2ce75da1f6cc28aaca...b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2) --- updated-dependencies: - dependency-name: docker/setup-buildx-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/llama-cpp-rs-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llama-cpp-rs-check.yml b/.github/workflows/llama-cpp-rs-check.yml index 62f0a0ab..5277c82d 100644 --- a/.github/workflows/llama-cpp-rs-check.yml +++ b/.github/workflows/llama-cpp-rs-check.yml @@ -49,7 +49,7 @@ jobs: with: platforms: arm64,amd64 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@f7ce87c1d6bead3e36075b2ce75da1f6cc28aaca + uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 - name: Build uses: docker/build-push-action@v6 with: From 958a1d0f1d1f4def8df70e7a11952a8a0bcddd1f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 5 Mar 2025 23:27:21 +0000 Subject: [PATCH 19/65] chore(deps): bump cc from 1.2.15 to 1.2.16 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.2.15 to 1.2.16. - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.15...cc-v1.2.16) --- updated-dependencies: - dependency-name: cc dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index abf819d6..bef01d80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,9 +109,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "cc" -version = "1.2.15" +version = "1.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af" +checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" dependencies = [ "jobserver", "libc", diff --git a/Cargo.toml b/Cargo.toml index 74629883..f7d4fa9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ hf-hub = { version = "0.3.2" } criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" -cc = "1.2.15" +cc = "1.2.16" anyhow = "1.0.97" clap = "4.5.31" encoding_rs = "0.8.35" From 4ba3962587081a458ce521ed8f72226b2dcd184e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 7 Mar 2025 17:05:02 +0000 Subject: [PATCH 20/65] chore(deps): bump ring from 0.17.8 to 0.17.13 Bumps [ring](https://github.com/briansmith/ring) from 0.17.8 to 0.17.13. - [Changelog](https://github.com/briansmith/ring/blob/main/RELEASES.md) - [Commits](https://github.com/briansmith/ring/commits) --- updated-dependencies: - dependency-name: ring dependency-type: indirect ... Signed-off-by: dependabot[bot] --- Cargo.lock | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bef01d80..ed22933b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -635,7 +635,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.52.5", + "windows-targets 0.48.5", ] [[package]] @@ -957,15 +957,14 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.8" +version = "0.17.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee" dependencies = [ "cc", "cfg-if", "getrandom", "libc", - "spin", "untrusted", "windows-sys 0.52.0", ] @@ -1131,12 +1130,6 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -1437,7 +1430,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.48.0", ] [[package]] From e1b544819093e06a1b04a8cbcd3da157820067a0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Mar 2025 06:03:15 +0000 Subject: [PATCH 21/65] chore(deps): bump clap from 4.5.31 to 4.5.32 Bumps [clap](https://github.com/clap-rs/clap) from 4.5.31 to 4.5.32. - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/v4.5.31...clap_complete-v4.5.32) --- updated-dependencies: - dependency-name: clap dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 16 ++++++++-------- Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ed22933b..5e97a30f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,9 +146,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.31" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "027bb0d98429ae334a8698531da7077bdf906419543a35a55c2cb1b66437d767" +checksum = "6088f3ae8c3608d19260cd7445411865a485688711b78b5be70d78cd96136f83" dependencies = [ "clap_builder", "clap_derive", @@ -156,9 +156,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.31" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5589e0cba072e0f3d23791efac0fd8627b49c829c196a492e88168e6a669d863" +checksum = "22a7ef7f676155edfb82daa97f99441f3ebf4a58d5e32f295a56259f1b6facc8" dependencies = [ "anstream", "anstyle", @@ -168,9 +168,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.28" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed" +checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" dependencies = [ "heck", "proc-macro2", @@ -635,7 +635,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.5", ] [[package]] @@ -1430,7 +1430,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index f7d4fa9e..d5854438 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ pprof = "0.13.0" bindgen = "0.69.5" cc = "1.2.16" anyhow = "1.0.97" -clap = "4.5.31" +clap = "4.5.32" encoding_rs = "0.8.35" tracing-subscriber = { version = "0.3", features = ["json"] } From 15882655ca7dbf9e99254425554d9c2f86e5d971 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Mar 2025 05:31:16 +0000 Subject: [PATCH 22/65] chore(deps): bump cc from 1.2.16 to 1.2.17 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.2.16 to 1.2.17. - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.16...cc-v1.2.17) --- updated-dependencies: - dependency-name: cc dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e97a30f..26391b55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,9 +109,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "cc" -version = "1.2.16" +version = "1.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" +checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" dependencies = [ "jobserver", "libc", diff --git a/Cargo.toml b/Cargo.toml index d5854438..615fe4fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ hf-hub = { version = "0.3.2" } criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" -cc = "1.2.16" +cc = "1.2.17" anyhow = "1.0.97" clap = "4.5.32" encoding_rs = "0.8.35" From 21eee351b0a06170cc6fe9e80121bf05da37b047 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Mon, 24 Mar 2025 23:47:52 +0100 Subject: [PATCH 23/65] reimplement get_chat_template --- llama-cpp-2/src/lib.rs | 11 +++-- llama-cpp-2/src/model.rs | 93 ++++++++++----------------------------- llama-cpp-sys-2/llama.cpp | 2 +- 3 files changed, 33 insertions(+), 73 deletions(-) diff --git a/llama-cpp-2/src/lib.rs b/llama-cpp-2/src/lib.rs index 3d79337f..6e251728 100644 --- a/llama-cpp-2/src/lib.rs +++ b/llama-cpp-2/src/lib.rs @@ -69,9 +69,14 @@ pub enum LLamaCppError { /// There was an error while getting the chat template from a model. #[derive(Debug, Eq, PartialEq, thiserror::Error)] pub enum ChatTemplateError { - /// gguf has no chat template - #[error("the model has no meta val - returned code {0}")] - MissingTemplate(i32), + /// gguf has no chat template (by that name) + #[error("chat template not found - returned null pointer")] + MissingTemplate, + + /// chat template contained a null byte + #[error("null byte in string {0}")] + NullError(#[from] NulError), + /// The chat template was not valid utf8. #[error(transparent)] Utf8Error(#[from] std::str::Utf8Error), diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs index 69e938c5..e566e400 100644 --- a/llama-cpp-2/src/model.rs +++ b/llama-cpp-2/src/model.rs @@ -506,83 +506,38 @@ impl LlamaModel { } } - fn get_chat_template_impl( - &self, - capacity: usize, - ) -> Result { - // longest known template is about 1200 bytes from llama.cpp - // TODO: Once MaybeUninit support is better, this can be converted to use that instead of dummy initializing such a large array. - let mut chat_temp = vec![b'*' as u8; capacity]; - let chat_name = - CStr::from_bytes_with_nul(b"tokenizer.chat_template\0").expect("should have null byte"); - - let ret = unsafe { - llama_cpp_sys_2::llama_model_meta_val_str( - self.model.as_ptr(), - chat_name.as_ptr(), - chat_temp.as_mut_ptr() as *mut c_char, - chat_temp.len(), - ) - }; - - if ret < 0 { - return Err(InternalChatTemplateError::Permanent( - ChatTemplateError::MissingTemplate(ret), - )); - } - - let returned_len = ret as usize; - - if ret as usize >= capacity { - // >= is important because if the returned length is equal to capacity, it means we're missing a trailing null - // since the returned length doesn't count the trailing null. - return Err(InternalChatTemplateError::RetryWithLargerBuffer( - returned_len, - )); - } - - assert_eq!( - chat_temp.get(returned_len), - Some(&0), - "should end with null byte" - ); - - chat_temp.resize(returned_len + 1, 0); - - Ok(LlamaChatTemplate(unsafe { - CString::from_vec_with_nul_unchecked(chat_temp) - })) - } - - /// Get chat template from model. If this fails, you may either want to fail to chat or pick the - /// specific shortcode that llama.cpp supports templates it has baked-in directly into its codebase - /// as fallbacks when the model doesn't contain. NOTE: If you don't specify a chat template, then - /// it uses chatml by default which is unlikely to actually be the correct template for your model - /// and you'll get weird results back. + /// Get chat template from model by name. If the name is None, the default chat template will be returned. /// /// You supply this into [Self::apply_chat_template] to get back a string with the appropriate template /// substitution applied to convert a list of messages into a prompt the LLM can use to complete /// the chat. /// + /// You could also use an external jinja parser, like minijinja, to parse jinja templates not + /// supported by the llama.cpp template engine.. + /// /// # Errors /// - /// * If the model has no chat template + /// * If the model has no chat template by that name /// * If the chat template is not a valid [`CString`]. - #[allow(clippy::missing_panics_doc)] // we statically know this will not panic as - pub fn get_chat_template(&self) -> Result { - // Typical chat templates are quite small. Let's start with a small allocation likely to succeed. - // Ideally the performance of this would be negligible but uninitialized arrays in Rust are currently - // still not well supported so we end up initializing the chat template buffer twice. One idea might - // be to use a very small value here that will likely fail (like 0 or 1) and then use that to initialize. - // Not sure which approach is the most optimal but in practice this should work well. - match self.get_chat_template_impl(200) { - Ok(t) => Ok(t), - Err(InternalChatTemplateError::Permanent(e)) => Err(e), - Err(InternalChatTemplateError::RetryWithLargerBuffer(actual_len)) => match self.get_chat_template_impl(actual_len + 1) { - Ok(t) => Ok(t), - Err(InternalChatTemplateError::Permanent(e)) => Err(e), - Err(InternalChatTemplateError::RetryWithLargerBuffer(unexpected_len)) => panic!("Was told that the template length was {actual_len} but now it's {unexpected_len}"), - } + pub fn get_chat_template( + &self, + name: Option<&str>, + ) -> Result { + let name_cstr = name.map(CString::new); + let name_ptr = match name_cstr { + Some(Ok(name)) => name.as_ptr(), + _ => std::ptr::null(), + }; + let result = + unsafe { llama_cpp_sys_2::llama_model_chat_template(self.model.as_ptr(), name_ptr) }; + + // Convert result to Rust String if not null + if result.is_null() { + Err(ChatTemplateError::MissingTemplate) + } else { + let chat_template_cstr = unsafe { CStr::from_ptr(result) }; + let chat_template = CString::new(chat_template_cstr.to_bytes())?; + Ok(LlamaChatTemplate(chat_template)) } } diff --git a/llama-cpp-sys-2/llama.cpp b/llama-cpp-sys-2/llama.cpp index 06c2b156..2b65ae30 160000 --- a/llama-cpp-sys-2/llama.cpp +++ b/llama-cpp-sys-2/llama.cpp @@ -1 +1 @@ -Subproject commit 06c2b1561d8b882bc018554591f8c35eb04ad30e +Subproject commit 2b65ae30299b9c67e25c51ee567e9a2ef22279ab From 6c2640f1c3e0e9a295a92d3ddb88ccad5ee5782e Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Tue, 25 Mar 2025 12:07:14 +0100 Subject: [PATCH 24/65] rename get_chat_template to chat_template --- llama-cpp-2/src/model.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs index e566e400..dd99d198 100644 --- a/llama-cpp-2/src/model.rs +++ b/llama-cpp-2/src/model.rs @@ -506,20 +506,20 @@ impl LlamaModel { } } - /// Get chat template from model by name. If the name is None, the default chat template will be returned. + /// Get chat template from model by name. If the name parameter is None, the default chat template will be returned. /// /// You supply this into [Self::apply_chat_template] to get back a string with the appropriate template /// substitution applied to convert a list of messages into a prompt the LLM can use to complete /// the chat. /// - /// You could also use an external jinja parser, like minijinja, to parse jinja templates not - /// supported by the llama.cpp template engine.. + /// You could also use an external jinja parser, like [minijinja](https://github.com/mitsuhiko/minijinja), + /// to parse jinja templates not supported by the llama.cpp template engine. /// /// # Errors /// /// * If the model has no chat template by that name /// * If the chat template is not a valid [`CString`]. - pub fn get_chat_template( + fn chat_template( &self, name: Option<&str>, ) -> Result { From eaf0782551c22fa1ae241ec8f2add7c8e5962b59 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Wed, 26 Mar 2025 11:23:17 +0100 Subject: [PATCH 25/65] make LlamaModel::chat_template public again --- llama-cpp-2/src/model.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs index dd99d198..669ccf02 100644 --- a/llama-cpp-2/src/model.rs +++ b/llama-cpp-2/src/model.rs @@ -519,7 +519,7 @@ impl LlamaModel { /// /// * If the model has no chat template by that name /// * If the chat template is not a valid [`CString`]. - fn chat_template( + pub fn chat_template( &self, name: Option<&str>, ) -> Result { From 6ed6248b091502ff072d11916c5bb3145a669ad2 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Wed, 26 Mar 2025 15:54:51 +0100 Subject: [PATCH 26/65] rename references to get_chat_template in doc strings --- llama-cpp-2/src/model.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs index 669ccf02..00b5bea6 100644 --- a/llama-cpp-2/src/model.rs +++ b/llama-cpp-2/src/model.rs @@ -36,7 +36,7 @@ pub struct LlamaLoraAdapter { pub(crate) lora_adapter: NonNull, } -/// A performance-friendly wrapper around [LlamaModel::get_chat_template] which is then +/// A performance-friendly wrapper around [LlamaModel::chat_template] which is then /// fed into [LlamaModel::apply_chat_template] to convert a list of messages into an LLM /// prompt. Internally the template is stored as a CString to avoid round-trip conversions /// within the FFI. @@ -627,7 +627,7 @@ impl LlamaModel { /// use "chatml", then just do `LlamaChatTemplate::new("chatml")` or any other model name or template /// string. /// - /// Use [Self::get_chat_template] to retrieve the template baked into the model (this is the preferred + /// Use [Self::chat_template] to retrieve the template baked into the model (this is the preferred /// mechanism as using the wrong chat template can result in really unexpected responses from the LLM). /// /// You probably want to set `add_ass` to true so that the generated template string ends with a the From 593257eb3d2621dd058e33e4a57e0390750fa1bb Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Wed, 26 Mar 2025 16:00:25 +0100 Subject: [PATCH 27/65] remove unused error type --- llama-cpp-2/src/lib.rs | 6 ------ llama-cpp-2/src/model.rs | 5 ++--- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/llama-cpp-2/src/lib.rs b/llama-cpp-2/src/lib.rs index 6e251728..a1857950 100644 --- a/llama-cpp-2/src/lib.rs +++ b/llama-cpp-2/src/lib.rs @@ -82,12 +82,6 @@ pub enum ChatTemplateError { Utf8Error(#[from] std::str::Utf8Error), } -enum InternalChatTemplateError { - Permanent(ChatTemplateError), - /// the buffer was too small. - RetryWithLargerBuffer(usize), -} - /// Failed to Load context #[derive(Debug, Eq, PartialEq, thiserror::Error)] pub enum LlamaContextLoadError { diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs index 00b5bea6..cb4a33bd 100644 --- a/llama-cpp-2/src/model.rs +++ b/llama-cpp-2/src/model.rs @@ -13,9 +13,8 @@ use crate::model::params::LlamaModelParams; use crate::token::LlamaToken; use crate::token_type::{LlamaTokenAttr, LlamaTokenAttrs}; use crate::{ - ApplyChatTemplateError, ChatTemplateError, InternalChatTemplateError, LlamaContextLoadError, - LlamaLoraAdapterInitError, LlamaModelLoadError, NewLlamaChatMessageError, StringToTokenError, - TokenToStringError, + ApplyChatTemplateError, ChatTemplateError, LlamaContextLoadError, LlamaLoraAdapterInitError, + LlamaModelLoadError, NewLlamaChatMessageError, StringToTokenError, TokenToStringError, }; pub mod params; From ad4f7f5ab7372a1788d52c0136ab4c34d35e0110 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 31 Mar 2025 05:41:54 +0000 Subject: [PATCH 28/65] chore(deps): bump clap from 4.5.32 to 4.5.34 Bumps [clap](https://github.com/clap-rs/clap) from 4.5.32 to 4.5.34. - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.32...clap_complete-v4.5.34) --- updated-dependencies: - dependency-name: clap dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 26391b55..1e0f2685 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,9 +146,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.32" +version = "4.5.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6088f3ae8c3608d19260cd7445411865a485688711b78b5be70d78cd96136f83" +checksum = "e958897981290da2a852763fe9cdb89cd36977a5d729023127095fa94d95e2ff" dependencies = [ "clap_builder", "clap_derive", @@ -156,9 +156,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.32" +version = "4.5.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22a7ef7f676155edfb82daa97f99441f3ebf4a58d5e32f295a56259f1b6facc8" +checksum = "83b0f35019843db2160b5bb19ae09b4e6411ac33fc6a712003c33e03090e2489" dependencies = [ "anstream", "anstyle", diff --git a/Cargo.toml b/Cargo.toml index 615fe4fd..1bc7de90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ pprof = "0.13.0" bindgen = "0.69.5" cc = "1.2.17" anyhow = "1.0.97" -clap = "4.5.32" +clap = "4.5.34" encoding_rs = "0.8.35" tracing-subscriber = { version = "0.3", features = ["json"] } From cdb2cef9c9ae9e5cfd8f5463697b71b871d6816c Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Wed, 26 Mar 2025 15:53:18 +0100 Subject: [PATCH 29/65] implement metadata fetching methods on LlamaModel --- llama-cpp-2/src/lib.rs | 15 +++++++ llama-cpp-2/src/model.rs | 94 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 2 deletions(-) diff --git a/llama-cpp-2/src/lib.rs b/llama-cpp-2/src/lib.rs index a1857950..3f3d7c00 100644 --- a/llama-cpp-2/src/lib.rs +++ b/llama-cpp-2/src/lib.rs @@ -82,6 +82,21 @@ pub enum ChatTemplateError { Utf8Error(#[from] std::str::Utf8Error), } +#[derive(Debug, Eq, PartialEq, thiserror::Error)] +pub enum MetaValError { + #[error("model does not have metadata key: {0}")] + MissingKey(String), + + #[error("null byte in string {0}")] + NullError(#[from] NulError), + + #[error("FromUtf8Error {0}")] + FromUtf8Error(#[from] FromUtf8Error), + + #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")] + NegativeReturn(i32), +} + /// Failed to Load context #[derive(Debug, Eq, PartialEq, thiserror::Error)] pub enum LlamaContextLoadError { diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs index cb4a33bd..b8cd26bb 100644 --- a/llama-cpp-2/src/model.rs +++ b/llama-cpp-2/src/model.rs @@ -13,8 +13,9 @@ use crate::model::params::LlamaModelParams; use crate::token::LlamaToken; use crate::token_type::{LlamaTokenAttr, LlamaTokenAttrs}; use crate::{ - ApplyChatTemplateError, ChatTemplateError, LlamaContextLoadError, LlamaLoraAdapterInitError, - LlamaModelLoadError, NewLlamaChatMessageError, StringToTokenError, TokenToStringError, + ApplyChatTemplateError, ChatTemplateError, LlamaContextLoadError, + LlamaLoraAdapterInitError, LlamaModelLoadError, MetaValError, NewLlamaChatMessageError, + StringToTokenError, TokenToStringError, }; pub mod params; @@ -490,6 +491,59 @@ impl LlamaModel { u32::try_from(unsafe { llama_cpp_sys_2::llama_model_n_head_kv(self.model.as_ptr()) }).unwrap() } + /// Get metadata value as a string by key name + pub fn meta_val_str(&self, key: &str) -> Result { + let key_cstring = CString::new(key)?; + let key_ptr = key_cstring.as_ptr(); + + extract_meta_string( + |buf_ptr, buf_len| unsafe { + llama_cpp_sys_2::llama_model_meta_val_str( + self.model.as_ptr(), + key_ptr, + buf_ptr, + buf_len, + ) + }, + 256, + ) + } + + /// Get the number of metadata key/value pairs + pub fn meta_count(&self) -> i32 { + unsafe { llama_cpp_sys_2::llama_model_meta_count(self.model.as_ptr()) } + } + + /// Get metadata key name by index + pub fn meta_key_by_index(&self, index: i32) -> Result { + extract_meta_string( + |buf_ptr, buf_len| unsafe { + llama_cpp_sys_2::llama_model_meta_key_by_index( + self.model.as_ptr(), + index, + buf_ptr, + buf_len, + ) + }, + 256, + ) + } + + /// Get metadata value as a string by index + pub fn meta_val_str_by_index(&self, index: i32) -> Result { + extract_meta_string( + |buf_ptr, buf_len| unsafe { + llama_cpp_sys_2::llama_model_meta_val_str_by_index( + self.model.as_ptr(), + index, + buf_ptr, + buf_len, + ) + }, + 256, + ) + } + /// Returns the rope type of the model. pub fn rope_type(&self) -> Option { match unsafe { llama_cpp_sys_2::llama_model_rope_type(self.model.as_ptr()) } { @@ -690,6 +744,42 @@ impl LlamaModel { } } +/// Generic helper function for extracting string values from the C API +/// This are specifically useful for the the metadata functions, where we pass in a buffer +/// to be populated by a string, not yet knowing if the buffer is large enough. +/// If the buffer was not large enough, we get the correct length back, which can be used to +/// construct a buffer of appropriate size. +fn extract_meta_string(c_function: F, capacity: usize) -> Result +where + F: Fn(*mut c_char, usize) -> i32, +{ + let mut buffer = vec![0u8; capacity]; + + // call the foreign function + let result = c_function(buffer.as_mut_ptr() as *mut c_char, buffer.len()); + if result < 0 { + return Err(MetaValError::NegativeReturn(result)); + } + + // check if the response fit in our buffer + let returned_len = result as usize; + if returned_len >= capacity { + // buffer wasn't large enough, try again with the correct capacity. + return extract_meta_string(c_function, returned_len + 1); + } + + // verify null termination + debug_assert_eq!( + buffer.get(returned_len), + Some(&0), + "should end with null byte" + ); + + // resize, convert, and return + buffer.truncate(returned_len); + Ok(String::from_utf8(buffer)?) +} + impl Drop for LlamaModel { fn drop(&mut self) { unsafe { llama_cpp_sys_2::llama_free_model(self.model.as_ptr()) } From c33b4a59b0c4f61d6f911b8cc1a400f0971c8e4a Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Wed, 26 Mar 2025 16:43:32 +0100 Subject: [PATCH 30/65] doc comments on MetaValError, remove unused variant --- llama-cpp-2/src/lib.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llama-cpp-2/src/lib.rs b/llama-cpp-2/src/lib.rs index 3f3d7c00..1ffbca2b 100644 --- a/llama-cpp-2/src/lib.rs +++ b/llama-cpp-2/src/lib.rs @@ -82,17 +82,18 @@ pub enum ChatTemplateError { Utf8Error(#[from] std::str::Utf8Error), } +/// Failed fetching metadata value #[derive(Debug, Eq, PartialEq, thiserror::Error)] pub enum MetaValError { - #[error("model does not have metadata key: {0}")] - MissingKey(String), - + /// The provided string contains an unexpected null-byte #[error("null byte in string {0}")] NullError(#[from] NulError), + /// The returned data contains invalid UTF8 data #[error("FromUtf8Error {0}")] FromUtf8Error(#[from] FromUtf8Error), + /// Got negative return value. This happens if the key or index queried does not exist. #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")] NegativeReturn(i32), } From 5369ff22cf66d74b9bcb47c8c970cb516dfa2329 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 4 Apr 2025 20:54:56 +0000 Subject: [PATCH 31/65] chore(deps): bump openssl from 0.10.70 to 0.10.72 Bumps [openssl](https://github.com/sfackler/rust-openssl) from 0.10.70 to 0.10.72. - [Release notes](https://github.com/sfackler/rust-openssl/releases) - [Commits](https://github.com/sfackler/rust-openssl/compare/openssl-v0.10.70...openssl-v0.10.72) --- updated-dependencies: - dependency-name: openssl dependency-version: 0.10.72 dependency-type: indirect ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e0f2685..25a5d202 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -762,9 +762,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "openssl" -version = "0.10.70" +version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61cfb4e166a8bb8c9b55c500bc2308550148ece889be90f609377e58140f42c6" +checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ "bitflags", "cfg-if", @@ -794,9 +794,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.105" +version = "0.9.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b22d5b84be05a8d6947c7cb71f7c849aa0f112acd4bf51c2a7c1c988ac0a9dc" +checksum = "8288979acd84749c744a9014b4382d42b8f7b2592847b5afb2ed29e5d16ede07" dependencies = [ "cc", "libc", From 26a3ec79ffab91d45a6df1875d02ffc716a32396 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Apr 2025 06:16:05 +0000 Subject: [PATCH 32/65] chore(deps): bump clap from 4.5.34 to 4.5.35 Bumps [clap](https://github.com/clap-rs/clap) from 4.5.34 to 4.5.35. - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.34...clap_complete-v4.5.35) --- updated-dependencies: - dependency-name: clap dependency-version: 4.5.35 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 25a5d202..7205fa1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,9 +146,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.34" +version = "4.5.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e958897981290da2a852763fe9cdb89cd36977a5d729023127095fa94d95e2ff" +checksum = "d8aa86934b44c19c50f87cc2790e19f54f7a67aedb64101c2e1a2e5ecfb73944" dependencies = [ "clap_builder", "clap_derive", @@ -156,9 +156,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.34" +version = "4.5.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83b0f35019843db2160b5bb19ae09b4e6411ac33fc6a712003c33e03090e2489" +checksum = "2414dbb2dd0695280da6ea9261e327479e9d37b0630f6b53ba2a11c60c679fd9" dependencies = [ "anstream", "anstyle", diff --git a/Cargo.toml b/Cargo.toml index 1bc7de90..5cedd4e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ pprof = "0.13.0" bindgen = "0.69.5" cc = "1.2.17" anyhow = "1.0.97" -clap = "4.5.34" +clap = "4.5.35" encoding_rs = "0.8.35" tracing-subscriber = { version = "0.3", features = ["json"] } From c9cbb3dc45af7a60c2420e9fa6b8aa3cfae1bcfc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Apr 2025 06:16:29 +0000 Subject: [PATCH 33/65] chore(deps): bump cc from 1.2.17 to 1.2.18 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.2.17 to 1.2.18. - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.17...cc-v1.2.18) --- updated-dependencies: - dependency-name: cc dependency-version: 1.2.18 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 25a5d202..0972b987 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,9 +109,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "cc" -version = "1.2.17" +version = "1.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" +checksum = "525046617d8376e3db1deffb079e91cef90a89fc3ca5c185bbf8c9ecdd15cd5c" dependencies = [ "jobserver", "libc", diff --git a/Cargo.toml b/Cargo.toml index 1bc7de90..bcfa119c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ hf-hub = { version = "0.3.2" } criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" -cc = "1.2.17" +cc = "1.2.18" anyhow = "1.0.97" clap = "4.5.34" encoding_rs = "0.8.35" From 2641a6d1cf77f5813fc810e4a5866a86ec7b9784 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Apr 2025 05:40:09 +0000 Subject: [PATCH 34/65] chore(deps): bump cc from 1.2.18 to 1.2.19 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.2.18 to 1.2.19. - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.18...cc-v1.2.19) --- updated-dependencies: - dependency-name: cc dependency-version: 1.2.19 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e03e19a..de6766c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,9 +109,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "cc" -version = "1.2.18" +version = "1.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "525046617d8376e3db1deffb079e91cef90a89fc3ca5c185bbf8c9ecdd15cd5c" +checksum = "8e3a13707ac958681c13b39b458c073d0d9bc8a22cb1b2f4c8e55eb72c13f362" dependencies = [ "jobserver", "libc", diff --git a/Cargo.toml b/Cargo.toml index 4d5cf748..05031bca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ hf-hub = { version = "0.3.2" } criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" -cc = "1.2.18" +cc = "1.2.19" anyhow = "1.0.97" clap = "4.5.35" encoding_rs = "0.8.35" From 1fd0bd20a3c724ad5a925c2bd7d3030f66cdb620 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Apr 2025 05:40:21 +0000 Subject: [PATCH 35/65] chore(deps): bump clap from 4.5.35 to 4.5.36 Bumps [clap](https://github.com/clap-rs/clap) from 4.5.35 to 4.5.36. - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.35...clap_complete-v4.5.36) --- updated-dependencies: - dependency-name: clap dependency-version: 4.5.36 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e03e19a..c4c708e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,9 +146,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.35" +version = "4.5.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8aa86934b44c19c50f87cc2790e19f54f7a67aedb64101c2e1a2e5ecfb73944" +checksum = "2df961d8c8a0d08aa9945718ccf584145eee3f3aa06cddbeac12933781102e04" dependencies = [ "clap_builder", "clap_derive", @@ -156,9 +156,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.35" +version = "4.5.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2414dbb2dd0695280da6ea9261e327479e9d37b0630f6b53ba2a11c60c679fd9" +checksum = "132dbda40fb6753878316a489d5a1242a8ef2f0d9e47ba01c951ea8aa7d013a5" dependencies = [ "anstream", "anstyle", diff --git a/Cargo.toml b/Cargo.toml index 4d5cf748..6b527cd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ pprof = "0.13.0" bindgen = "0.69.5" cc = "1.2.18" anyhow = "1.0.97" -clap = "4.5.35" +clap = "4.5.36" encoding_rs = "0.8.35" tracing-subscriber = { version = "0.3", features = ["json"] } From 9657f021dfe64629838c0e61d1fd19322e52fc82 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Apr 2025 15:18:08 +0000 Subject: [PATCH 36/65] chore(deps): bump anyhow from 1.0.97 to 1.0.98 Bumps [anyhow](https://github.com/dtolnay/anyhow) from 1.0.97 to 1.0.98. - [Release notes](https://github.com/dtolnay/anyhow/releases) - [Commits](https://github.com/dtolnay/anyhow/compare/1.0.97...1.0.98) --- updated-dependencies: - dependency-name: anyhow dependency-version: 1.0.98 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ffdd81da..e2e7bf63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -68,9 +68,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.97" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "base64" diff --git a/Cargo.toml b/Cargo.toml index cab5fce6..656fce3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" cc = "1.2.19" -anyhow = "1.0.97" +anyhow = "1.0.98" clap = "4.5.36" encoding_rs = "0.8.35" tracing-subscriber = { version = "0.3", features = ["json"] } From 7c0a01a813241c1770af9acabbda31595ebbcc87 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Wed, 30 Apr 2025 18:16:44 +0200 Subject: [PATCH 37/65] LLAMA_CURL=OFF --- llama-cpp-sys-2/build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index e2a56b38..5b39722d 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -267,6 +267,7 @@ fn main() { config.define("LLAMA_BUILD_TESTS", "OFF"); config.define("LLAMA_BUILD_EXAMPLES", "OFF"); config.define("LLAMA_BUILD_SERVER", "OFF"); + config.define("LLAMA_CURL", "OFF"); config.define( "BUILD_SHARED_LIBS", From e5082c1fa8fdf45a2ca42cabeb1104de9284b408 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Wed, 30 Apr 2025 18:24:09 +0200 Subject: [PATCH 38/65] update llama.cpp to b5233 --- llama-cpp-sys-2/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama-cpp-sys-2/llama.cpp b/llama-cpp-sys-2/llama.cpp index 2b65ae30..ceda28ef 160000 --- a/llama-cpp-sys-2/llama.cpp +++ b/llama-cpp-sys-2/llama.cpp @@ -1 +1 @@ -Subproject commit 2b65ae30299b9c67e25c51ee567e9a2ef22279ab +Subproject commit ceda28ef8e310a8dee60bf275077a3eedae8e36c From 5d0ea24854db06d6b13385224282f0580bb14819 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Thu, 1 May 2025 13:23:43 +0200 Subject: [PATCH 39/65] add cmake to test build dockerfile --- test-build.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-build.Dockerfile b/test-build.Dockerfile index 8540d2f9..daa3a709 100644 --- a/test-build.Dockerfile +++ b/test-build.Dockerfile @@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04 FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} as base-cuda # Install requirements for rustup install + bindgen: https://rust-lang.github.io/rust-bindgen/requirements.html -RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y curl llvm-dev libclang-dev clang pkg-config libssl-dev +RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y curl llvm-dev libclang-dev clang pkg-config libssl-dev cmake RUN curl https://sh.rustup.rs -sSf | bash -s -- -y ENV PATH=/root/.cargo/bin:$PATH From 496c3685417495caa8c7b73a87939a50ee92d9f5 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Thu, 1 May 2025 14:53:52 +0200 Subject: [PATCH 40/65] add git to dockerfile --- test-build.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-build.Dockerfile b/test-build.Dockerfile index daa3a709..ca017457 100644 --- a/test-build.Dockerfile +++ b/test-build.Dockerfile @@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04 FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} as base-cuda # Install requirements for rustup install + bindgen: https://rust-lang.github.io/rust-bindgen/requirements.html -RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y curl llvm-dev libclang-dev clang pkg-config libssl-dev cmake +RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y curl llvm-dev libclang-dev clang pkg-config libssl-dev cmake git RUN curl https://sh.rustup.rs -sSf | bash -s -- -y ENV PATH=/root/.cargo/bin:$PATH From ddd380a8ef61577bbc1a96015343888d5e8846d4 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Thu, 1 May 2025 14:54:24 +0200 Subject: [PATCH 41/65] change casing of 'AS' to fix docker build warning --- test-build.Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test-build.Dockerfile b/test-build.Dockerfile index ca017457..383e0973 100644 --- a/test-build.Dockerfile +++ b/test-build.Dockerfile @@ -1,6 +1,6 @@ ARG CUDA_VERSION=12.3.1 ARG UBUNTU_VERSION=22.04 -FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} as base-cuda +FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS base-cuda # Install requirements for rustup install + bindgen: https://rust-lang.github.io/rust-bindgen/requirements.html RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y curl llvm-dev libclang-dev clang pkg-config libssl-dev cmake git @@ -10,7 +10,7 @@ ENV PATH=/root/.cargo/bin:$PATH COPY . . RUN cargo build --bin simple --features cuda -FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} as base-cuda-runtime +FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS base-cuda-runtime COPY --from=base-cuda /target/debug/simple /usr/local/bin/simple From 9f7bd2503bfd059e178f3cba6e29d35507c15d24 Mon Sep 17 00:00:00 2001 From: AsbjornOlling Date: Thu, 1 May 2025 17:46:50 +0200 Subject: [PATCH 42/65] add llama backend init to the doc tests that depend on it --- llama-cpp-2/src/lib.rs | 4 ++++ llama-cpp-2/src/sampling.rs | 2 ++ 2 files changed, 6 insertions(+) diff --git a/llama-cpp-2/src/lib.rs b/llama-cpp-2/src/lib.rs index 1ffbca2b..f2ac5313 100644 --- a/llama-cpp-2/src/lib.rs +++ b/llama-cpp-2/src/lib.rs @@ -217,6 +217,8 @@ pub enum LlamaLoraAdapterRemoveError { /// get the time (in microseconds) according to llama.cpp /// ``` /// # use llama_cpp_2::llama_time_us; +/// # use llama_cpp_2::llama_backend::LlamaBackend; +/// let backend = LlamaBackend::init().unwrap(); /// let time = llama_time_us(); /// assert!(time > 0); /// ``` @@ -311,6 +313,8 @@ pub enum ApplyChatTemplateError { /// /// ``` /// # use std::time::Duration; +/// # use llama_cpp_2::llama_backend::LlamaBackend; +/// let backend = LlamaBackend::init().unwrap(); /// use llama_cpp_2::ggml_time_us; /// /// let start = ggml_time_us(); diff --git a/llama-cpp-2/src/sampling.rs b/llama-cpp-2/src/sampling.rs index a659ab73..96feb402 100644 --- a/llama-cpp-2/src/sampling.rs +++ b/llama-cpp-2/src/sampling.rs @@ -117,6 +117,8 @@ impl LlamaSampler { /// data_array::LlamaTokenDataArray /// }; /// use llama_cpp_2::sampling::LlamaSampler; + /// use llama_cpp_2::llama_backend::LlamaBackend; + /// let backend = LlamaBackend::init().unwrap(); /// /// let mut data_array = LlamaTokenDataArray::new(vec![ /// LlamaTokenData::new(LlamaToken(0), 0., 0.), From eed54cb07b6dc0c82ce68c8e11b541ab0875176f Mon Sep 17 00:00:00 2001 From: Dennis Keck <26092524+fellhorn@users.noreply.github.com> Date: Fri, 2 May 2025 09:50:22 +0200 Subject: [PATCH 43/65] Fix common/minja missing in cargo publish --- llama-cpp-sys-2/Cargo.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index 6db13f98..4fd842ee 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -12,9 +12,9 @@ include = [ "build.rs", "/src", - "/llama.cpp/common/*.h", - "/llama.cpp/common/*.hpp", - "/llama.cpp/common/*.cpp", + "/llama.cpp/common/**/*.h", + "/llama.cpp/common/**/*.hpp", + "/llama.cpp/common/**/*.cpp", "/llama.cpp/ggml/include/*.h", "/llama.cpp/ggml/src/*.h", "/llama.cpp/ggml/src/*.c", @@ -88,4 +88,4 @@ vulkan = [] native = [] openmp = [] # Only has an impact on Android. -shared-stdcxx = [] \ No newline at end of file +shared-stdcxx = [] From 1587e0c48489dbd0455439752226c732296f9782 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Sat, 3 May 2025 16:02:42 +0000 Subject: [PATCH 44/65] Bump version to 0.1.104 [skip ci] --- Cargo.lock | 8 ++++---- examples/embeddings/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- llama-cpp-2/Cargo.toml | 2 +- llama-cpp-sys-2/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e2e7bf63..b09b1d55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -277,7 +277,7 @@ checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "embeddings" -version = "0.1.103" +version = "0.1.104" dependencies = [ "anyhow", "clap", @@ -662,7 +662,7 @@ checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "llama-cpp-2" -version = "0.1.103" +version = "0.1.104" dependencies = [ "encoding_rs", "enumflags2", @@ -674,7 +674,7 @@ dependencies = [ [[package]] name = "llama-cpp-sys-2" -version = "0.1.103" +version = "0.1.104" dependencies = [ "bindgen", "cc", @@ -1114,7 +1114,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple" -version = "0.1.103" +version = "0.1.104" dependencies = [ "anyhow", "clap", diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml index c3c5b533..beeaa170 100644 --- a/examples/embeddings/Cargo.toml +++ b/examples/embeddings/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "embeddings" -version = "0.1.103" +version = "0.1.104" edition = "2021" [dependencies] diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index 98867c57..bc628993 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simple" -version = "0.1.103" +version = "0.1.104" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index bf52467c..8bb61458 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-2" description = "llama.cpp bindings for Rust" -version = "0.1.103" +version = "0.1.104" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index 4fd842ee..fb4f53ed 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-sys-2" description = "Low Level Bindings to llama.cpp" -version = "0.1.103" +version = "0.1.104" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" From fd06d2183d05691349fc118d17cfeaeb7af11b1b Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Sat, 3 May 2025 16:04:46 +0000 Subject: [PATCH 45/65] Bump version to 0.1.105 [skip ci] --- Cargo.lock | 8 ++++---- examples/embeddings/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- llama-cpp-2/Cargo.toml | 2 +- llama-cpp-sys-2/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b09b1d55..176491bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -277,7 +277,7 @@ checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "embeddings" -version = "0.1.104" +version = "0.1.105" dependencies = [ "anyhow", "clap", @@ -662,7 +662,7 @@ checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "llama-cpp-2" -version = "0.1.104" +version = "0.1.105" dependencies = [ "encoding_rs", "enumflags2", @@ -674,7 +674,7 @@ dependencies = [ [[package]] name = "llama-cpp-sys-2" -version = "0.1.104" +version = "0.1.105" dependencies = [ "bindgen", "cc", @@ -1114,7 +1114,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple" -version = "0.1.104" +version = "0.1.105" dependencies = [ "anyhow", "clap", diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml index beeaa170..3387baf7 100644 --- a/examples/embeddings/Cargo.toml +++ b/examples/embeddings/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "embeddings" -version = "0.1.104" +version = "0.1.105" edition = "2021" [dependencies] diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index bc628993..c088180a 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simple" -version = "0.1.104" +version = "0.1.105" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index 8bb61458..47b77512 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-2" description = "llama.cpp bindings for Rust" -version = "0.1.104" +version = "0.1.105" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index fb4f53ed..4ec679ea 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-sys-2" description = "Low Level Bindings to llama.cpp" -version = "0.1.104" +version = "0.1.105" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" From 955f8d6f5233c4df83bd0dd1613f4a9fa344b79c Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Sat, 3 May 2025 16:06:59 +0000 Subject: [PATCH 46/65] Bump version to 0.1.106 [skip ci] --- Cargo.lock | 8 ++++---- examples/embeddings/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- llama-cpp-2/Cargo.toml | 2 +- llama-cpp-sys-2/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 176491bb..47fbdd05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -277,7 +277,7 @@ checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "embeddings" -version = "0.1.105" +version = "0.1.106" dependencies = [ "anyhow", "clap", @@ -662,7 +662,7 @@ checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "llama-cpp-2" -version = "0.1.105" +version = "0.1.106" dependencies = [ "encoding_rs", "enumflags2", @@ -674,7 +674,7 @@ dependencies = [ [[package]] name = "llama-cpp-sys-2" -version = "0.1.105" +version = "0.1.106" dependencies = [ "bindgen", "cc", @@ -1114,7 +1114,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple" -version = "0.1.105" +version = "0.1.106" dependencies = [ "anyhow", "clap", diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml index 3387baf7..abe54e1e 100644 --- a/examples/embeddings/Cargo.toml +++ b/examples/embeddings/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "embeddings" -version = "0.1.105" +version = "0.1.106" edition = "2021" [dependencies] diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index c088180a..d70d82ae 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simple" -version = "0.1.105" +version = "0.1.106" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index 47b77512..dc5fe012 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-2" description = "llama.cpp bindings for Rust" -version = "0.1.105" +version = "0.1.106" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index 4ec679ea..fecfdb7c 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-sys-2" description = "Low Level Bindings to llama.cpp" -version = "0.1.105" +version = "0.1.106" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" From e417032897a8de07b615de35bdca4ca639557de9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 3 May 2025 19:36:27 +0000 Subject: [PATCH 47/65] chore(deps): bump clap from 4.5.36 to 4.5.37 Bumps [clap](https://github.com/clap-rs/clap) from 4.5.36 to 4.5.37. - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.36...clap_complete-v4.5.37) --- updated-dependencies: - dependency-name: clap dependency-version: 4.5.37 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 47fbdd05..9eb7f89e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,9 +146,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.36" +version = "4.5.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2df961d8c8a0d08aa9945718ccf584145eee3f3aa06cddbeac12933781102e04" +checksum = "eccb054f56cbd38340b380d4a8e69ef1f02f1af43db2f0cc817a4774d80ae071" dependencies = [ "clap_builder", "clap_derive", @@ -156,9 +156,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.36" +version = "4.5.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "132dbda40fb6753878316a489d5a1242a8ef2f0d9e47ba01c951ea8aa7d013a5" +checksum = "efd9466fac8543255d3b1fcad4762c5e116ffe808c8a3043d4263cd4fd4862a2" dependencies = [ "anstream", "anstyle", diff --git a/Cargo.toml b/Cargo.toml index 656fce3d..ac92122a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ pprof = "0.13.0" bindgen = "0.69.5" cc = "1.2.19" anyhow = "1.0.98" -clap = "4.5.36" +clap = "4.5.37" encoding_rs = "0.8.35" tracing-subscriber = { version = "0.3", features = ["json"] } From 1c1f8885eca1408581e43c6cab69261f28de799b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 3 May 2025 19:36:30 +0000 Subject: [PATCH 48/65] chore(deps): bump cc from 1.2.19 to 1.2.20 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.2.19 to 1.2.20. - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.19...cc-v1.2.20) --- updated-dependencies: - dependency-name: cc dependency-version: 1.2.20 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 47fbdd05..1a9bbcf2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,9 +109,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "cc" -version = "1.2.19" +version = "1.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3a13707ac958681c13b39b458c073d0d9bc8a22cb1b2f4c8e55eb72c13f362" +checksum = "8691782945451c1c383942c4874dbe63814f61cb57ef773cda2972682b7bb3c0" dependencies = [ "jobserver", "libc", diff --git a/Cargo.toml b/Cargo.toml index 656fce3d..043607b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ hf-hub = { version = "0.3.2" } criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" -cc = "1.2.19" +cc = "1.2.21" anyhow = "1.0.98" clap = "4.5.36" encoding_rs = "0.8.35" From 99f5e5c95e968d7c11e848fb02621f5d8fe6abb2 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Sat, 3 May 2025 19:37:29 +0000 Subject: [PATCH 49/65] Bump version to 0.1.107 [skip ci] --- Cargo.lock | 8 ++++---- examples/embeddings/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- llama-cpp-2/Cargo.toml | 2 +- llama-cpp-sys-2/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 47fbdd05..2eaec111 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -277,7 +277,7 @@ checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "embeddings" -version = "0.1.106" +version = "0.1.107" dependencies = [ "anyhow", "clap", @@ -662,7 +662,7 @@ checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "llama-cpp-2" -version = "0.1.106" +version = "0.1.107" dependencies = [ "encoding_rs", "enumflags2", @@ -674,7 +674,7 @@ dependencies = [ [[package]] name = "llama-cpp-sys-2" -version = "0.1.106" +version = "0.1.107" dependencies = [ "bindgen", "cc", @@ -1114,7 +1114,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple" -version = "0.1.106" +version = "0.1.107" dependencies = [ "anyhow", "clap", diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml index abe54e1e..2df63a2e 100644 --- a/examples/embeddings/Cargo.toml +++ b/examples/embeddings/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "embeddings" -version = "0.1.106" +version = "0.1.107" edition = "2021" [dependencies] diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index d70d82ae..540486ba 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simple" -version = "0.1.106" +version = "0.1.107" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index dc5fe012..67e2e118 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-2" description = "llama.cpp bindings for Rust" -version = "0.1.106" +version = "0.1.107" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index fecfdb7c..f998598d 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-sys-2" description = "Low Level Bindings to llama.cpp" -version = "0.1.106" +version = "0.1.107" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" From 4f41ba6342cf9ad91362082569b4db4df86168a0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 05:48:34 +0000 Subject: [PATCH 50/65] chore(deps): bump clap from 4.5.37 to 4.5.38 Bumps [clap](https://github.com/clap-rs/clap) from 4.5.37 to 4.5.38. - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.37...clap_complete-v4.5.38) --- updated-dependencies: - dependency-name: clap dependency-version: 4.5.38 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 566171c4..838b5b7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,9 +146,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.37" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eccb054f56cbd38340b380d4a8e69ef1f02f1af43db2f0cc817a4774d80ae071" +checksum = "ed93b9805f8ba930df42c2590f05453d5ec36cbb85d018868a5b24d31f6ac000" dependencies = [ "clap_builder", "clap_derive", @@ -156,9 +156,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.37" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd9466fac8543255d3b1fcad4762c5e116ffe808c8a3043d4263cd4fd4862a2" +checksum = "379026ff283facf611b0ea629334361c4211d1b12ee01024eec1591133b04120" dependencies = [ "anstream", "anstyle", diff --git a/Cargo.toml b/Cargo.toml index 88925096..472760d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ pprof = "0.13.0" bindgen = "0.69.5" cc = "1.2.21" anyhow = "1.0.98" -clap = "4.5.37" +clap = "4.5.38" encoding_rs = "0.8.35" tracing-subscriber = { version = "0.3", features = ["json"] } From 3597d01132a76c515f8b73f78c9ff7ca5d585068 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 05:48:44 +0000 Subject: [PATCH 51/65] chore(deps): bump cc from 1.2.21 to 1.2.22 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.2.21 to 1.2.22. - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.21...cc-v1.2.22) --- updated-dependencies: - dependency-name: cc dependency-version: 1.2.22 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 566171c4..856f8763 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,9 +109,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "cc" -version = "1.2.21" +version = "1.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8691782945451c1c383942c4874dbe63814f61cb57ef773cda2972682b7bb3c0" +checksum = "32db95edf998450acc7881c932f94cd9b05c87b4b2599e8bab064753da4acfd1" dependencies = [ "jobserver", "libc", diff --git a/Cargo.toml b/Cargo.toml index 88925096..f08a2969 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ hf-hub = { version = "0.3.2" } criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" -cc = "1.2.21" +cc = "1.2.22" anyhow = "1.0.98" clap = "4.5.37" encoding_rs = "0.8.35" From 7e136733f8eb093797fc650f04dddb32f1c5c407 Mon Sep 17 00:00:00 2001 From: Chen Xu Date: Sat, 17 May 2025 19:18:24 +0800 Subject: [PATCH 52/65] Fix MacOS build --- llama-cpp-sys-2/build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index 5b39722d..d2aef770 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -237,6 +237,7 @@ fn main() { .header("wrapper.h") .clang_arg(format!("-I{}", llama_src.join("include").display())) .clang_arg(format!("-I{}", llama_src.join("ggml/include").display())) + .clang_arg(format!("--target={}", env::var("HOST").unwrap())) .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) .derive_partialeq(true) .allowlist_function("ggml_.*") From b6048f05afed3775a1af21a16e38709437764c75 Mon Sep 17 00:00:00 2001 From: Chen Xu Date: Sat, 17 May 2025 19:32:03 +0800 Subject: [PATCH 53/65] Use `TARGET` instead of `HOST` --- llama-cpp-sys-2/build.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index d2aef770..b2eb559c 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -232,12 +232,16 @@ fn main() { .to_string(), ); + let bindgen_target = env::var("TARGET").or_else(|_| { + env::var("HOST") + }).expect("Failed to get TARGET or HOST environment variable"); + // Bindings let bindings = bindgen::Builder::default() .header("wrapper.h") .clang_arg(format!("-I{}", llama_src.join("include").display())) .clang_arg(format!("-I{}", llama_src.join("ggml/include").display())) - .clang_arg(format!("--target={}", env::var("HOST").unwrap())) + .clang_arg(format!("--target={}", bindgen_target)) .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) .derive_partialeq(true) .allowlist_function("ggml_.*") From b9993ffc740ec30eb26cec11a1d5625f13c2daf9 Mon Sep 17 00:00:00 2001 From: Chen Xu Date: Sat, 17 May 2025 23:46:51 +0800 Subject: [PATCH 54/65] Use existing target --- llama-cpp-sys-2/build.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index b2eb559c..df654053 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -232,16 +232,12 @@ fn main() { .to_string(), ); - let bindgen_target = env::var("TARGET").or_else(|_| { - env::var("HOST") - }).expect("Failed to get TARGET or HOST environment variable"); - // Bindings let bindings = bindgen::Builder::default() .header("wrapper.h") .clang_arg(format!("-I{}", llama_src.join("include").display())) .clang_arg(format!("-I{}", llama_src.join("ggml/include").display())) - .clang_arg(format!("--target={}", bindgen_target)) + .clang_arg(format!("--target={}", target_triple)) .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) .derive_partialeq(true) .allowlist_function("ggml_.*") @@ -315,10 +311,7 @@ fn main() { } else { config.define("ANDROID_PLATFORM", "android-28"); } - if target_triple.contains("aarch64") { - config.cflag("-march=armv8.7a"); - config.cxxflag("-march=armv8.7a"); - } else if target_triple.contains("armv7") { + if target_triple.contains("aarch64") || target_triple.contains("armv7") { config.cflag("-march=armv8.7a"); config.cxxflag("-march=armv8.7a"); } else if target_triple.contains("x86_64") { From 9012e88d599d23efe83c90c59ecb3ad92597b8ea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 05:24:18 +0000 Subject: [PATCH 55/65] chore(deps): bump cc from 1.2.22 to 1.2.23 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.2.22 to 1.2.23. - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.22...cc-v1.2.23) --- updated-dependencies: - dependency-name: cc dependency-version: 1.2.23 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a4e4566b..3f7f3918 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,9 +109,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "cc" -version = "1.2.22" +version = "1.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32db95edf998450acc7881c932f94cd9b05c87b4b2599e8bab064753da4acfd1" +checksum = "5f4ac86a9e5bc1e2b3449ab9d7d3a6a405e3d1bb28d7b9be8614f55846ae3766" dependencies = [ "jobserver", "libc", diff --git a/Cargo.toml b/Cargo.toml index f4e2fd98..f1b63014 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ hf-hub = { version = "0.3.2" } criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" -cc = "1.2.22" +cc = "1.2.23" anyhow = "1.0.98" clap = "4.5.38" encoding_rs = "0.8.35" From 59864fd5b3c0e8226116972fed59c3b0158f214e Mon Sep 17 00:00:00 2001 From: Britt Lewis Date: Sat, 24 May 2025 13:22:29 -0400 Subject: [PATCH 56/65] remove deprecated kv view & rename kv cache -> kv self APIs * bump llama.cpp to b5474 (259469c), latest release as of 2025-05-24 - https://github.com/ggml-org/llama.cpp/commits/259469c4b57c1a32606353bcac52ba683424a990 --- llama-cpp-2/src/context/kv_cache.rs | 141 +++------------------------- llama-cpp-sys-2/llama.cpp | 2 +- 2 files changed, 12 insertions(+), 131 deletions(-) diff --git a/llama-cpp-2/src/context/kv_cache.rs b/llama-cpp-2/src/context/kv_cache.rs index d90a6b8a..14f5b5a6 100644 --- a/llama-cpp-2/src/context/kv_cache.rs +++ b/llama-cpp-2/src/context/kv_cache.rs @@ -28,7 +28,7 @@ impl LlamaContext<'_> { /// * `dest` - The sequence id to copy the cache to. /// * `size` - The size of the cache to copy. pub fn copy_cache(&mut self, src: i32, dest: i32, size: i32) { - unsafe { llama_cpp_sys_2::llama_kv_cache_seq_cp(self.context.as_ptr(), src, dest, 0, size) } + unsafe { llama_cpp_sys_2::llama_kv_self_seq_cp(self.context.as_ptr(), src, dest, 0, size) } } /// Copy the cache from one sequence to another. @@ -58,7 +58,7 @@ impl LlamaContext<'_> { .map_or(Ok(-1), i32::try_from) .map_err(KvCacheConversionError::P1TooLarge)?; unsafe { - llama_cpp_sys_2::llama_kv_cache_seq_cp(self.context.as_ptr(), src, dest, p0, p1); + llama_cpp_sys_2::llama_kv_self_seq_cp(self.context.as_ptr(), src, dest, p0, p1); } Ok(()) } @@ -92,18 +92,18 @@ impl LlamaContext<'_> { let p1 = p1 .map_or(Ok(-1), i32::try_from) .map_err(KvCacheConversionError::P1TooLarge)?; - Ok(unsafe { llama_cpp_sys_2::llama_kv_cache_seq_rm(self.context.as_ptr(), src, p0, p1) }) + Ok(unsafe { llama_cpp_sys_2::llama_kv_self_seq_rm(self.context.as_ptr(), src, p0, p1) }) } /// Returns the number of used KV cells (i.e. have at least one sequence assigned to them) #[must_use] pub fn get_kv_cache_used_cells(&self) -> i32 { - unsafe { llama_cpp_sys_2::llama_get_kv_cache_used_cells(self.context.as_ptr()) } + unsafe { llama_cpp_sys_2::llama_kv_self_used_cells(self.context.as_ptr()) } } /// Clear the KV cache pub fn clear_kv_cache(&mut self) { - unsafe { llama_cpp_sys_2::llama_kv_cache_clear(self.context.as_ptr()) } + unsafe { llama_cpp_sys_2::llama_kv_self_clear(self.context.as_ptr()) } } /// Removes all tokens that do not belong to the specified sequence @@ -112,7 +112,7 @@ impl LlamaContext<'_> { /// /// * `seq_id` - The sequence id to keep pub fn llama_kv_cache_seq_keep(&mut self, seq_id: i32) { - unsafe { llama_cpp_sys_2::llama_kv_cache_seq_keep(self.context.as_ptr(), seq_id) } + unsafe { llama_cpp_sys_2::llama_kv_self_seq_keep(self.context.as_ptr(), seq_id) } } #[allow(clippy::doc_markdown)] @@ -147,7 +147,7 @@ impl LlamaContext<'_> { .map_or(Ok(-1), i32::try_from) .map_err(KvCacheConversionError::P1TooLarge)?; unsafe { - llama_cpp_sys_2::llama_kv_cache_seq_add(self.context.as_ptr(), seq_id, p0, p1, delta); + llama_cpp_sys_2::llama_kv_self_seq_add(self.context.as_ptr(), seq_id, p0, p1, delta); } Ok(()) } @@ -183,7 +183,7 @@ impl LlamaContext<'_> { .map_or(Ok(-1), i32::try_from) .map_err(KvCacheConversionError::P1TooLarge)?; let d = c_int::from(d.get()); - unsafe { llama_cpp_sys_2::llama_kv_cache_seq_div(self.context.as_ptr(), seq_id, p0, p1, d) } + unsafe { llama_cpp_sys_2::llama_kv_self_seq_div(self.context.as_ptr(), seq_id, p0, p1, d) } Ok(()) } @@ -194,7 +194,7 @@ impl LlamaContext<'_> { /// * `seq_id` - The sequence id to get the max position for #[must_use] pub fn kv_cache_seq_pos_max(&self, seq_id: i32) -> i32 { - unsafe { llama_cpp_sys_2::llama_kv_cache_seq_pos_max(self.context.as_ptr(), seq_id) } + unsafe { llama_cpp_sys_2::llama_kv_self_seq_pos_max(self.context.as_ptr(), seq_id) } } /// Defragment the KV cache @@ -202,130 +202,11 @@ impl LlamaContext<'_> { /// - lazily on next [`LlamaContext::decode`] /// - explicitly with [`Self::kv_cache_update`] pub fn kv_cache_defrag(&mut self) { - unsafe { llama_cpp_sys_2::llama_kv_cache_defrag(self.context.as_ptr()) } + unsafe { llama_cpp_sys_2::llama_kv_self_defrag(self.context.as_ptr()) } } /// Apply the KV cache updates (such as K-shifts, defragmentation, etc.) pub fn kv_cache_update(&mut self) { - unsafe { llama_cpp_sys_2::llama_kv_cache_update(self.context.as_ptr()) } - } - - /// Returns the number of tokens in the KV cache (slow, use only for debug) - /// If a KV cell has multiple sequences assigned to it, it will be counted multiple times - #[must_use] - pub fn get_kv_cache_token_count(&self) -> i32 { - unsafe { llama_cpp_sys_2::llama_get_kv_cache_token_count(self.context.as_ptr()) } - } - - /// Create an empty KV cache view. (use only for debugging purposes) - /// - /// # Parameters - /// - /// * `n_max_seq` - Maximum number of sequences that can exist in a cell. It's not an error - /// if there are more sequences in a cell than this value, however they will - /// not be visible in the view `cells_sequences`. - #[must_use] - pub fn new_kv_cache_view(&self, n_max_seq: i32) -> KVCacheView { - let view = - unsafe { llama_cpp_sys_2::llama_kv_cache_view_init(self.context.as_ptr(), n_max_seq) }; - KVCacheView { view, ctx: self } - } -} - -/// Information associated with an individual cell in the KV cache view. -#[derive(Debug)] -pub struct KVCacheViewCell { - /// The position for this cell. Takes KV cache shifts into account. - /// May be negative if the cell is not populated. - pub pos: llama_cpp_sys_2::llama_pos, -} - -/// An updateable view of the KV cache. (use only for debugging purposes) -#[derive(Debug)] -pub struct KVCacheView<'a> { - ctx: &'a LlamaContext<'a>, - view: llama_cpp_sys_2::llama_kv_cache_view, -} - -impl KVCacheView<'_> { - /// Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes) - pub fn update(&mut self) { - unsafe { - llama_cpp_sys_2::llama_kv_cache_view_update(self.ctx.context.as_ptr(), &mut self.view); - } - } - - /// Number of KV cache cells. This will be the same as the context size. - #[must_use] - pub fn n_cells(&self) -> i32 { - self.view.n_cells - } - - /// Number of tokens in the cache. For example, if there are two populated - /// cells, the first with 1 sequence id in it and the second with 2 sequence - /// ids then you'll have 3 tokens. - #[must_use] - pub fn token_count(&self) -> i32 { - self.view.token_count - } - - /// Number of populated cache cells. - #[must_use] - pub fn used_cells(&self) -> i32 { - self.view.used_cells - } - - /// Maximum contiguous empty slots in the cache. - #[must_use] - pub fn max_contiguous(&self) -> i32 { - self.view.max_contiguous - } - - /// Index to the start of the `max_contiguous` slot range. Can be negative - /// when cache is full. - #[must_use] - pub fn max_contiguous_idx(&self) -> i32 { - self.view.max_contiguous_idx - } - - /// Information for individual cells. - /// - /// # Panics - /// - /// - if `n_cells` does not fit into usize. - pub fn cells(&self) -> impl Iterator { - unsafe { - std::slice::from_raw_parts( - self.view.cells, - usize::try_from(self.view.n_cells).expect("failed to fit n_cells into usize"), - ) - } - .iter() - .map(|&cell| KVCacheViewCell { pos: cell.pos }) - } - - /// The sequences for each cell. There will be `n_max_seq` items per cell. - /// - /// # Panics - /// - /// - if `n_cells * n_max_seq` does not fit into usize. - /// - if `n_max_seq` does not fit into usize. - pub fn cells_sequences(&self) -> impl Iterator { - unsafe { - std::slice::from_raw_parts( - self.view.cells_sequences, - usize::try_from(self.view.n_cells * self.view.n_seq_max) - .expect("failed to fit n_cells * n_max_seq into usize"), - ) - } - .chunks(usize::try_from(self.view.n_seq_max).expect("failed to fit n_max_seq into usize")) - } -} - -impl Drop for KVCacheView<'_> { - fn drop(&mut self) { - unsafe { - llama_cpp_sys_2::llama_kv_cache_view_free(&mut self.view); - } + unsafe { llama_cpp_sys_2::llama_kv_self_update(self.context.as_ptr()) } } } diff --git a/llama-cpp-sys-2/llama.cpp b/llama-cpp-sys-2/llama.cpp index ceda28ef..259469c4 160000 --- a/llama-cpp-sys-2/llama.cpp +++ b/llama-cpp-sys-2/llama.cpp @@ -1 +1 @@ -Subproject commit ceda28ef8e310a8dee60bf275077a3eedae8e36c +Subproject commit 259469c4b57c1a32606353bcac52ba683424a990 From b0839c391ebbb74efda2d2852603f595c94e7ff3 Mon Sep 17 00:00:00 2001 From: Britt Lewis Date: Sat, 24 May 2025 13:24:37 -0400 Subject: [PATCH 57/65] update llama.cpp org-ref --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 625b54c7..0dfa7e0d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "llama-cpp-sys-2/llama.cpp"] path = llama-cpp-sys-2/llama.cpp - url = https://github.com/ggerganov/llama.cpp + url = https://github.com/ggml-org/llama.cpp From f8d986b3f699e04b3fba3a1da7dadb9c9773fc62 Mon Sep 17 00:00:00 2001 From: Britt Lewis Date: Sat, 24 May 2025 14:44:19 -0400 Subject: [PATCH 58/65] disable building tools post upstream reorganization * https://github.com/ggml-org/llama.cpp/pull/13249 --- llama-cpp-sys-2/build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index df654053..156eb4b4 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -268,6 +268,7 @@ fn main() { config.define("LLAMA_BUILD_TESTS", "OFF"); config.define("LLAMA_BUILD_EXAMPLES", "OFF"); config.define("LLAMA_BUILD_SERVER", "OFF"); + config.define("LLAMA_BUILD_TOOLS", "OFF"); config.define("LLAMA_CURL", "OFF"); config.define( From ff4784e62db6fe15446f325d430a876454b3ec0e Mon Sep 17 00:00:00 2001 From: Britt Lewis Date: Sat, 24 May 2025 14:45:34 -0400 Subject: [PATCH 59/65] cargo fmt in build.rs --- llama-cpp-sys-2/build.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index 156eb4b4..f545ff9a 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -280,7 +280,11 @@ fn main() { config.define("GGML_BLAS", "OFF"); } - if (matches!(target_os, TargetOs::Windows(WindowsVariant::Msvc)) && matches!(profile.as_str(), "Release" | "RelWithDebInfo" | "MinSizeRel")) + if (matches!(target_os, TargetOs::Windows(WindowsVariant::Msvc)) + && matches!( + profile.as_str(), + "Release" | "RelWithDebInfo" | "MinSizeRel" + )) { // Debug Rust builds under MSVC turn off optimization even though we're ideally building the release profile of llama.cpp. // Looks like an upstream bug: From 9bd1cc40c2ef2fc3e1e52e68f6a905c2ca9541cc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 May 2025 05:53:17 +0000 Subject: [PATCH 60/65] chore(deps): bump cc from 1.2.23 to 1.2.24 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.2.23 to 1.2.24. - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.23...cc-v1.2.24) --- updated-dependencies: - dependency-name: cc dependency-version: 1.2.24 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3f7f3918..dbb23a13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,9 +109,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "cc" -version = "1.2.23" +version = "1.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4ac86a9e5bc1e2b3449ab9d7d3a6a405e3d1bb28d7b9be8614f55846ae3766" +checksum = "16595d3be041c03b09d08d0858631facccee9221e579704070e6e9e4915d3bc7" dependencies = [ "jobserver", "libc", diff --git a/Cargo.toml b/Cargo.toml index f1b63014..91a7e87a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ hf-hub = { version = "0.3.2" } criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" -cc = "1.2.23" +cc = "1.2.24" anyhow = "1.0.98" clap = "4.5.38" encoding_rs = "0.8.35" From 57afeb6c436eab973bfff25c898950c41a0fd0ee Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 26 May 2025 21:04:55 +0000 Subject: [PATCH 61/65] Bump version to 0.1.108 [skip ci] --- Cargo.lock | 8 ++++---- examples/embeddings/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- llama-cpp-2/Cargo.toml | 2 +- llama-cpp-sys-2/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dbb23a13..dae55148 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -277,7 +277,7 @@ checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "embeddings" -version = "0.1.107" +version = "0.1.108" dependencies = [ "anyhow", "clap", @@ -662,7 +662,7 @@ checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "llama-cpp-2" -version = "0.1.107" +version = "0.1.108" dependencies = [ "encoding_rs", "enumflags2", @@ -674,7 +674,7 @@ dependencies = [ [[package]] name = "llama-cpp-sys-2" -version = "0.1.107" +version = "0.1.108" dependencies = [ "bindgen", "cc", @@ -1114,7 +1114,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple" -version = "0.1.107" +version = "0.1.108" dependencies = [ "anyhow", "clap", diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml index 2df63a2e..21013e3d 100644 --- a/examples/embeddings/Cargo.toml +++ b/examples/embeddings/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "embeddings" -version = "0.1.107" +version = "0.1.108" edition = "2021" [dependencies] diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index 540486ba..dbf3b965 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simple" -version = "0.1.107" +version = "0.1.108" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index 67e2e118..3ef28505 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-2" description = "llama.cpp bindings for Rust" -version = "0.1.107" +version = "0.1.108" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index f998598d..8ce03271 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-sys-2" description = "Low Level Bindings to llama.cpp" -version = "0.1.107" +version = "0.1.108" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" From af7b3f33458036ac98425b2352abc7b49b46de61 Mon Sep 17 00:00:00 2001 From: Russell Wong Date: Tue, 27 May 2025 18:29:51 +0930 Subject: [PATCH 62/65] fixed llama.cpp build issue on ARM (Apple aarch64) --- llama-cpp-sys-2/build.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs index f545ff9a..8e94e81d 100644 --- a/llama-cpp-sys-2/build.rs +++ b/llama-cpp-sys-2/build.rs @@ -336,6 +336,16 @@ fn main() { } } + if matches!(target_os, TargetOs::Linux) + && target_triple.contains("aarch64") + && !env::var(format!("CARGO_FEATURE_{}", "native".to_uppercase())).is_ok() + { + // If the native feature is not enabled, we take off the native ARM64 support. + // It is useful in docker environments where the native feature is not enabled. + config.define("GGML_NATIVE", "OFF"); + config.define("GGML_CPU_ARM_ARCH", "armv8-a"); + } + if cfg!(feature = "vulkan") { config.define("GGML_VULKAN", "ON"); match target_os { From 2ad42a81baa437ce7bb145d31a5860a740220f4f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Jun 2025 06:42:17 +0000 Subject: [PATCH 63/65] chore(deps): bump clap from 4.5.38 to 4.5.39 Bumps [clap](https://github.com/clap-rs/clap) from 4.5.38 to 4.5.39. - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.38...clap_complete-v4.5.39) --- updated-dependencies: - dependency-name: clap dependency-version: 4.5.39 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dae55148..38e0c30b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,9 +146,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.38" +version = "4.5.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed93b9805f8ba930df42c2590f05453d5ec36cbb85d018868a5b24d31f6ac000" +checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" dependencies = [ "clap_builder", "clap_derive", @@ -156,9 +156,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.38" +version = "4.5.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "379026ff283facf611b0ea629334361c4211d1b12ee01024eec1591133b04120" +checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" dependencies = [ "anstream", "anstyle", diff --git a/Cargo.toml b/Cargo.toml index 91a7e87a..c608eed2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ pprof = "0.13.0" bindgen = "0.69.5" cc = "1.2.24" anyhow = "1.0.98" -clap = "4.5.38" +clap = "4.5.39" encoding_rs = "0.8.35" tracing-subscriber = { version = "0.3", features = ["json"] } From ff82d97b5e2c8a6a53d843eda38443ae993198f9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Jun 2025 06:44:04 +0000 Subject: [PATCH 64/65] chore(deps): bump cc from 1.2.24 to 1.2.25 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.2.24 to 1.2.25. - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.24...cc-v1.2.25) --- updated-dependencies: - dependency-name: cc dependency-version: 1.2.25 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dae55148..5d9c1244 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,9 +109,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "cc" -version = "1.2.24" +version = "1.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16595d3be041c03b09d08d0858631facccee9221e579704070e6e9e4915d3bc7" +checksum = "d0fc897dc1e865cc67c0e05a836d9d3f1df3cbe442aa4a9473b18e12624a4951" dependencies = [ "jobserver", "libc", diff --git a/Cargo.toml b/Cargo.toml index 91a7e87a..f954c609 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ hf-hub = { version = "0.3.2" } criterion = "0.5.1" pprof = "0.13.0" bindgen = "0.69.5" -cc = "1.2.24" +cc = "1.2.25" anyhow = "1.0.98" clap = "4.5.38" encoding_rs = "0.8.35" From e83fa543edba8ca50e4d9ae38ec02ebb78de9800 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 5 Jun 2025 19:22:02 +0000 Subject: [PATCH 65/65] Bump version to 0.1.109 [skip ci] --- Cargo.lock | 8 ++++---- examples/embeddings/Cargo.toml | 2 +- examples/simple/Cargo.toml | 2 +- llama-cpp-2/Cargo.toml | 2 +- llama-cpp-sys-2/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d1e4dcce..dbb9e394 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -277,7 +277,7 @@ checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "embeddings" -version = "0.1.108" +version = "0.1.109" dependencies = [ "anyhow", "clap", @@ -662,7 +662,7 @@ checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "llama-cpp-2" -version = "0.1.108" +version = "0.1.109" dependencies = [ "encoding_rs", "enumflags2", @@ -674,7 +674,7 @@ dependencies = [ [[package]] name = "llama-cpp-sys-2" -version = "0.1.108" +version = "0.1.109" dependencies = [ "bindgen", "cc", @@ -1114,7 +1114,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simple" -version = "0.1.108" +version = "0.1.109" dependencies = [ "anyhow", "clap", diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml index 21013e3d..eb993289 100644 --- a/examples/embeddings/Cargo.toml +++ b/examples/embeddings/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "embeddings" -version = "0.1.108" +version = "0.1.109" edition = "2021" [dependencies] diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index dbf3b965..28d0ee6d 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simple" -version = "0.1.108" +version = "0.1.109" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml index 3ef28505..fb2f9f57 100644 --- a/llama-cpp-2/Cargo.toml +++ b/llama-cpp-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-2" description = "llama.cpp bindings for Rust" -version = "0.1.108" +version = "0.1.109" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs" diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml index 8ce03271..3dd9b94e 100644 --- a/llama-cpp-sys-2/Cargo.toml +++ b/llama-cpp-sys-2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "llama-cpp-sys-2" description = "Low Level Bindings to llama.cpp" -version = "0.1.108" +version = "0.1.109" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/utilityai/llama-cpp-rs"