llama : move sampling code into llama-sampling · ggml-org/llama.cpp@d4f8f52 · GitHub

Commit d4f8f52

llama : move sampling code into llama-sampling

ggml-ci

1 parent 1666f92 commit d4f8f52

File tree

7 files changed: +757 additions, -699 deletions

Makefile

Lines changed: 9 additions & 0 deletions

@@ -868,6 +868,7 @@ OBJ_GGML += \
 
 OBJ_LLAMA = \
 	src/llama.o \
+	src/llama-sampling.o \
 	src/unicode.o \
 	src/unicode-data.o
 
@@ -1047,6 +1048,7 @@ src/unicode-data.o: \
 
 src/llama.o: \
 	src/llama.cpp \
+	src/llama-impl.h \
 	src/unicode.h \
 	include/llama.h \
 	ggml/include/ggml-cuda.h \
@@ -1056,6 +1058,13 @@ src/llama.o: \
 	ggml/include/ggml-backend.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
+src/llama-sampling.o: \
+	src/llama-sampling.cpp \
+	src/llama-sampling.h \
+	src/llama-impl.h \
+	include/llama.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
 $(LIB_LLAMA): \
 	$(OBJ_LLAMA) \
 	$(LIB_GGML)

include/llama.h

Lines changed: 6 additions & 6 deletions

@@ -1081,12 +1081,6 @@ extern "C" {
         llama_token_data_array * candidates,
         float temp);
 
-    /// @details Apply constraints from grammar
-    LLAMA_API void llama_sample_grammar(
-            struct llama_context * ctx,
-          llama_token_data_array * candidates,
-      const struct llama_grammar * grammar);
-
     /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
     /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
     /// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@@ -1124,6 +1118,12 @@ extern "C" {
            struct llama_context * ctx,
          llama_token_data_array * candidates);
 
+    /// @details Apply constraints from grammar
+    LLAMA_API void llama_sample_grammar(
+            struct llama_context * ctx,
+          llama_token_data_array * candidates,
+      const struct llama_grammar * grammar);
+
     /// @details Accepts the sampled token into the grammar
     LLAMA_API void llama_grammar_accept_token(
             struct llama_context * ctx,
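Taken together, the header change is a pure reorder: the llama_sample_grammar declaration moves down so that it sits next to llama_grammar_accept_token, grouping the two grammar calls that are used as a pair during constrained generation. A minimal sketch of that pairing, assuming the rest of this era's C API (llama_sample_token_greedy and the surrounding helper are illustrative, not part of this commit):

// Sketch: grammar-constrained sampling with the C API as declared above.
// `ctx`, `grammar`, and a populated `candidates` array are assumed to exist;
// error handling and the rest of the decode loop are omitted.
static llama_token sample_with_grammar(
        struct llama_context   * ctx,
        struct llama_grammar   * grammar,
        llama_token_data_array * candidates) {
    // Mask out candidates the grammar cannot accept at this position.
    llama_sample_grammar(ctx, candidates, grammar);

    // Choose among the survivors; greedy selection keeps the sketch short.
    const llama_token id = llama_sample_token_greedy(ctx, candidates);

    // Advance the grammar state so the next step is constrained correctly.
    llama_grammar_accept_token(ctx, grammar, id);

    return id;
}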

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -14,6 +14,7 @@ endif()
 add_library(llama
             ../include/llama.h
             llama.cpp
+            llama-sampling.cpp
             unicode.h
             unicode.cpp
             unicode-data.cpp
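With the Makefile and CMake changes above, both build systems now compile llama-sampling.cpp into the llama target. The commit page does not show that file, but to give a feel for the kind of self-contained routine the split isolates, here is a hedged sketch of temperature scaling over a llama_token_data_array (the function name and body are illustrative assumptions, not the commit's exact internals; the public llama_sample_temp has this dividing behavior):

// Sketch only: the flavor of routine that moves into src/llama-sampling.cpp.
// Temperature scaling divides each candidate logit by `temp`; callers are
// expected to guard against temp == 0 (greedy sampling is used instead).
static void sample_temp_sketch(llama_token_data_array * candidates, float temp) {
    for (size_t i = 0; i < candidates->size; ++i) {
        candidates->data[i].logit /= temp;
    }
}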

src/llama-impl.h

Lines changed: 50 additions & 0 deletions

@@ -0,0 +1,50 @@
+#pragma once
+
+#define LLAMA_API_INTERNAL
+#include "llama.h"
+
+#include <array>
+#include <set>
+#include <map>
+#include <cstdint>
+#include <random>
+
+#ifdef __has_include
+    #if __has_include(<unistd.h>)
+        #include <unistd.h>
+        #if defined(_POSIX_MAPPED_FILES)
+            #include <sys/mman.h>
+            #include <fcntl.h>
+        #endif
+        #if defined(_POSIX_MEMLOCK_RANGE)
+            #include <sys/resource.h>
+        #endif
+    #endif
+#endif
+
+// bump if necessary
+#define LLAMA_MAX_NODES   8192
+#define LLAMA_MAX_LAYERS  256
+#define LLAMA_MAX_EXPERTS 160 // DeepSeekV2
+
+#ifdef __GNUC__
+#ifdef __MINGW32__
+#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#else
+#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#endif
+#else
+#define LLAMA_ATTRIBUTE_FORMAT(...)
+#endif
+
+//
+// logging
+//
+
+LLAMA_ATTRIBUTE_FORMAT(2, 3)
+void llama_log_internal        (ggml_log_level level, const char * format, ...);
+void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
+
+#define LLAMA_LOG_INFO(...)  llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
+#define LLAMA_LOG_WARN(...)  llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
+#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
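The new llama-impl.h exists so that llama-sampling.cpp can share internal plumbing (logging, size limits, format checking) without pulling in all of llama.cpp. The LLAMA_ATTRIBUTE_FORMAT(2, 3) annotation tells GCC and Clang that argument 2 of llama_log_internal is a printf-style format string whose variadic arguments start at position 3, so format/argument mismatches become compile-time warnings. A small sketch of a caller inside the library (the function and values are invented for illustration):

#include "llama-impl.h"

// Sketch: internal code logs through the macros, which route to
// llama_log_internal and from there to the user-installed log callback.
static void report_sampling_setup(int n_candidates, float temp) {
    LLAMA_LOG_INFO("%s: sampling over %d candidates at temperature %.2f\n",
                   __func__, n_candidates, temp);

    if (temp < 0.0f) {
        // A wrong specifier here, e.g. %d for `temp`, would trigger
        // -Wformat thanks to LLAMA_ATTRIBUTE_FORMAT(2, 3).
        LLAMA_LOG_WARN("%s: negative temperature %.2f\n", __func__, temp);
    }
}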
