llama : move vocab, grammar and sampling into separate files by ggerganov · Pull Request #8508 · ggml-org/llama.cpp · GitHub

llama : move vocab, grammar and sampling into separate files #8508


Merged: 11 commits merged on Jul 23, 2024

Commit: llama : move sampling code into llama-sampling
ggml-ci
ggerganov committed Jul 22, 2024
commit 0ddc8e361c5ea1bdab8dc14c5658e95ea9e5e731
9 changes: 9 additions & 0 deletions Makefile
@@ -876,6 +876,7 @@ OBJ_GGML += \

OBJ_LLAMA = \
src/llama.o \
src/llama-sampling.o \
src/unicode.o \
src/unicode-data.o

@@ -1055,6 +1056,7 @@ src/unicode-data.o: \

src/llama.o: \
src/llama.cpp \
src/llama-impl.h \
src/unicode.h \
include/llama.h \
ggml/include/ggml-cuda.h \
@@ -1064,6 +1066,13 @@ src/llama.o: \
ggml/include/ggml-backend.h
$(CXX) $(CXXFLAGS) -c $< -o $@

src/llama-sampling.o: \
src/llama-sampling.cpp \
src/llama-sampling.h \
src/llama-impl.h \
include/llama.h
$(CXX) $(CXXFLAGS) -c $< -o $@

$(LIB_LLAMA): \
$(OBJ_LLAMA) \
$(LIB_GGML)
12 changes: 6 additions & 6 deletions include/llama.h
@@ -1084,12 +1084,6 @@ extern "C" {
llama_token_data_array * candidates,
float temp);

/// @details Apply constraints from grammar
LLAMA_API void llama_sample_grammar(
struct llama_context * ctx,
llama_token_data_array * candidates,
const struct llama_grammar * grammar);

/// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
/// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
/// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@@ -1127,6 +1121,12 @@ extern "C" {
struct llama_context * ctx,
llama_token_data_array * candidates);

/// @details Apply constraints from grammar
LLAMA_API void llama_sample_grammar(
struct llama_context * ctx,
llama_token_data_array * candidates,
const struct llama_grammar * grammar);

/// @details Accepts the sampled token into the grammar
LLAMA_API void llama_grammar_accept_token(
struct llama_context * ctx,
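For reference, the two grammar entry points relocated above are meant to be used together inside a sampling loop: llama_sample_grammar masks out candidate tokens the grammar forbids, and llama_grammar_accept_token then advances the grammar state with whatever token was finally picked. A minimal sketch of that call order, assuming the llama.h API as it stands at this point in the tree; the helper name and the greedy pick are illustrative only, not part of this PR.

// Sketch only: build a candidate array from the current logits, apply the
// grammar constraint, sample greedily, and feed the token back to the grammar.
#include "llama.h"
#include <vector>

static llama_token sample_with_grammar(llama_context * ctx, llama_grammar * grammar) {
    const float * logits  = llama_get_logits(ctx);
    const int     n_vocab = llama_n_vocab(llama_get_model(ctx));

    std::vector<llama_token_data> cur;
    cur.reserve(n_vocab);
    for (llama_token id = 0; id < n_vocab; ++id) {
        cur.push_back({ id, logits[id], 0.0f }); // { id, logit, p }
    }
    llama_token_data_array candidates = { cur.data(), cur.size(), false };

    llama_sample_grammar(ctx, &candidates, grammar);               // drop tokens the grammar rejects
    const llama_token tok = llama_sample_token_greedy(ctx, &candidates);

    llama_grammar_accept_token(ctx, grammar, tok);                 // advance the grammar state
    return tok;
}

The ordering matters: the token passed to llama_grammar_accept_token must be the token actually emitted, otherwise the grammar state diverges from the generated text.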
1 change: 1 addition & 0 deletions src/CMakeLists.txt
@@ -14,6 +14,7 @@ endif()
add_library(llama
../include/llama.h
llama.cpp
llama-sampling.cpp
unicode.h
unicode.cpp
unicode-data.cpp
50 changes: 50 additions & 0 deletions src/llama-impl.h
@@ -0,0 +1,50 @@
#pragma once

#define LLAMA_API_INTERNAL
#include "llama.h"

#include <array>
#include <set>
#include <map>
#include <cstdint>
#include <random>

#ifdef __has_include
#if __has_include(<unistd.h>)
#include <unistd.h>
#if defined(_POSIX_MAPPED_FILES)
#include <sys/mman.h>
#include <fcntl.h>
#endif
#if defined(_POSIX_MEMLOCK_RANGE)
#include <sys/resource.h>
#endif
#endif
#endif

// bump if necessary
#define LLAMA_MAX_NODES 8192
#define LLAMA_MAX_LAYERS 256
#define LLAMA_MAX_EXPERTS 160 // DeepSeekV2

#ifdef __GNUC__
#ifdef __MINGW32__
#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#else
#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#endif
#else
#define LLAMA_ATTRIBUTE_FORMAT(...)
#endif

//
// logging
//

LLAMA_ATTRIBUTE_FORMAT(2, 3)
void llama_log_internal (ggml_log_level level, const char * format, ...);
void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);

#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
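These LLAMA_LOG_* macros are the internal diagnostics interface: any source file that includes llama-impl.h (now including the new llama-sampling.cpp) logs through llama_log_internal, which dispatches to the installed log callback (llama_log_callback_default unless overridden via llama_log_set). A small sketch of typical usage, assuming only this header; the function and the logged values are illustrative.

// Sketch only: internal-style logging from a source file that includes llama-impl.h.
#include "llama-impl.h"

static void report_sampling_setup(int n_vocab, float temp) {
    LLAMA_LOG_INFO("%s: n_vocab = %d\n", __func__, n_vocab);
    if (temp <= 0.0f) {
        LLAMA_LOG_WARN("%s: temp = %.2f <= 0, sampling will be greedy\n", __func__, temp);
    }
}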