Merge 'origin/master' into hipblas · LostRuins/koboldcpp@a836529 · GitHub
[go: up one dir, main page]

Skip to content

Commit a836529

Browse files
committed
Merge 'origin/master' into hipblas
2 parents 85f902d + 254a7a7 commit a836529

32 files changed

+7749
-736
lines changed

.devops/full.Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,6 @@ COPY . .
1616

1717
RUN make
1818

19+
ENV LC_ALL=C.utf8
20+
1921
ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/main.Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,6 @@ FROM ubuntu:$UBUNTU_VERSION as runtime
1515

1616
COPY --from=build /app/main /main
1717

18+
ENV LC_ALL=C.utf8
19+
1820
ENTRYPOINT [ "/main" ]

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ on:
1010
push:
1111
branches:
1212
- master
13-
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp']
13+
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
1414
pull_request:
1515
types: [opened, synchronize, reopened]
16-
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp']
16+
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
1717

1818
env:
1919
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,9 @@ target_link_libraries(llama PRIVATE
464464
if (BUILD_SHARED_LIBS)
465465
set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
466466
target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
467+
if (LLAMA_METAL)
468+
set_target_properties(llama PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
469+
endif()
467470
endif()
468471

469472
if (GGML_SOURCES_CUDA)

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
107107
# Usage AVX-only
108108
#CFLAGS += -mfma -mf16c -mavx
109109
#CXXFLAGS += -mfma -mf16c -mavx
110+
111+
# Usage SSSE3-only (Not is SSE3!)
112+
#CFLAGS += -mssse3
113+
#CXXFLAGS += -mssse3
110114
endif
111115

112116
ifneq ($(filter ppc64%,$(UNAME_M)),)
@@ -123,6 +127,7 @@ endif
123127

124128
ifndef LLAMA_NO_K_QUANTS
125129
CFLAGS += -DGGML_USE_K_QUANTS
130+
CXXFLAGS += -DGGML_USE_K_QUANTS
126131
OBJS += k_quants.o
127132
endif
128133

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ Building the program with BLAS support may lead to some performance improvements
308308

309309
- #### BLIS
310310

311-
Check [BLIS.md](BLIS.md) for more information.
311+
Check [BLIS.md](docs/BLIS.md) for more information.
312312

313313
- #### Intel MKL
314314

SHA256SUMS

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
700df0d3013b703a806d2ae7f1bfb8e59814e3d06ae78be0c66368a50059f33d models/7B/consolidated.00.pth
22
666a4bb533b303bdaf89e1b6a3b6f93535d868de31d903afdc20983dc526c847 models/7B/ggml-model-f16.bin
3-
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q4_0.bin
3+
ec2f2d1f0dfb73b72a4cbac7fa121abbe04c37ab327125a38248f930c0f09ddf models/7B/ggml-model-q4_0.bin
44
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q4_1.bin
55
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q5_0.bin
66
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q5_1.bin
77
7e89e242ddc0dd6f060b43ca219ce8b3e8f08959a72cb3c0855df8bb04d46265 models/7B/params.json
88
745bf4e29a4dd6f411e72976d92b452da1b49168a4f41c951cfcc8051823cf08 models/13B/consolidated.00.pth
99
d5ccbcc465c71c0de439a5aeffebe8344c68a519bce70bc7f9f92654ee567085 models/13B/consolidated.01.pth
1010
2b206e9b21fb1076f11cafc624e2af97c9e48ea09312a0962153acc20d45f808 models/13B/ggml-model-f16.bin
11-
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q4_0.bin
11+
fad169e6f0f575402cf75945961cb4a8ecd824ba4da6be2af831f320c4348fa5 models/13B/ggml-model-q4_0.bin
1212
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q4_1.bin
1313
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q5_0.bin
1414
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q5_1.bin
@@ -18,7 +18,7 @@ e23294a58552d8cdec5b7e8abb87993b97ea6eced4178ff2697c02472539d067 models/30B/con
1818
24a87f01028cbd3a12de551dcedb712346c0b5cbdeff1454e0ddf2df9b675378 models/30B/consolidated.02.pth
1919
1adfcef71420886119544949767f6a56cb6339b4d5fcde755d80fe68b49de93b models/30B/consolidated.03.pth
2020
7e1b524061a9f4b27c22a12d6d2a5bf13b8ebbea73e99f218809351ed9cf7d37 models/30B/ggml-model-f16.bin
21-
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q4_0.bin
21+
d2a441403944819492ec8c2002cc36fa38468149bfb4b7b4c52afc7bd9a7166d models/30B/ggml-model-q4_0.bin
2222
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q4_1.bin
2323
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q5_0.bin
2424
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q5_1.bin
@@ -32,7 +32,7 @@ a287c0dfe49081626567c7fe87f74cce5831f58e459b427b5e05567641f47b78 models/65B/con
3232
72b4eba67a1a3b18cb67a85b70f8f1640caae9b40033ea943fb166bd80a7b36b models/65B/consolidated.06.pth
3333
d27f5b0677d7ff129ceacd73fd461c4d06910ad7787cf217b249948c3f3bc638 models/65B/consolidated.07.pth
3434
60758f2384d74e423dffddfd020ffed9d3bb186ebc54506f9c4a787d0f5367b0 models/65B/ggml-model-f16.bin
35-
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q4_0.bin
35+
cde053439fa4910ae454407e2717cc46cc2c2b4995c00c93297a2b52e790fa92 models/65B/ggml-model-q4_0.bin
3636
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q4_1.bin
3737
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q5_0.bin
3838
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q5_1.bin

examples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ else()
3737
add_subdirectory(save-load-state)
3838
add_subdirectory(benchmark)
3939
add_subdirectory(baby-llama)
40+
add_subdirectory(train-text-from-scratch)
4041
if (LLAMA_METAL)
4142
add_subdirectory(metal)
4243
endif()

examples/baby-llama/baby-llama.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,34 +79,39 @@ struct ggml_tensor * randomize_tensor_normal(
7979
int ndims,
8080
const int64_t ne[],
8181
struct random_normal_distribution * rnd) {
82+
float scale = 1.0; // xavier
8283
switch (ndims) {
8384
case 1:
85+
scale /= sqrtf(ne[0]);
8486
for (int i0 = 0; i0 < ne[0]; i0++) {
85-
((float *)tensor->data)[i0] = frand_normal(rnd);
87+
((float *)tensor->data)[i0] = scale * frand_normal(rnd);
8688
}
8789
break;
8890
case 2:
91+
scale /= sqrtf(ne[0]+ne[1]);
8992
for (int i1 = 0; i1 < ne[1]; i1++) {
9093
for (int i0 = 0; i0 < ne[0]; i0++) {
91-
((float *)tensor->data)[i1*ne[0] + i0] = frand_normal(rnd);
94+
((float *)tensor->data)[i1*ne[0] + i0] = scale * frand_normal(rnd);
9295
}
9396
}
9497
break;
9598
case 3:
99+
scale /= sqrtf(ne[0]+ne[1]);
96100
for (int i2 = 0; i2 < ne[2]; i2++) {
97101
for (int i1 = 0; i1 < ne[1]; i1++) {
98102
for (int i0 = 0; i0 < ne[0]; i0++) {
99-
((float *)tensor->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand_normal(rnd);
103+
((float *)tensor->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = scale * frand_normal(rnd);
100104
}
101105
}
102106
}
103107
break;
104108
case 4:
109+
scale /= sqrtf(ne[0]+ne[1]);
105110
for (int i3 = 0; i3 < ne[3]; i3++) {
106111
for (int i2 = 0; i2 < ne[2]; i2++) {
107112
for (int i1 = 0; i1 < ne[1]; i1++) {
108113
for (int i0 = 0; i0 < ne[0]; i0++) {
109-
((float *)tensor->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand_normal(rnd);
114+
((float *)tensor->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = scale * frand_normal(rnd);
110115
}
111116
}
112117
}
@@ -148,8 +153,8 @@ struct llama_hparams_lora {
148153
uint32_t n_rot = 64;
149154
uint32_t n_lora = 64;
150155

151-
bool operator!=(const llama_hparams & other) const {
152-
return memcmp(this, &other, sizeof(llama_hparams));
156+
bool operator!=(const llama_hparams_lora & other) const {
157+
return memcmp(this, &other, sizeof(llama_hparams_lora)) != 0;
153158
}
154159
};
155160

examples/common.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
331331
}
332332
#else
333333
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n");
334+
#endif // GGML_USE_CUBLAS
335+
} else if (arg == "--low-vram" || arg == "-lv") {
336+
#ifdef GGML_USE_CUBLAS
337+
params.low_vram = true;
338+
#else
339+
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set lower vram usage.\n");
334340
#endif // GGML_USE_CUBLAS
335341
} else if (arg == "--no-mmap") {
336342
params.use_mmap = false;
@@ -479,6 +485,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
479485
fprintf(stderr, " -ts SPLIT --tensor-split SPLIT\n");
480486
fprintf(stderr, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
481487
fprintf(stderr, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n" );
488+
fprintf(stderr, " -lv, --low-vram don't allocate VRAM scratch buffer\n" );
482489
#endif
483490
fprintf(stderr, " --mtest compute maximum memory usage\n");
484491
fprintf(stderr, " --export export the computation graph to 'llama.ggml'\n");
@@ -528,6 +535,7 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
528535
lparams.n_gpu_layers = params.n_gpu_layers;
529536
lparams.main_gpu = params.main_gpu;
530537
memcpy(lparams.tensor_split, params.tensor_split, LLAMA_MAX_DEVICES*sizeof(float));
538+
lparams.low_vram = params.low_vram;
531539
lparams.seed = params.seed;
532540
lparams.f16_kv = params.memory_f16;
533541
lparams.use_mmap = params.use_mmap;
@@ -632,6 +640,9 @@ void console_set_color(console_state & con_st, console_color_t color) {
632640
case CONSOLE_COLOR_USER_INPUT:
633641
fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_GREEN);
634642
break;
643+
case CONSOLE_COLOR_ERROR:
644+
fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_RED);
645+
break;
635646
}
636647
con_st.color = color;
637648
fflush(con_st.out);

0 commit comments

Comments
 (0)
0