8000 WIP Feature/cpp by jmikedupont2 · Pull Request #4155 · ggml-org/llama.cpp · GitHub
[go: up one dir, main page]

Skip to content

WIP Feature/cpp #4155

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
rebased and trimmed down
now compiling again,
  • Loading branch information
mike dupont committed Nov 21, 2023
commit ee9b0bceebfbbf0239d565cd714b43261e353cb3
81 changes: 58 additions & 23 deletions CMakeLists.txt
10000 10000 < 67F4 td class="blob-code blob-code-deletion js-file-line"> add_compile_options(-pg)
Original file line number Diff line number Diff line change
@@ -1,8 +1,34 @@
cmake_minimum_required(VERSION 3.13) # for add_link_options
project("llama.cpp" C CXX)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
if (NOT MSVC)
set(cuda_flags -Wno-pedantic)
endif()

set(LLAMA_CUBLAS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(LLAMA_CUDA_F16 ON)
set(LLAMA_ACCELERATE ON)
set(LLAMA_K_QUANTS ON)

#-DLLAMA_NATIVE=off
set(LLAMA_AVX ON)
set(LLAMA_AVX2 OFF)
set(LLAMA_AVX512 OFF)
set(LLAMA_FMA OFF)
set(LLAMA_F16C OFF)
set(CMAKE_CUDA_FLAGS "--verbose") #
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
set(CUDACXX /usr/local/cuda-12.3/bin/nvcc)
set(CMAKE_CUDA_COMPILER /usr/local/cuda-12.3/bin/nvcc)
set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda-12.3)
#GGML_USE_CUBLAS

#set(CMAKE_EXE_LINKER_FLAGS -pg)
#set(CMAKE_SHARED_LINKER_FLAGS -pg)

set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE)

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
Expand Down Expand Up @@ -44,7 +70,7 @@ endif()

# general
option(LLAMA_STATIC "llama: static link libraries" OFF)
option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
option(LLAMA_LTO "llama: enable link time optimization" OFF)

# debug
Expand Down Expand Up @@ -77,9 +103,9 @@ endif()

# 3rd party libs
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
option(LLAMA_BLAS "llama: use BLAS" OFF)
option(LLAMA_BLAS "llama: use BLAS" ON)
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
option(LLAMA_CUBLAS "llama: use CUDA" OFF)
option(LLAMA_CUBLAS "llama: use CUDA" ON)
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
Expand All @@ -104,7 +130,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example"
# Compile flags
#

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
Expand Down Expand Up @@ -230,7 +256,12 @@ if (LLAMA_BLAS)

message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
add_compile_options(${BLAS_LINKER_FLAGS})
add_compile_definitions(GGML_USE_OPENBLAS)

# from https://github.com/NVIDIA/cutlass
make_directory("${PROJECT_BINARY_DIR}/nvcc_tmp")
set(cuda_flags --keep "SHELL:--keep-dir ${PROJECT_BINARY_DIR}/nvcc_tmp" ${cuda_flags})

# add_compile_definitions(GGML_USE_OPENBLAS)
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
add_compile_definitions(GGML_BLAS_USE_MKL)
endif()
Expand Down Expand Up @@ -272,6 +303,7 @@ if (LLAMA_CUBLAS)
endif()
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})

if (DEFINED LLAMA_CUDA_DMMV_Y)
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
endif()
Expand Down Expand Up @@ -312,7 +344,7 @@ if (LLAMA_MPI)
if (MPI_C_FOUND)
message(STATUS "MPI found")
set(GGML_HEADERS_MPI ggml-mpi.h)
set(GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h)
set(GGML_SOURCES_MPI ggml-mpi.cpp ggml-mpi.h)
add_compile_definitions(GGML_USE_MPI)
add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
if (NOT MSVC)
Expand Down Expand Up @@ -390,14 +422,15 @@ endif()

if (LLAMA_ALL_WARNINGS)
if (NOT MSVC)
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
# -Wpedantic
set(warning_flags -Wall -Wextra -Wcast-qual -Wno-unused-function)
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn -fpermissive)
set(host_cxx_flags "")

if (CMAKE_C_COMPILER_ID MATCHES "Clang")
set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi)
set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi -fpermissive)

if (
(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
Expand All @@ -407,37 +440,37 @@ if (LLAMA_ALL_WARNINGS)
endif()
elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
set(c_flags ${c_flags} -Wdouble-promotion)
set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds)
set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds -fpermissive)

if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation)
set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation -fpermissive)
endif()
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
set(host_cxx_flags ${host_cxx_flags} -Wextra-semi)
set(host_cxx_flags ${host_cxx_flags} -Wextra-semi -fpermissive)
endif()
endif()
else()
# todo : msvc
endif()

set(c_flags ${c_flags} ${warning_flags})
set(cxx_flags ${cxx_flags} ${warning_flags})
set(c_flags ${c_flags} -save-temps --verbose ${warning_flags})
set(cxx_flags ${cxx_flags} -fpermissive -save-temps --verbose ${warning_flags})
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")

endif()

if (NOT MSVC)
set(cuda_flags -Wno-pedantic)
endif()
set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})

list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
if (NOT cuda_host_flags STREQUAL "")
set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
endif()

#
set(cuda_flags --verbose -G ${cuda_flags})

add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")

if (WIN32)
Expand Down Expand Up @@ -485,8 +518,10 @@ if (NOT MSVC)
add_link_options(-static-libgcc -static-libstdc++)
endif()
endif()
add_link_options("-Wl,-Map=${TARGET}.map")

if (LLAMA_GPROF)
add_compile_options(-pg)
endif()
endif()

Expand Down Expand Up @@ -645,13 +680,13 @@ if (GGML_USE_CPU_HBM)
endif()

add_library(ggml OBJECT
ggml.c
ggml.cpp
ggml.h
ggml-alloc.c
ggml-alloc.cpp
ggml-alloc.h
ggml-backend.c
ggml-backend.cpp
ggml-backend.h
ggml-quants.c
ggml-quants.cpp
ggml-quants.h
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
Expand Down
20 changes: 10 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ endif
# keep standard at C11 and C++11
MK_CPPFLAGS = -I. -Icommon
MK_CFLAGS = -std=c11 -fPIC
MK_CXXFLAGS = -std=c++11 -fPIC
MK_CXXFLAGS = -std=c++17 -fPIC -fpermissive

# -Ofast tends to produce faster code, but may not be available for some compilers.
ifdef LLAMA_FAST
Expand Down Expand Up @@ -502,7 +502,7 @@ ggml-metal.o: ggml-metal.m ggml-metal.h
endif # LLAMA_METAL

ifdef LLAMA_MPI
ggml-mpi.o: ggml-mpi.c ggml-mpi.h
ggml-mpi.o: ggml-mpi.cpp ggml-mpi.h
$(CC) $(CFLAGS) -c $< -o $@
endif # LLAMA_MPI

Expand Down Expand Up @@ -537,17 +537,17 @@ $(info )
# Build library
#

ggml.o: ggml.c ggml.h ggml-cuda.h
$(CC) $(CFLAGS) -c $< -o $@
ggml.o: ggml.cpp ggml.h ggml-cuda.h
$(CXX) $(CXXFLAGS) -c $< -o $@

ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
$(CC) $(CFLAGS) -c $< -o $@
ggml-alloc.o: ggml-alloc.cpp ggml.h ggml-alloc.h
$(CXX) $(CXXFLAGS) -c $< -o $@

ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
$(CC) $(CFLAGS) -c $< -o $@
ggml-backend.o: ggml-backend.cpp ggml.h ggml-backend.h
$(CXX) $(CXXFLAGS) -c $< -o $@

ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
$(CC) $(CFLAGS) -c $< -o $@
ggml-quants.o: ggml-quants.cpp ggml.h ggml-quants.h
$(CXX) $(CXXFLAGS) -c $< -o $@

OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,7 @@ PROMPT_TEMPLATE=./prompts/chat-with-bob.txt PROMPT_CACHE_FILE=bob.prompt.bin \

The `grammars/` folder contains a handful of sample grammars. To write your own, check out the [GBNF Guide](./grammars/README.md).

For authoring more complex JSON grammars, you can also check out https://grammar.intrinsiclabs.ai/, a browser app that lets you write TypeScript interfaces which it compiles to GBNF grammars that you can save for local use. Note that the app is built and maintained by members of the community, please file any issues or FRs on [its repo](http://github.com/intrinsiclabsai/gbnfgen) and not this one.
For authoring more complex JSON grammars, you can also check out https://grammar.intrinsiclabs.ai/, a browser app that lets ygou write TypeScript interfaces which it compiles to GBNF grammars that you can save for local use. Note that the app is built and maintained by members of the community, please file any issues or FRs on [its repo](http://github.com/intrinsiclabsai/gbnfgen) and not this one.

### Instruction mode with Alpaca

Expand Down
Loading
0