basic integration of PowerInfer · hodlen/llama-cpp-python@e83c965 · GitHub
[go: up one dir, main page]

Skip to content

Commit e83c965

Browse files
committed
basic integration of PowerInfer
1 parent f2901d8 commit e83c965

File tree

6 files changed

+35
-26
lines changed
  • <div style="width:100%;display:flex">
  • examples/high_level_api
  • llama_cpp
  • vendor
  • 6 files changed

    +35
    -26
    lines changed

    .gitignore

    Lines changed: 2 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -176,3 +176,5 @@ cython_debug/
    176176

    177177
    # downloaded model .bin files
    178178
    docker/open_llama/*.bin
    179+
    180+
    /.venv/**

    .gitmodules

    Lines changed: 3 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1,3 +1,6 @@
    11
    [submodule "vendor/llama.cpp"]
    22
    path = vendor/llama.cpp
    33
    url = https://github.com/ggerganov/llama.cpp.git
    4+
    [submodule "vendor/PowerInfer"]
    5+
    path = vendor/PowerInfer
    6+
    url = https://github.com/SJTU-IPADS/PowerInfer.git

    CMakeLists.txt

    Lines changed: 25 additions & 25 deletions
    Original file line numberDiff line numberDiff line change
    @@ -16,7 +16,7 @@ if (LLAMA_BUILD)
    1616
    set(LLAMA_FMA "Off" CACHE BOOL "llama: enable FMA" FORCE)
    1717
    set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE)
    1818
    endif()
    19-
    add_subdirectory(vendor/llama.cpp)
    19+
    add_subdirectory(vendor/PowerInfer)
    2020
    install(
    2121
    TARGETS llama
    2222
    LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    @@ -44,28 +44,28 @@ if (LLAMA_BUILD)
    4444
    DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    4545
    )
    4646

    47-
    # Building llava
    48-
    add_subdirectory(vendor/llama.cpp/examples/llava)
    49-
    set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
    50-
    # Set CUDA_ARCHITECTURES to OFF on windows
    51-
    if (WIN32)
    52-
    set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
    53-
    endif()
    54-
    install(
    55-
    TARGETS llava_shared
    56-
    LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    57-
    RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    58-
    ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    59-
    FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    60-
    RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    61-
    )
    62-
    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
    63-
    install(
    64-
    TARGETS llava_shared
    65-
    LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    66-
    RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    67-
    ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    68-
    FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    69-
    RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    70-
    )
    47+
    # # Building llava
    48+
    # add_subdirectory(vendor/llama.cpp/examples/llava)
    49+
    # set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
    50+
    # # Set CUDA_ARCHITECTURES to OFF on windows
    51+
    # if (WIN32)
    52+
    # set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
    53+
    # endif()
    54+
    # install(
    55+
    # TARGETS llava_shared
    56+
    # LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    57+
    # RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    58+
    # ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    59+
    # FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    60+
    # RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
    61+
    # )
    62+
    # # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
    63+
    # install(
    64+
    # TARGETS llava_shared
    65+
    # LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    66+
    # RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    67+
    # ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    68+
    # FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    69+
    # RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
    70+
    # )
    7171
    endif()

    examples/high_level_api/high_level_api_inference.py

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -11,7 +11,7 @@
    1111

    1212
    output = llm(
    1313
    "Question: What are the names of the planets in the solar system? Answer: ",
    14-
    max_tokens=48,
    14+
    max_tokens=512,
    1515
    stop=["Q:", "\n"],
    1616
    echo=True,
    1717
    )

    llama_cpp/llama_cpp.py

    Lines changed: 3 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -305,12 +305,15 @@ class llama_model_params(Structure):
    305305
    _fields_ = [
    306306
    ("n_gpu_layers", c_int32),
    307307
    ("main_gpu", c_int32),
    308+
    ("vram_budget_gb", c_float),
    308309
    ("tensor_split", c_float_p),
    309310
    ("progress_callback", llama_progress_callback),
    310311
    ("progress_callback_user_data", c_void_p),
    311312
    ("vocab_only", c_bool),
    312313
    ("use_mmap", c_bool),
    313314
    ("use_mlock", c_bool),
    315+
    ("reset_gpu_index", c_bool),
    316+
    ("disable_gpu_index", c_bool),
    314317
    ]
    315318

    316319

    vendor/PowerInfer

    Submodule PowerInfer added at 9d72668

    0 commit comments

    Comments (0)