@@ -401,7 +401,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
-# LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
+# // LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
 # // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
 # // LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed
 # LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
@@ -430,14 +430,16 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ1_M = 31, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_BF16 = 32, // except 1d tensors
-
+# LLAMA_FTYPE_MOSTLY_Q4_0_4_4 = 33, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_Q4_0_4_8 = 34, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_Q4_0_8_8 = 35, // except 1d tensors
+#
 # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 # };
 LLAMA_FTYPE_ALL_F32 = 0
 LLAMA_FTYPE_MOSTLY_F16 = 1
 LLAMA_FTYPE_MOSTLY_Q4_0 = 2
 LLAMA_FTYPE_MOSTLY_Q4_1 = 3
-LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4
 LLAMA_FTYPE_MOSTLY_Q8_0 = 7
 LLAMA_FTYPE_MOSTLY_Q5_0 = 8
 LLAMA_FTYPE_MOSTLY_Q5_1 = 9
@@ -464,6 +466,9 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 LLAMA_FTYPE_MOSTLY_IQ4_XS = 30
 LLAMA_FTYPE_MOSTLY_IQ1_M = 31
 LLAMA_FTYPE_MOSTLY_BF16 = 32
+LLAMA_FTYPE_MOSTLY_Q4_0_4_4 = 33
+LLAMA_FTYPE_MOSTLY_Q4_0_4_8 = 34
+LLAMA_FTYPE_MOSTLY_Q4_0_8_8 = 35
 LLAMA_FTYPE_GUESSED = 1024

 # enum llama_rope_scaling_type {
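The three new file-type constants slot into the existing quantization flow: they can be assigned to the `ftype` field of `llama_model_quantize_params` before calling the `llama_model_quantize` binding touched by the last hunk below. A minimal sketch, assuming the `llama_model_quantize_default_params` / `llama_model_quantize` wrappers already exposed by this module; both GGUF file names are placeholders:

```python
import ctypes
import llama_cpp

# Sketch: requantize an F16 GGUF file to the new Q4_0_4_4 layout.
params = llama_cpp.llama_model_quantize_default_params()
params.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_Q4_0_4_4  # constant added in this diff
params.nthread = 4  # 0 lets llama.cpp pick the thread count

ret = llama_cpp.llama_model_quantize(
    b"model-f16.gguf",        # placeholder input path
    b"model-q4_0_4_4.gguf",   # placeholder output path
    ctypes.byref(params),
)
if ret != 0:
    raise RuntimeError(f"llama_model_quantize failed with code {ret}")
```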
@@ -1100,6 +1105,12 @@ class llama_chat_message(ctypes.Structure):
 ]


+# // lora adapter
+# struct llama_lora_adapter;
+llama_lora_adapter_p = ctypes.c_void_p
+llama_lora_adapter_p_ctypes = ctypes.POINTER(ctypes.c_void_p)
+
+
 # // Helpers for getting default parameters
 # LLAMA_API struct llama_model_params llama_model_default_params(void);
 @ctypes_function(
@@ -1507,43 +1518,72 @@ def llama_model_quantize(
     ...


-# // Apply a LoRA adapter to a loaded model
-# // path_base_model is the path to a higher quality model to use as a base for
-# // the layers modified by the adapter. Can be NULL to use the current loaded model.
-# // The model needs to be reloaded before applying a new adapter, otherwise the adapter
-# // will be applied on top of the previous one
-# // Returns 0 on success
-# LLAMA_API int32_t llama_model_apply_lora_from_file(
-#         const struct llama_model * model,
-#         const char * path_lora,
-#         float scale,
-#         const char * path_base_model,
-#         int32_t n_threads);
-@ctypes_function(
-    "llama_model_apply_lora_from_file",
-    [
-        llama_model_p_ctypes,
-        ctypes.c_char_p,
-        ctypes.c_float,
-        ctypes.c_char_p,
-        ctypes.c_int32,
-    ],
+# // Load a LoRA adapter from file
+# // The loaded adapter will be associated to the given model, and will be free when the model is deleted
+# LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
+#         struct llama_model * model,
+#         const char * path_lora);
+@ctypes_function(
+    "llama_lora_adapter_init",
+    [llama_model_p_ctypes, ctypes.c_char_p],
+    llama_lora_adapter_p_ctypes,
+)
+def llama_lora_adapter_init(
+    model: llama_model_p, path_lora: bytes, /
+) -> Optional[llama_lora_adapter_p]:
+    """Load a LoRA adapter from file
+    The loaded adapter will be associated to the given model, and will be free when the model is deleted"""
+    ...
+
+
+# // Add a loaded LoRA adapter to given context
+# // This will not modify model's weight
+# LLAMA_API int32_t llama_lora_adapter_set(
+#         struct llama_context * ctx,
+#         struct llama_lora_adapter * adapter,
+#         float scale);
+@ctypes_function(
+    "llama_lora_adapter_set",
+    [llama_context_p_ctypes, llama_lora_adapter_p_ctypes, ctypes.c_float],
     ctypes.c_int32,
 )
-def llama_model_apply_lora_from_file(
-    model: llama_model_p,
-    path_lora: Union[ctypes.c_char_p, bytes],
-    scale: Union[ctypes.c_float, float],
-    path_base_model: Union[ctypes.c_char_p, bytes, None],
-    n_threads: Union[ctypes.c_int32, int],
-    /,
+def llama_lora_adapter_set(
+    ctx: llama_context_p, adapter: llama_lora_adapter_p, scale: float, /
+) -> int:
+    """Add a loaded LoRA adapter to given context
+    This will not modify model's weight"""
+    ...
+
+
+# // Remove a LoRA adapter from given context
+# // Return -1 if the adapter is not present in the context
+# LLAMA_API int32_t llama_lora_adapter_remove(
+#         struct llama_context * ctx,
+#         struct llama_lora_adapter * adapter);
+@ctypes_function(
+    "llama_lora_adapter_remove",
+    [llama_context_p_ctypes, llama_lora_adapter_p_ctypes],
+    ctypes.c_int32,
+)
+def llama_lora_adapter_remove(
+    ctx: llama_context_p, adapter: llama_lora_adapter_p, /
 ) -> int:
-    """Apply a LoRA adapter to a loaded model
-    path_base_model is the path to a higher quality model to use as a base for
-    the layers modified by the adapter. Can be NULL to use the current loaded model.
-    The model needs to be reloaded before applying a new adapter, otherwise the adapter
-    will be applied on top of the previous one
-    Returns 0 on success"""
+    """Remove a LoRA adapter from given context
+    Return -1 if the adapter is not present in the context"""
+    ...
+
+
+# // Manually free a LoRA adapter
+# // Note: loaded adapters will be free when the associated model is deleted
+# LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
+@ctypes_function(
+    "llama_lora_adapter_free",
+    [llama_lora_adapter_p_ctypes],
+    None,
+)
+def llama_lora_adapter_free(adapter: llama_lora_adapter_p, /):
+    """Manually free a LoRA adapter
+    Note: loaded adapters will be free when the associated model is deleted"""
     ...

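Taken together, the new bindings replace the removed `llama_model_apply_lora_from_file` with an adapter handle that is loaded once per model and then attached to or detached from a context. A rough usage sketch, assuming the model/context helpers already exposed by this module (`llama_load_model_from_file`, `llama_new_context_with_model`, `llama_backend_init`, `llama_free`, `llama_free_model`); the GGUF paths and the 0.8 scale are placeholders:

```python
import llama_cpp

llama_cpp.llama_backend_init()

model = llama_cpp.llama_load_model_from_file(
    b"base-model.gguf", llama_cpp.llama_model_default_params()  # placeholder path
)
ctx = llama_cpp.llama_new_context_with_model(
    model, llama_cpp.llama_context_default_params()
)

# Load the adapter once; it is associated with the model and freed with it.
adapter = llama_cpp.llama_lora_adapter_init(model, b"adapter.gguf")  # placeholder path
if not adapter:
    raise RuntimeError("failed to load LoRA adapter")

# Attach it to this context with a scaling factor; the model weights are not modified.
if llama_cpp.llama_lora_adapter_set(ctx, adapter, 0.8) != 0:
    raise RuntimeError("failed to set LoRA adapter")

# ... run inference on ctx ...

# Detach it from the context (returns -1 if it was never attached),
# and optionally free it early instead of waiting for the model to be deleted.
llama_cpp.llama_lora_adapter_remove(ctx, adapter)
llama_cpp.llama_lora_adapter_free(adapter)

llama_cpp.llama_free(ctx)
llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()
```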