Commit c278aff

pyright-based changes for tools/mtmd/legacy-models/convert_image_encoder_to_gguf.py
Fix torch 2.5.1 / numpy 2.x compatibility in convert_image_encoder_to_gguf.py:

- Updated Tensor-to-array conversions to use `np.asarray(..., dtype=...)` per the NumPy 2.x migration rules (avoids the copy error on float16).
- Used explicit typing and `cast(...)` to guide Pyright/Pylance under torch 2.5.1:
  - Annotated `model` as `PreTrainedModel`.
  - Re-cast `model.vision_model` to `CLIPVisionTransformer` to safely access `.encoder.layers`.
  - Replaced slice assignment with `__init__` to reset the ModuleList contents.
- Verified compatibility by converting `openai/clip-vit-base-patch32` with `--clip-model-is-openclip`.
1 parent ef77f28 commit c278aff
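For context on the first bullet, a minimal standalone sketch (not part of the commit) of the NumPy 2.x behavior it refers to: `np.array(..., copy=False)` now raises instead of silently copying when a dtype change forces a copy, and `np.asarray(..., dtype=...)` is the documented replacement.

import numpy as np

x = np.ones((2, 2), dtype=np.float32)

# NumPy 2.x: copy=False raises ValueError when the requested dtype
# forces a copy (NumPy 1.x silently copied instead).
try:
    np.array(x, dtype=np.float16, copy=False)
except ValueError:
    pass

# Migration-rule replacement: asarray copies only when it must.
y = np.asarray(x, dtype=np.float16)
assert y.dtype == np.float16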

1 file changed: tools/mtmd/legacy-models/convert_image_encoder_to_gguf.py (18 additions, 9 deletions)
@@ -6,6 +6,10 @@
 import torch
 import numpy as np
 from gguf import *
+from typing import cast
+from torch.nn import ModuleList
+from transformers.models.clip.modeling_clip import CLIPVisionTransformer
+from transformers import PreTrainedModel
 from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel, SiglipVisionModel

 TEXT = "clip.text"
@@ -162,13 +166,13 @@ def bytes_to_unicode():
 ftype = 0

 if args.clip_model_is_siglip:
-    model = SiglipVisionModel.from_pretrained(dir_model)
+    model: PreTrainedModel = SiglipVisionModel.from_pretrained(dir_model)
     processor = None
 elif args.clip_model_is_vision or args.clip_model_is_openclip:
-    model = CLIPVisionModel.from_pretrained(dir_model)
+    model: PreTrainedModel = CLIPVisionModel.from_pretrained(dir_model)
     processor = None
 else:
-    model = CLIPModel.from_pretrained(dir_model)
+    model: PreTrainedModel = CLIPModel.from_pretrained(dir_model)
     processor = CLIPProcessor.from_pretrained(dir_model)

 fname_middle = None
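As an aside, a standalone sketch (hypothetical `load_encoder` helper, not the commit's code) of why annotating `model` helps: declaring the common supertype once gives Pyright a single declared type to check every branch against, instead of a different inferred type per branch.

from transformers import CLIPModel, CLIPVisionModel, PreTrainedModel

def load_encoder(dir_model: str, vision_only: bool) -> PreTrainedModel:
    # One declaration covers both branches; Pyright verifies each
    # assignment against the declared supertype PreTrainedModel.
    model: PreTrainedModel
    if vision_only:
        model = CLIPVisionModel.from_pretrained(dir_model)
    else:
        model = CLIPModel.from_pretrained(dir_model)
    return model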
@@ -350,9 +354,14 @@ def get_non_negative_vision_feature_layers(v_hparams):
     # By default, we drop the last layer for llava projector
     # models unless we have explicitly set vision feature layers
     if feature_layers is None:
-        model.vision_model.encoder.layers.pop(-1)
+        vision_model = cast(CLIPVisionTransformer, model.vision_model)
+        encoder_layers = vision_model.encoder.layers
+        encoder_layers.pop(-1)
     else:
-        model.vision_model.encoder.layers = model.vision_model.encoder.layers[:max(feature_layers)]
+        vision_model = cast(CLIPVisionTransformer, model.vision_model)
+        encoder_layers = vision_model.encoder.layers
+        encoder_layers = cast(ModuleList, encoder_layers)
+        encoder_layers.__init__(encoder_layers[:max(feature_layers)])

     projector = torch.load(args.llava_projector)
     for name, data in projector.items():
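The `__init__` call above is the least obvious change, so here is a standalone sketch (hypothetical 6-layer list; `keep` stands in for `max(feature_layers)`) of why it works: the slice is evaluated first and `ModuleList.__getitem__` returns a fresh `ModuleList`, after which `__init__` resets the original object and re-registers only the kept modules, with no attribute reassignment for Pyright to flag.

import torch.nn as nn

layers = nn.ModuleList(nn.Linear(4, 4) for _ in range(6))
keep = 3  # hypothetical cut-off, standing in for max(feature_layers)

# The slice returns a new ModuleList before __init__ runs; __init__
# then clears the original container and re-adds the kept modules.
layers.__init__(layers[:keep])

assert len(layers) == keep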
@@ -375,24 +384,24 @@ def get_non_negative_vision_feature_layers(v_hparams):
         continue

     name = get_tensor_name(name)
-    data = data.squeeze().numpy()
+    data = np.ascontiguousarray(data.detach().cpu().squeeze().numpy())

     n_dims = len(data.shape)

     # ftype == 0 -> float32, ftype == 1 -> float16
     ftype_cur = 0
     if n_dims == 4:
         print(f"tensor {name} is always saved in f16")
-        data = data.astype(np.float16)
+        data = np.asarray(data, dtype=np.float16)
         ftype_cur = 1
     elif ftype == 1:
         if name[-7:] == ".weight" and n_dims == 2:
             print(" Converting to float16")
-            data = data.astype(np.float16)
+            data = np.asarray(data, dtype=np.float16)
             ftype_cur = 1
         else:
             print(" Converting to float32")
-            data = data.astype(np.float32)
+            data = np.asarray(data, dtype=np.float32)
             ftype_cur = 0
     else:
         if data.dtype != np.float32:
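Finally, a standalone sketch of the full tensor-to-array chain introduced above, assuming the goal is a C-contiguous array to hand to the GGUF writer:

import numpy as np
import torch

t = torch.randn(1, 8, requires_grad=True)

# detach() drops the autograd graph (numpy() refuses tensors that
# require grad), cpu() moves data off any accelerator, squeeze()
# removes singleton dims, and ascontiguousarray guarantees C layout.
data = np.ascontiguousarray(t.detach().cpu().squeeze().numpy())

# Downcast via asarray rather than astype, per the NumPy 2.x rules
# noted in the commit message.
data = np.asarray(data, dtype=np.float16)
assert data.shape == (8,) and data.dtype == np.float16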
