8000 enable reorder_for_compute_comm_overlap · InternLM/lmdeploy@0462005 · GitHub

Commit 0462005

enable reorder_for_compute_comm_overlap

1 parent: 66aa3ba
File tree: 2 files changed (+4, −3 lines)

lmdeploy/pytorch/models/internvl.py — 1 addition, 0 deletions

```diff
@@ -383,6 +383,7 @@ def forward(
     ):
         if inputs_embeds is None and pixel_values is not None:
             # extract feature
+            torch._inductor.config.reorder_for_compute_comm_overlap = True
             torch._dynamo.mark_dynamic(pixel_values, 0)
             vit_embeds = self.extract_feature(pixel_values)
             lang_embeds = self.language_model.get_input_embeddings()(input_ids)
```

requirements/runtime_cuda.txt — 3 additions, 3 deletions

```diff
@@ -16,8 +16,8 @@ safetensors
 sentencepiece
 shortuuid
 tiktoken
-torch<=2.5.1,>=2.0.0
-torchvision<=0.20.1,>=0.15.0
+torch<=2.6.0,>=2.0.0
+torchvision<=0.21.0,>=0.15.0
 transformers
-triton<=3.1.0,>=3.0.0; sys_platform == "linux"
+triton<=3.2.0,>=3.0.0; sys_platform == "linux"
 uvicorn
```
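The pins above use standard PEP 440 specifier syntax, where comma-separated clauses must all hold. A small sketch of checking a candidate version against the new torch range, using the third-party `packaging` library (an assumption for illustration; it is not part of this diff):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

# The torch pin introduced by this commit: both clauses must be satisfied.
spec = SpecifierSet("<=2.6.0,>=2.0.0")

print(Version("2.6.0") in spec)   # newly allowed upper bound
print(Version("2.7.0") in spec)   # still excluded
```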

0 commit comments