random pad input ids (#3530) · InternLM/lmdeploy@20a4272 · GitHub

Commit 20a4272
random pad input ids (#3530)
1 parent 50b0ef7 commit 20a4272

File tree

6 files changed: +20 -6 lines changed

lmdeploy/pytorch/backends/cuda/graph_runner.py

Lines changed: 4 additions & 0 deletions
@@ -43,10 +43,12 @@ def __init__(
         num_blocks: int,
         is_decoding: bool,
         pool: Tuple[int, int],
+        model_config: ModelConfig,
         device: torch.device,
     ):
         self.model = model
         self.ctx_mgr = model.ctx_mgr
+        self.model_config = model_config
 
         self.meta = CudaGraphMeta(
             max_batchs=max_batches,
@@ -56,6 +58,7 @@ def __init__(
             device=device,
             input_buffers=dict(),
             output_buffers=dict(),
+            vocab_size=self.model_config.vocab_size,
         )
         self.device = device
         self.max_batches = max_batches
@@ -171,6 +174,7 @@ def __call__(self, **kwargs):
                 num_blocks=self.num_blocks,
                 is_decoding=is_decoding,
                 pool=self.graph_pool_handle,
+                model_config=self.model_config,
                 device=self.device)
             runner.capture(**kwargs)
             self._runner_map[graph_key] = runner
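
For orientation, a minimal standalone sketch of what this plumbing accomplishes; the classes below are hypothetical stand-ins for ModelConfig, CudaGraphMeta and the graph runner, not the lmdeploy implementations. The runner keeps the model config it receives and copies its vocab_size into the CUDA-graph metadata so buffer code can later draw valid token ids.

# Standalone sketch only: DummyModelConfig / DummyGraphMeta / DummyRunner are
# hypothetical stand-ins, not the lmdeploy classes touched by this commit.
from dataclasses import dataclass, field
from typing import Dict

import torch


@dataclass
class DummyModelConfig:
    vocab_size: int = 32000  # assumed value for illustration


@dataclass
class DummyGraphMeta:
    max_tokens: int
    vocab_size: int = 1
    input_buffers: Dict[str, torch.Tensor] = field(default_factory=dict)


class DummyRunner:

    def __init__(self, model_config: DummyModelConfig, max_tokens: int):
        # keep the config so capture-time code can read the vocab size
        self.model_config = model_config
        self.meta = DummyGraphMeta(max_tokens=max_tokens,
                                   vocab_size=model_config.vocab_size)


runner = DummyRunner(DummyModelConfig(), max_tokens=256)
print(runner.meta.vocab_size)  # 32000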

lmdeploy/pytorch/engine/engine.py

Lines changed: 1 addition & 1 deletion
@@ -835,7 +835,7 @@ def __make_dummy_inputs():
             num_loops = 1 if prefill else prefill_interval
             return dict(
                 running=[],
-                inputs=ModelInputs.make_dummy(1, is_decoding=not prefill),
+                inputs=ModelInputs.make_dummy(1, is_decoding=not prefill, vocab_size=self.model_config.vocab_size),
                 swap_in_map=dict(),
                 swap_out_map=dict(),
                 loop_count=num_loops,

lmdeploy/pytorch/engine/model_agent.py

Lines changed: 1 addition & 1 deletion
@@ -301,7 +301,7 @@ async def __long_context_single_forward(new_inputs, max_seqlen: int):
 
         # compute dummy loop
         if dummy_loop > 0:
-            dummy_inputs = ModelInputs.make_dummy(1, False, 'cuda')
+            dummy_inputs = ModelInputs.make_dummy(1, False, 'cuda', vocab_size=self.model_config.vocab_size)
         for _ in range(dummy_loop):
             await __forward(dummy_inputs)
 

lmdeploy/pytorch/model_inputs.py

Lines changed: 7 additions & 2 deletions
@@ -285,9 +285,14 @@ def build_dp_meta(self):
         self.dp_meta = DPMeta.build(self.input_ids.numel())
 
     @classmethod
-    def make_dummy(cls, batch_size: int, is_decoding: bool, device: str = 'cpu', dummy_block_id: int = 0):
+    def make_dummy(cls,
+                   batch_size: int,
+                   is_decoding: bool,
+                   device: str = 'cpu',
+                   dummy_block_id: int = 0,
+                   vocab_size: int = 1):
         """make dummy inputs."""
-        input_ids = torch.zeros((
+        input_ids = torch.randint(0, vocab_size, (
             1,
             batch_size,
         ), dtype=torch.long, device=device)
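
The behaviour change in make_dummy can be reproduced in isolation with plain torch calls; the snippet below is a sketch that assumes vocab_size comes from the model config, not the lmdeploy method itself. Dummy input ids are now sampled uniformly from [0, vocab_size) instead of being all zeros, and the default vocab_size=1 preserves the old all-zero output for callers that do not pass the new argument.

import torch

batch_size = 4

# old behaviour: every dummy token id is 0
old_ids = torch.zeros((1, batch_size), dtype=torch.long)

# new behaviour: dummy token ids are random but valid vocabulary entries
vocab_size = 32000  # would come from the model config in lmdeploy
new_ids = torch.randint(0, vocab_size, (1, batch_size), dtype=torch.long)

# with the default vocab_size=1, randint(0, 1, ...) still yields all zeros,
# so callers that omit the argument see the previous behaviour
default_ids = torch.randint(0, 1, (1, batch_size), dtype=torch.long)

print(old_ids, new_ids, default_ids, sep='\n')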

lmdeploy/pytorch/models/utils/cudagraph.py

Lines changed: 6 additions & 1 deletion
@@ -33,6 +33,7 @@ class CudaGraphMeta:
     device: torch.device
     input_buffers: BuffType = None
     output_buffers: BuffType = None
+    vocab_size: int = 1
 
 
 class CudaGraphMixin:
@@ -58,7 +59,10 @@ def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, *args, **kwargs) ->
         device = graph_meta.device
 
         input_buffers: BuffType = dict()
-        input_buffers['input_ids'] = torch.zeros(1, max_tokens, dtype=torch.int64, device=device)
+        input_buffers['input_ids'] = torch.randint(0,
+                                                   graph_meta.vocab_size, (1, max_tokens),
+                                                   dtype=torch.int64,
+                                                   device=device)
         input_buffers['position_ids'] = torch.zeros((1, max_tokens), dtype=torch.int64, device=device)
         if getattr(self.config, 'use_flash_mla', False) is True:
             import flash_mla_cuda
@@ -96,6 +100,7 @@ def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, input_ids: Tensor, p
         num_tokens = input_ids.size(-1)
 
         # fill buffer
+        input_buffers['input_ids'].random_(0, graph_meta.vocab_size)
         input_buffers['input_ids'][:, :num_tokens] = input_ids
         input_buffers['position_ids'][:, :num_tokens] = position_ids
         input_buffers['block_offsets'][:batch_size, :num_blocks] = block_offsets
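
A small sketch of the resulting fill pattern, using plain torch ops and illustrative values rather than the real graph_meta buffers: the persistent capture buffer is re-randomized within the vocabulary on every fill, and the genuine token ids then overwrite the leading positions, so only the padded tail carries random (but in-vocabulary) ids.

import torch

max_tokens = 8
vocab_size = 32000  # would be graph_meta.vocab_size in lmdeploy

# persistent capture buffer, pre-filled with random valid token ids
input_ids_buf = torch.randint(0, vocab_size, (1, max_tokens), dtype=torch.int64)

# at fill time: re-randomize the padding, then copy the real tokens in front
real_ids = torch.tensor([[101, 2023, 2003, 1037]], dtype=torch.int64)  # made-up ids
num_tokens = real_ids.size(-1)
input_ids_buf.random_(0, vocab_size)
input_ids_buf[:, :num_tokens] = real_ids
print(input_ids_buf)  # real ids first, random in-vocab padding after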

lmdeploy/pytorch/nn/linear.py

Lines changed: 1 addition & 1 deletion
@@ -1675,7 +1675,7 @@ def build_qkv_proj(in_features: int,
                    device: Optional[torch.device] = None,
                    is_tp: bool = True,
                    num_replicate_kv_heads: int = 1,
-                   dp_disable_tp: bool = False,
+                   dp_disable_tp: bool = True,
                    all_reduce: bool = False,
                    dp_gather: bool = False):
     """build qkv proj."""
