[Optimization][DeepSeekV3.2] Reduce slot_mapping computation frequency from twice per layer to a single pre-processing step. (#7367)

This commit is contained in:
ShaneGZhu
2026-04-16 19:54:12 +08:00
committed by GitHub
parent d2d633b05c
commit 2d8338f9e4
10 changed files with 73 additions and 146 deletions
@@ -59,6 +59,7 @@ def create_mock_config():
scheduler_config = Mock(spec=SchedulerConfig)
scheduler_config.max_num_seqs = 10
scheduler_config.max_num_batched_tokens = 2048
speculative_config = Mock(spec=SpeculativeConfig)
speculative_config.method = None