[Optimization][DeepSeekV3.2] Reduce slot_mapping computation frequency from twice per layer to a single pre-processing step. (#7367)

2026-04-23 00:17:25 +08:00 · 2026-04-16 19:54:12 +08:00
parent d2d633b05c
commit 2d8338f9e4
10 changed files with 73 additions and 146 deletions
@@ -59,6 +59,7 @@ def create_mock_config():

    scheduler_config = Mock(spec=SchedulerConfig)
    scheduler_config.max_num_seqs = 10
+    scheduler_config.max_num_batched_tokens = 2048

    speculative_config = Mock(spec=SpeculativeConfig)
    speculative_config.method = None