[Optimization][DeepSeekV3.2] Compute slot_mapping once in a single pre-processing step instead of twice per layer. (#7367)

2026-04-23 00:17:25 +08:00 · 2026-04-16 19:54:12 +08:00
parent d2d633b05c
commit 2d8338f9e4
10 changed files with 73 additions and 146 deletions
@@ -85,6 +85,7 @@ class MockFDConfig:
        name = "default"
        splitwise_role = "mixed"
        max_num_seqs = 2
+        max_num_batched_tokens = 2048

    parallel_config = ParallelConfig()
    scheduler_config = SchedulerConfig()