[Optimization][DeepSeekV3.2] Reduce slot_mapping computation frequency from twice per layer to a single pre-processing step. (#7367)

This commit is contained in:
ShaneGZhu
2026-04-16 19:54:12 +08:00
committed by GitHub
parent d2d633b05c
commit 2d8338f9e4
10 changed files with 73 additions and 146 deletions
+1
View File
@@ -85,6 +85,7 @@ class MockFDConfig:
name = "default"
splitwise_role = "mixed"
max_num_seqs = 2
max_num_batched_tokens = 2048
parallel_config = ParallelConfig()
scheduler_config = SchedulerConfig()