mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Optimization][DeepSeekV3.2] Reduce slot_mapping computation from twice per layer to a single pre-processing step (#7367)
This commit is contained in:
@@ -59,6 +59,7 @@ def create_mock_config():
|
||||
|
||||
scheduler_config = Mock(spec=SchedulerConfig)
|
||||
scheduler_config.max_num_seqs = 10
|
||||
scheduler_config.max_num_batched_tokens = 2048
|
||||
|
||||
speculative_config = Mock(spec=SpeculativeConfig)
|
||||
speculative_config.method = None
|
||||
|
||||
Reference in New Issue
Block a user