mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Speculative Decoding] Fix multistep MTP in splitwise-prefill mode (#5723)
This commit is contained in:
@@ -1789,6 +1789,12 @@ class FDConfig:
|
||||
# It will hang when real batch_size < tp_size
|
||||
self.graph_opt_config.filter_capture_size(tp_size=self.parallel_config.tensor_parallel_size)
|
||||
|
||||
# adjust speculative config
|
||||
if self.speculative_config is not None and self.speculative_config.method == "mtp":
|
||||
if self.scheduler_config.splitwise_role == "prefill":
|
||||
self.speculative_config.num_speculative_tokens = 1
|
||||
self.speculative_config.num_model_steps = 1
|
||||
|
||||
if self.scheduler_config.splitwise_role == "mixed":
|
||||
self._disable_sequence_parallel_moe_if_needed("Mixed")
|
||||
self.model_config.moe_phase = MoEPhase(phase="prefill")
|
||||
|
||||
Reference in New Issue
Block a user