[Feature] Support mtp overlap schedule (#7001)

This commit is contained in:
sunxin
2026-04-01 14:24:26 +08:00
committed by GitHub
parent c6f0c5c3a6
commit c29e86fc9d
23 changed files with 215 additions and 138 deletions
+1 -1
View File
@@ -573,7 +573,7 @@ class EngineArgs:
self.enable_prefix_caching = False
if (
not current_platform.is_cuda()
- or self.speculative_config is not None
+ or (self.speculative_config is not None and self.enable_logprob)
or self.splitwise_role == "prefill"
or self.dynamic_load_weight
):