[XPU] change XPU EP interface from xDeepEP to paddle (#5706)

* add ENV VAR to control low latency buffer
This commit is contained in:
zccjjj
2026-01-21 18:23:45 +08:00
committed by GitHub
parent 490a6551dc
commit 14a64e9b3b
12 changed files with 76 additions and 93 deletions
+1 -1
View File
@@ -1105,7 +1105,7 @@ class XPUModelRunner(ModelRunnerBase):
self.forward_meta.kv_signal_sender = self.share_inputs["kv_signal_sender"]
if (
self.fd_config.scheduler_config.splitwise_role == "mixed"
self.fd_config.scheduler_config.splitwise_role == "mixed" and envs.FD_XPU_ENABLE_MIXED_EP_MODE
): # Centralized scenario: the phase is initialized as "prefill" by default. During inference runtime, different types of batches can achieve phase switching at this point.
if_only_decode = self.only_decode()
self.fd_config.model_config.moe_phase.phase = "decode" if if_only_decode else "prefill"