mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[XPU] change XPU EP interface from xDeepEP to paddle (#5706)
* add ENV VAR to control low latency buffer
This commit is contained in:
@@ -1105,7 +1105,7 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
self.forward_meta.kv_signal_sender = self.share_inputs["kv_signal_sender"]
|
||||
|
||||
if (
|
||||
self.fd_config.scheduler_config.splitwise_role == "mixed"
|
||||
self.fd_config.scheduler_config.splitwise_role == "mixed" and envs.FD_XPU_ENABLE_MIXED_EP_MODE
|
||||
): # Centralized scenario: the phase is initialized as "prefill" by default. During inference runtime, different types of batches can achieve phase switching at this point.
|
||||
if_only_decode = self.only_decode()
|
||||
self.fd_config.model_config.moe_phase.phase = "decode" if if_only_decode else "prefill"
|
||||
|
||||
Reference in New Issue
Block a user