[Others] Enable use of the PFCC deep_ep (#5822)

* upstream deep_ep

* fix bug

* fix bug

* modify env name
ming1753
2026-01-05 18:07:01 +08:00
committed by GitHub
parent 8d384f9fd8
commit f50e1bcc16
2 changed files with 43 additions and 18 deletions
@@ -20,8 +20,14 @@ import paddle
 from paddle import nn
 from paddleformers.utils.log import logger
 from fastdeploy import envs
 try:
-    from paddle.distributed.communication import deep_ep
+    if envs.FD_USE_PFCC_DEEP_EP:
+        paddle.compat.enable_torch_proxy(scope={"deep_ep"})  # Enable torch proxy before importing deep_ep
+        import deep_ep
+    else:
+        from paddle.distributed.communication import deep_ep
 except:
     logger.warning("import deep_ep Failed!")
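For reference, a minimal standalone sketch of how this import selection behaves at runtime. This is not FastDeploy code: how FD_USE_PFCC_DEEP_EP is parsed inside fastdeploy.envs is an assumption here (a plain "0"/"1" environment variable is assumed), and a standard-library logger stands in for the paddleformers logger.

import logging
import os

import paddle

logger = logging.getLogger(__name__)

# Assumption: FD_USE_PFCC_DEEP_EP is read as a "0"/"1" environment variable;
# in FastDeploy it is exposed through fastdeploy.envs instead.
USE_PFCC_DEEP_EP = os.getenv("FD_USE_PFCC_DEEP_EP", "0") == "1"

try:
    if USE_PFCC_DEEP_EP:
        # Route deep_ep's torch imports through paddle's torch proxy, then
        # import the upstream (PFCC) deep_ep package directly.
        paddle.compat.enable_torch_proxy(scope={"deep_ep"})
        import deep_ep
    else:
        # Fall back to the deep_ep bundled with paddle.distributed.
        from paddle.distributed.communication import deep_ep
except Exception:
    logger.warning("import deep_ep Failed!")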
@@ -280,23 +286,40 @@ class DeepEPEngine:
         if self.deepep_engine is None:
             raise RuntimeError("DeepEP buffer not initialized!")
-        (
-            packed_recv_x,
-            recv_expert_count,
-            handle,
-            _,
-            dispatch_hook,
-        ) = self.deepep_engine.low_latency_dispatch(
-            hidden_states,
-            topk_idx,
-            expertwise_scale,
-            self.buffer.num_max_dispatch_tokens_per_rank,
-            self.num_experts,
-            use_fp8=use_fp8,
-            async_finish=False,
-            return_recv_hook=True,
-            num_per_channel=quant_group_size,
-        )
+        if envs.FD_USE_PFCC_DEEP_EP:
+            (
+                packed_recv_x,
+                recv_expert_count,
+                handle,
+                _,
+                dispatch_hook,
+            ) = self.deepep_engine.low_latency_dispatch(
+                hidden_states,
+                topk_idx,
+                self.buffer.num_max_dispatch_tokens_per_rank,
+                self.num_experts,
+                use_fp8=use_fp8,
+                async_finish=False,
+                return_recv_hook=True,
+            )
+        else:
+            (
+                packed_recv_x,
+                recv_expert_count,
+                handle,
+                _,
+                dispatch_hook,
+            ) = self.deepep_engine.low_latency_dispatch(
+                hidden_states,
+                topk_idx,
+                expertwise_scale,
+                self.buffer.num_max_dispatch_tokens_per_rank,
+                self.num_experts,
+                use_fp8=use_fp8,
+                async_finish=False,
+                return_recv_hook=True,
+                num_per_channel=quant_group_size,
+            )
         return packed_recv_x, recv_expert_count, handle, dispatch_hook
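The two branches above differ only in the call signature: the upstream (PFCC) deep_ep drops the positional expertwise_scale argument and the num_per_channel keyword. If the branching grows unwieldy, one option is a small wrapper at the call site; the helper below is a hypothetical sketch, not part of this commit, and assumes no signature differences beyond those visible in the diff.

def _low_latency_dispatch(engine, buffer, num_experts, hidden_states, topk_idx,
                          expertwise_scale, quant_group_size, use_fp8, use_pfcc):
    # Hypothetical helper: hide the deep_ep flavor difference from callers.
    common = dict(use_fp8=use_fp8, async_finish=False, return_recv_hook=True)
    if use_pfcc:
        # Upstream (PFCC) deep_ep: no expertwise_scale / num_per_channel.
        return engine.low_latency_dispatch(
            hidden_states, topk_idx,
            buffer.num_max_dispatch_tokens_per_rank, num_experts, **common,
        )
    # Paddle-bundled deep_ep: takes the extra quantization arguments.
    return engine.low_latency_dispatch(
        hidden_states, topk_idx, expertwise_scale,
        buffer.num_max_dispatch_tokens_per_rank, num_experts,
        num_per_channel=quant_group_size, **common,
    )

Either way the caller still unpacks (packed_recv_x, recv_expert_count, handle, _, dispatch_hook) exactly as shown in the diff above.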