mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
fix pfcc deep ep in low latency mode (#6440)
This commit is contained in:
@@ -196,10 +196,13 @@ class DeepEPBuffer:
|
||||
def _create_low_latency_buffer(self):
|
||||
if self.deepep_buffer is None:
|
||||
assert self.num_experts % self.ep_size == 0
|
||||
if self.ep_size // 8 > 1:
|
||||
num_qps_per_rank_now = self.ep_size // 8
|
||||
if envs.FD_USE_PFCC_DEEP_EP:
|
||||
num_qps_per_rank_now = self.num_experts // self.ep_size
|
||||
else:
|
||||
num_qps_per_rank_now = 1
|
||||
if self.ep_size // 8 > 1:
|
||||
num_qps_per_rank_now = self.ep_size // 8
|
||||
else:
|
||||
num_qps_per_rank_now = 1
|
||||
self.deepep_buffer = deep_ep.Buffer(
|
||||
self.group,
|
||||
self.num_nvl_bytes,
|
||||
|
||||
Reference in New Issue
Block a user