[Fix] Fix eplb bug and support fp8 load weight (#5178)

* fix eplb part2

* fix eplb part2

* fix eplb part2
This commit is contained in:
xiaoxiaohehe001
2025-11-24 15:31:37 +08:00
committed by GitHub
parent f5c1066245
commit 95f3c8c641
5 changed files with 27 additions and 8 deletions
+7 -5
View File
@@ -21,7 +21,7 @@ class RedundantExpertManager:
RedundantExpertManger
"""
- def __init__(self, rank=0, ep_size=32, fd_config=None):
+ def __init__(self, rank=0, ep_size=64, fd_config=None):
self.logger = get_logger("eplb_expert_manager", "eplb_{0}.log".format(rank))
self.rank = rank
@@ -101,7 +101,7 @@ class RedundantExpertManager:
self.http_timeout = 1
# 重置重排状态: 'done' -> 'free'
self.rearrange_end_ts = 0
- self.rearrange_reset_interval = 300
+ self.rearrange_reset_interval = 30
self.tensor_infos = None
@@ -250,8 +250,8 @@ class RedundantExpertManager:
eplb_strategy = self.eplb_config.redundant_expert_eplb_strategy
if is_init:
num_groups = 1
- num_nodes = 2
- num_gpus = 2 * 8
+ num_nodes = 8
+ num_gpus = 8 * 8
eplb_strategy = ""
# eplb
rank_expert_list, logical_to_physical_map, expert_count = rebalance_experts(
@@ -420,7 +420,9 @@ class RedundantExpertManager:
if not exist_fail and all_success:
# prefill需要等待调度屏蔽
if (
- self.fd_config.splitwise_role == "decode"
+ self.fd_config.scheduler_config.splitwise_role == "mixed"
+ or self.fd_config.scheduler_config.splitwise_role == "decode"
+ or self.fd_config.scheduler_config.splitwise_role == "prefill"
or not self.eplb_config.redundant_expert_enable_schedule_cordon
):
self.logger.info("redundant_expert: allreduce_load_weight_result success, notify infer.py")