mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Fix] Fix eplb bug and support fp8 load weight (#5178)
* fix eplb part2
* fix eplb part2
* fix eplb part2
This commit is contained in:
@@ -21,7 +21,7 @@ class RedundantExpertManager:
|
||||
RedundantExpertManager
|
||||
"""
|
||||
|
||||
def __init__(self, rank=0, ep_size=32, fd_config=None):
|
||||
def __init__(self, rank=0, ep_size=64, fd_config=None):
|
||||
self.logger = get_logger("eplb_expert_manager", "eplb_{0}.log".format(rank))
|
||||
|
||||
self.rank = rank
|
||||
@@ -101,7 +101,7 @@ class RedundantExpertManager:
|
||||
self.http_timeout = 1
|
||||
# 重置重排状态: 'done' -> 'free'
|
||||
self.rearrange_end_ts = 0
|
||||
self.rearrange_reset_interval = 300
|
||||
self.rearrange_reset_interval = 30
|
||||
|
||||
self.tensor_infos = None
|
||||
|
||||
@@ -250,8 +250,8 @@ class RedundantExpertManager:
|
||||
eplb_strategy = self.eplb_config.redundant_expert_eplb_strategy
|
||||
if is_init:
|
||||
num_groups = 1
|
||||
num_nodes = 2
|
||||
num_gpus = 2 * 8
|
||||
num_nodes = 8
|
||||
num_gpus = 8 * 8
|
||||
eplb_strategy = ""
|
||||
# eplb
|
||||
rank_expert_list, logical_to_physical_map, expert_count = rebalance_experts(
|
||||
@@ -420,7 +420,9 @@ class RedundantExpertManager:
|
||||
if not exist_fail and all_success:
|
||||
# prefill需要等待调度屏蔽
|
||||
if (
|
||||
self.fd_config.splitwise_role == "decode"
|
||||
self.fd_config.scheduler_config.splitwise_role == "mixed"
|
||||
or self.fd_config.scheduler_config.splitwise_role == "decode"
|
||||
or self.fd_config.scheduler_config.splitwise_role == "prefill"
|
||||
or not self.eplb_config.redundant_expert_enable_schedule_cordon
|
||||
):
|
||||
self.logger.info("redundant_expert: allreduce_load_weight_result success, notify infer.py")
|
||||
|
||||
Reference in New Issue
Block a user