[Feature] Support redundant expert for eplb (#5918)

* [BugFix] support redundant expert for eplb

* support redundant expert for eplb

* support redundant expert for eplb

* update

* fix ci eplb
This commit is contained in:
xiaoxiaohehe001
2026-01-09 17:13:24 +08:00
committed by GitHub
parent e6cdea4492
commit 00a01ae024
11 changed files with 36 additions and 17 deletions
@@ -277,7 +277,7 @@ class XPUEPRunner:
moe_topk=self.top_k,
apply_norm_weight=True, # apply_norm_weight
enable_softmax_top_k_fused=False,
-redundant_ep_rank_num_plus_one=layer.fd_config.model_config.redundant_experts_num + 1,
+redundant_ep_rank_num_plus_one=layer.fd_config.eplb_config.redundant_experts_num + 1,
)
else:
topk_idx, topk_weights = fastdeploy.model_executor.ops.xpu.moe_topk_select(
+2 -2
View File
@@ -472,7 +472,7 @@ class EPRunner:
expert_id_to_ep_rank_array=expert_id_to_ep_rank_array,
expert_in_rank_num_list=expert_in_rank_num_list,
tokens_per_expert_stats_list=tokens_per_expert_stats_list,
-redundant_ep_rank_num_plus_one=layer.fd_config.model_config.redundant_experts_num + 1,
+redundant_ep_rank_num_plus_one=layer.fd_config.eplb_config.redundant_experts_num + 1,
)
else:
topk_idx, topk_weights = fastdeploy.model_executor.ops.gpu.moe_redundant_topk_select(
@@ -484,7 +484,7 @@ class EPRunner:
moe_topk=self.top_k,
apply_norm_weight=True,
enable_softmax_top_k_fused=False,
-redundant_ep_rank_num_plus_one=layer.fd_config.model_config.redundant_experts_num + 1,
+redundant_ep_rank_num_plus_one=layer.fd_config.eplb_config.redundant_experts_num + 1,
)
else:
if layer.topk_method == "noaux_tc":
@@ -84,7 +84,7 @@ class MoEMethodBase(QuantMethodBase):
"num_max_dispatch_tokens_per_rank": layer.fd_config.model_config.num_max_dispatch_tokens_per_rank,
"ep_size": layer.ep_size,
"ep_rank": layer.ep_rank,
-"redundant_experts_num": layer.fd_config.model_config.redundant_experts_num,
+"redundant_experts_num": layer.fd_config.eplb_config.redundant_experts_num,
"ep_group": layer.fd_config.parallel_config.ep_group,
}
+9 -7
View File
@@ -467,13 +467,18 @@ class FusedMoE(nn.Layer):
"""
logical_expert_ids = [
i
+% (
+self.fd_config.model_config.moe_num_experts[0]
+if isinstance(self.fd_config.model_config.moe_num_experts, list)
+else self.fd_config.model_config.moe_num_experts
+)
for i in range(
self.expert_id_offset,
self.expert_id_offset + self.num_local_experts,
)
]
ep_rank_to_expert_id_list = [i for i in range(self.num_experts)]
-if self.redundant_table_manger is not None and is_rearrange is True:
+if self.redundant_table_manger is not None:
(
ep_rank_to_expert_id_list,
expert_id_to_ep_rank_array,
@@ -487,10 +492,7 @@ class FusedMoE(nn.Layer):
down_proj_weights = []
if isinstance(state_dict, list):
state_dict = dict(state_dict)
-is_ffn_merged = (
-up_gate_proj_expert_weight_key.format(logical_expert_ids[0] if is_rearrange else self.expert_id_offset)
-in state_dict
-)
+is_ffn_merged = up_gate_proj_expert_weight_key.format(logical_expert_ids[0]) in state_dict
if is_ffn_merged:
for expert_idx in logical_expert_ids:
down_proj_expert_weight_key_name = down_proj_expert_weight_key.format(expert_idx)
@@ -498,7 +500,7 @@ class FusedMoE(nn.Layer):
up_gate_proj_weights.append(
get_tensor(
(
-state_dict.pop(up_gate_proj_expert_weight_key_name)
+state_dict[up_gate_proj_expert_weight_key_name]
if up_gate_proj_expert_weight_key_name in state_dict
else up_gate_proj_expert_weight_key_name
),
@@ -508,7 +510,7 @@ class FusedMoE(nn.Layer):
down_proj_weights.append(
get_tensor(
(
-state_dict.pop(down_proj_expert_weight_key_name)
+state_dict[down_proj_expert_weight_key_name]
if down_proj_expert_weight_key_name in state_dict
else down_proj_expert_weight_key_name
),