mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-24 01:29:57 +08:00
[RL] support moe-topk use topk_reduce_func (#7218)
* support moe-topk use topk_reduce_func
* fix ep error
* fix ut
* fix ut
This commit is contained in:
@@ -509,6 +509,7 @@ class EPRunner:
|
||||
expert_in_rank_num_list=expert_in_rank_num_list,
|
||||
tokens_per_expert_stats_list=tokens_per_expert_stats_list,
|
||||
redundant_ep_rank_num_plus_one=layer.fd_config.eplb_config.redundant_experts_num + 1,
|
||||
topk_reduce_func=getattr(layer, "topk_reduce_func", None),
|
||||
)
|
||||
else:
|
||||
topk_idx, topk_weights = fastdeploy.model_executor.ops.gpu.moe_redundant_topk_select(
|
||||
@@ -534,6 +535,7 @@ class EPRunner:
|
||||
layer.routed_scaling_factor,
|
||||
layer.gate_correction_bias,
|
||||
getattr(layer, "renormalize", True),
|
||||
topk_reduce_func=getattr(layer, "topk_reduce_func", None),
|
||||
)
|
||||
else:
|
||||
topk_idx, topk_weights = fastdeploy.model_executor.ops.gpu.moe_topk_select(
|
||||
|
||||
Reference in New Issue
Block a user