mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix]Dev fix custom ar unstable result (#4437)
This commit is contained in:
@@ -194,7 +194,7 @@ class DeepSeekV3MoE(nn.Layer):
             moe_out = moe_out + shared_experts_out
         # We do to TP all reduce after the sum of experts.
         if self.tp_size > 1:
-            tensor_model_parallel_all_reduce(moe_out)
+            moe_out = tensor_model_parallel_all_reduce(moe_out)
         return moe_out
@@ -300,7 +300,7 @@ class Ernie4_5_VLMoE(nn.Layer):
         if self.num_shared_experts > 0:
             hidden_states += shared_experts_out
         if self.tp_size > 1:
-            tensor_model_parallel_all_reduce(hidden_states)
+            hidden_states = tensor_model_parallel_all_reduce(hidden_states)
         return hidden_states
@@ -167,7 +167,7 @@ class Glm4Moe(nn.Layer):
             out = out + shared_experts_out
         # We do to TP all reduce after the sum of experts.
         if self.tensor_parallel_size > 1:
-            tensor_model_parallel_all_reduce(out, self.tp_group)
+            out = tensor_model_parallel_all_reduce(out, self.tp_group)
         return out
Reference in New Issue
Block a user