[BugFix]Dev fix custom ar unstable result (#4437)

This commit is contained in:
chen
2025-10-17 11:47:16 +08:00
committed by GitHub
parent 6160145f82
commit b134e6afe6
17 changed files with 25 additions and 24 deletions
@@ -194,7 +194,7 @@ class DeepSeekV3MoE(nn.Layer):
moe_out = moe_out + shared_experts_out
# We do the TP all-reduce after the sum of experts.
if self.tp_size > 1:
- tensor_model_parallel_all_reduce(moe_out)
+ moe_out = tensor_model_parallel_all_reduce(moe_out)
return moe_out
@@ -300,7 +300,7 @@ class Ernie4_5_VLMoE(nn.Layer):
if self.num_shared_experts > 0:
hidden_states += shared_experts_out
if self.tp_size > 1:
- tensor_model_parallel_all_reduce(hidden_states)
+ hidden_states = tensor_model_parallel_all_reduce(hidden_states)
return hidden_states
+1 -1
View File
@@ -167,7 +167,7 @@ class Glm4Moe(nn.Layer):
out = out + shared_experts_out
# We do the TP all-reduce after the sum of experts.
if self.tensor_parallel_size > 1:
- tensor_model_parallel_all_reduce(out, self.tp_group)
+ out = tensor_model_parallel_all_reduce(out, self.tp_group)
return out