[TSP] Move last_norm allgather to model.py (#5924)

* support_lastnorm_gather_split_dev

* support_lastnorm_gather_split_dev1

* support_lastnorm_gather_split_dev3

* support_lastnorm_gather_split_dev4

* support_lastnorm_gather_split_dev5
This commit is contained in:
xiaoluomi
2026-01-08 15:36:33 +08:00
committed by GitHub
parent 8e11d719f3
commit 2bb838fed9
9 changed files with 30 additions and 8 deletions
@@ -592,6 +592,9 @@ class DeepSeekV3Model(nn.Layer):
)
out = self.norm(hidden_states, residual, forward_meta=forward_meta)[0]
if self.norm.is_last_norm and self.norm.fd_config.parallel_config.use_sequence_parallel_moe:
out = self.norm.allgather(out, forward_meta.ids_remove_padding.shape[0])
return out