[Model] tp+ep support v1_loader (#5465)

* [Model] tp+ep support v1_loader

* fix

* fix mtp_linear

* fix mtp_linear

* fix

* fix

* fix v0 loader

* fix

* Add get_tensor for ep

* fix linear weight_loader

* fix typo

* fix
This commit is contained in:
Longzhi Wang
2025-12-18 14:31:54 +08:00
committed by GitHub
parent c89a62e550
commit d8587e987e
8 changed files with 48 additions and 20 deletions
+4 -2
View File
@@ -273,10 +273,13 @@ class FusedMoE(nn.Layer):
if not param._is_initialized():
param.initialize()
weight_need_transpose = getattr(param, "weight_need_transpose", False)
if self.ep_size > 1 or weight_need_transpose:
loaded_weight = get_tensor(loaded_weight)
if shard_id is None:
# 1.gate up fused in disk
if weight_need_transpose:
loaded_weight = get_tensor(loaded_weight)
loaded_weight = loaded_weight.transpose([1, 0])
output_size = param[expert_id - self.expert_id_offset].shape[SHARD_ID_TO_SHARDED_DIM["gate"]]
shard_offsets = [
@@ -292,7 +295,6 @@ class FusedMoE(nn.Layer):
self.weight_loader(param, loaded_weight_shard, expert_id, shard_id, "fused")
else:
if weight_need_transpose and source != "fused":
loaded_weight = get_tensor(loaded_weight)
loaded_weight = loaded_weight.transpose([1, 0])
# 2.gate up split in disk
assert shard_id in ["gate", "down", "up"]