Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Model] tp+ep support v1_loader (#5465)
* [Model] tp+ep support v1_loader
* fix
* fix mtp_linear
* fix mtp_linear
* fix
* fix
* fix v0 loader
* fix
* Add get_tensor for ep
* fix linear weight_loader
* fix typo
* fix
@@ -273,10 +273,13 @@ class FusedMoE(nn.Layer):
```python
if not param._is_initialized():
    param.initialize()
weight_need_transpose = getattr(param, "weight_need_transpose", False)

if self.ep_size > 1 or weight_need_transpose:
    loaded_weight = get_tensor(loaded_weight)

if shard_id is None:
    # 1.gate up fused in disk
    if weight_need_transpose:
        loaded_weight = get_tensor(loaded_weight)
        loaded_weight = loaded_weight.transpose([1, 0])
    output_size = param[expert_id - self.expert_id_offset].shape[SHARD_ID_TO_SHARDED_DIM["gate"]]
    shard_offsets = [
```
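This hunk folds two concerns into one step: `get_tensor` materializes a possibly lazy tensor (needed whenever expert parallelism pulls weights across ranks), and `weight_need_transpose` flips the matrix axes, presumably because the checkpoint stores linear weights in the opposite layout from what the Paddle kernels expect. Below is a minimal standalone sketch of that normalization, with NumPy standing in for Paddle tensors; `normalize_weight` is a hypothetical name, not FastDeploy API.

```python
import numpy as np

def normalize_weight(loaded_weight, weight_need_transpose: bool) -> np.ndarray:
    """Materialize the (possibly lazy) tensor, then fix its layout."""
    # get_tensor(...) in the diff forces the tensor into local memory;
    # np.asarray stands in for it in this sketch.
    w = np.asarray(loaded_weight)
    if weight_need_transpose:
        # Swap the two matrix axes, mirroring loaded_weight.transpose([1, 0]).
        w = w.transpose([1, 0])
    return w

# Usage: a checkpoint tensor of shape (1024, 512) comes out as (512, 1024).
w_disk = np.zeros((1024, 512))
assert normalize_weight(w_disk, weight_need_transpose=True).shape == (512, 1024)
```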
@@ -292,7 +295,6 @@ class FusedMoE(nn.Layer):
```python
        self.weight_loader(param, loaded_weight_shard, expert_id, shard_id, "fused")
else:
    if weight_need_transpose and source != "fused":
        loaded_weight = get_tensor(loaded_weight)
        loaded_weight = loaded_weight.transpose([1, 0])
    # 2.gate up splited in disk
    assert shard_id in ["gate", "down", "up"]
```
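The second hunk completes the fused path: when `shard_id is None`, the checkpoint stores the gate and up projections as a single fused matrix, so the loader slices it along the output axis and recursively calls `weight_loader` with `source="fused"`; the `source != "fused"` guard in the `else` branch then keeps those already-transposed slices from being transposed twice. The diff truncates the `shard_offsets` table, so the sketch below assumes a plain half/half gate-up split and ignores tensor-parallel sharding; `split_fused_gate_up` and `load_shard` are illustrative names, not FastDeploy API.

```python
import numpy as np

def split_fused_gate_up(fused: np.ndarray):
    """Split a fused [hidden, 2 * intermediate] weight into gate and up halves."""
    output_size = fused.shape[-1]
    shard_offsets = [
        # (shard_id, shard_offset, shard_size) -- assumed half/half layout
        ("gate", 0, output_size // 2),
        ("up", output_size // 2, output_size // 2),
    ]
    for shard_id, offset, size in shard_offsets:
        yield shard_id, fused[..., offset : offset + size]

def load_shard(shard_id: str, shard: np.ndarray):
    # Mirrors the diff's assert on the per-shard path.
    assert shard_id in ["gate", "down", "up"]
    print(shard_id, shard.shape)

# Usage: one fused (4096, 3072) tensor dispatches as two (4096, 1536) shards.
fused = np.zeros((4096, 2 * 1536))
for shard_id, shard in split_fused_gate_up(fused):
    load_shard(shard_id, shard)
```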