mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Intel HPU] enable MoE EP for hpu (#5855)
* enable HPU MoE EP
* MoE intermediate_scale stack
* enable loader_v1 esp for tensor_wise_fp8 TP or EP
* modify activation_scale name
This commit is contained in:
@@ -565,6 +565,12 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
|
||||
("attn.cache_v_scale", "cachev_matmul.activation_scale", None, None),
|
||||
("attn.cache_k_zp", "cachek_matmul.activation_zero_point", None, None),
|
||||
("attn.cache_v_zp", "cachev_matmul.activation_zero_point", None, None),
|
||||
("act_scale", "in_scale", None, None),
|
||||
("attn.q_scale", "q_matmul.in_scale", None, None),
|
||||
("attn.s_scale", "s_matmul.in_scale", None, None),
|
||||
("attn.cache_k_scale", "cachek_matmul.in_scale", None, None),
|
||||
("attn.cache_v_scale", "cachev_matmul.in_scale", None, None),
|
||||
("up_gate_proj_in_scale", "up_gate_proj.in_scale", None, None),
|
||||
]
|
||||
|
||||
expert_params_mapping = []
|
||||
@@ -590,7 +596,10 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
|
||||
(param, weight, exp, shard, False) for param, weight, exp, shard in general_params_mapping
|
||||
] + [(param, weight, exp, shard, True) for param, weight, exp, shard in expert_params_mapping]
|
||||
checkpoint_to_fd_key_fn = rename_offline_ckpt_suffix_to_fd_suffix(
|
||||
- fd_config=self.fd_config, ckpt_weight_suffix="quant_weight", ckpt_scale_suffix="weight_scale"
|
||||
+ fd_config=self.fd_config,
|
||||
+ ckpt_weight_suffix="quant_weight",
|
||||
+ ckpt_scale_suffix="weight_scale",
|
||||
+ ckpt_act_suffix="activation_scale",
|
||||
)
|
||||
params_dict = dict(self.named_parameters())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user