[Intel HPU] enable MoE EP for hpu (#5855)

* enable HPU MoE EP

* MoE intermediate_scale stack

* enable loader_v1 especially for tensor_wise_fp8 TP or EP

* modify activation_scale name
This commit is contained in:
Cheng Yanfei
2026-01-15 13:08:00 +08:00
committed by GitHub
parent 7c56041272
commit fbcccaa750
9 changed files with 177 additions and 11 deletions
@@ -565,6 +565,12 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
("attn.cache_v_scale", "cachev_matmul.activation_scale", None, None),
("attn.cache_k_zp", "cachek_matmul.activation_zero_point", None, None),
("attn.cache_v_zp", "cachev_matmul.activation_zero_point", None, None),
("act_scale", "in_scale", None, None),
("attn.q_scale", "q_matmul.in_scale", None, None),
("attn.s_scale", "s_matmul.in_scale", None, None),
("attn.cache_k_scale", "cachek_matmul.in_scale", None, None),
("attn.cache_v_scale", "cachev_matmul.in_scale", None, None),
("up_gate_proj_in_scale", "up_gate_proj.in_scale", None, None),
]
expert_params_mapping = []
@@ -590,7 +596,10 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
(param, weight, exp, shard, False) for param, weight, exp, shard in general_params_mapping
] + [(param, weight, exp, shard, True) for param, weight, exp, shard in expert_params_mapping]
checkpoint_to_fd_key_fn = rename_offline_ckpt_suffix_to_fd_suffix(
fd_config=self.fd_config, ckpt_weight_suffix="quant_weight", ckpt_scale_suffix="weight_scale"
fd_config=self.fd_config,
ckpt_weight_suffix="quant_weight",
ckpt_scale_suffix="weight_scale",
ckpt_act_suffix="activation_scale",
)
params_dict = dict(self.named_parameters())