[Intel HPU] enable MoE EP for hpu (#5855)

* enable HPU MoE EP

* MoE intermediate_scale stack

* enable loader_v1 especially for tensor_wise_fp8 TP or EP

* modify activation_scale name
This commit is contained in:
Cheng Yanfei
2026-01-15 13:08:00 +08:00
committed by GitHub
parent 7c56041272
commit fbcccaa750
9 changed files with 177 additions and 11 deletions
@@ -565,6 +565,12 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
("attn.cache_v_scale", "cachev_matmul.activation_scale", None, None),
("attn.cache_k_zp", "cachek_matmul.activation_zero_point", None, None),
("attn.cache_v_zp", "cachev_matmul.activation_zero_point", None, None),
("act_scale", "in_scale", None, None),
("attn.q_scale", "q_matmul.in_scale", None, None),
("attn.s_scale", "s_matmul.in_scale", None, None),
("attn.cache_k_scale", "cachek_matmul.in_scale", None, None),
("attn.cache_v_scale", "cachev_matmul.in_scale", None, None),
("up_gate_proj_in_scale", "up_gate_proj.in_scale", None, None),
]
expert_params_mapping = []
@@ -590,7 +596,10 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
(param, weight, exp, shard, False) for param, weight, exp, shard in general_params_mapping
] + [(param, weight, exp, shard, True) for param, weight, exp, shard in expert_params_mapping]
checkpoint_to_fd_key_fn = rename_offline_ckpt_suffix_to_fd_suffix(
fd_config=self.fd_config, ckpt_weight_suffix="quant_weight", ckpt_scale_suffix="weight_scale"
fd_config=self.fd_config,
ckpt_weight_suffix="quant_weight",
ckpt_scale_suffix="weight_scale",
ckpt_act_suffix="activation_scale",
)
params_dict = dict(self.named_parameters())