[Intel HPU] enable MoE EP for hpu (#5855)

* enable HPU MoE EP

* MoE intermediate_scale stack

* enable loader_v1, especially for tensor_wise_fp8 TP or EP

* modify activation_scale name
This commit is contained in:
Cheng Yanfei
2026-01-15 13:08:00 +08:00
committed by GitHub
parent 7c56041272
commit fbcccaa750
9 changed files with 177 additions and 11 deletions
+10 -1
View File
@@ -468,7 +468,10 @@ def multi_switch_config_context(*changes):
def rename_offline_ckpt_suffix_to_fd_suffix(
fd_config, ckpt_weight_suffix: str = "quant_weight", ckpt_scale_suffix="weight_scale"
fd_config,
ckpt_weight_suffix: str = "quant_weight",
ckpt_scale_suffix="weight_scale",
ckpt_act_suffix="activation_scale",
):
"""
Create a function to rename checkpoint key suffixes for FastDeploy.
@@ -489,6 +492,10 @@ def rename_offline_ckpt_suffix_to_fd_suffix(
ckpt_weight_suffix: "weight",
ckpt_scale_suffix: "weight_scale_inv",
}
tensor_wise_fp8_suffix_map = {
ckpt_weight_suffix: "weight",
ckpt_act_suffix: "in_scale",
}
moe_quant_type = ""
dense_quant_type = ""
if fd_config.quant_config is not None:
@@ -505,6 +512,8 @@ def rename_offline_ckpt_suffix_to_fd_suffix(
# Can be extended to other offline quantization suffixes if needed.
if (is_moe and moe_quant_type == "block_wise_fp8") or (not is_moe and dense_quant_type == "block_wise_fp8"):
fd_suffix_map = fp8_suffix_map
if (is_moe and moe_quant_type == "tensor_wise_fp8") or (not is_moe and dense_quant_type == "tensor_wise_fp8"):
fd_suffix_map = tensor_wise_fp8_suffix_map
for ckpt_suffix, fd_suffix in fd_suffix_map.items():
if re.search(rf"{ckpt_suffix}$", loaded_weight_name):
loaded_weight_name = loaded_weight_name.replace(ckpt_suffix, fd_suffix)