mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-24 01:29:57 +08:00
[Intel HPU] enable MoE EP for hpu (#5855)
* enable HPU MoE EP
* MoE intermediate_scale stack
* enable loader_v1 esp. for tensor_wise_fp8 TP or EP
* modify activation_scale name
This commit is contained in:
@@ -468,7 +468,10 @@ def multi_switch_config_context(*changes):
|
||||
|
||||
|
||||
def rename_offline_ckpt_suffix_to_fd_suffix(
|
||||
fd_config, ckpt_weight_suffix: str = "quant_weight", ckpt_scale_suffix="weight_scale"
|
||||
fd_config,
|
||||
ckpt_weight_suffix: str = "quant_weight",
|
||||
ckpt_scale_suffix="weight_scale",
|
||||
ckpt_act_suffix="activation_scale",
|
||||
):
|
||||
"""
|
||||
Create a function to rename checkpoint key suffixes for FastDeploy.
|
||||
@@ -489,6 +492,10 @@ def rename_offline_ckpt_suffix_to_fd_suffix(
|
||||
ckpt_weight_suffix: "weight",
|
||||
ckpt_scale_suffix: "weight_scale_inv",
|
||||
}
|
||||
tensor_wise_fp8_suffix_map = {
|
||||
ckpt_weight_suffix: "weight",
|
||||
ckpt_act_suffix: "in_scale",
|
||||
}
|
||||
moe_quant_type = ""
|
||||
dense_quant_type = ""
|
||||
if fd_config.quant_config is not None:
|
||||
@@ -505,6 +512,8 @@ def rename_offline_ckpt_suffix_to_fd_suffix(
|
||||
# Can be extended to other offline quantization suffixes if needed.
|
||||
if (is_moe and moe_quant_type == "block_wise_fp8") or (not is_moe and dense_quant_type == "block_wise_fp8"):
|
||||
fd_suffix_map = fp8_suffix_map
|
||||
if (is_moe and moe_quant_type == "tensor_wise_fp8") or (not is_moe and dense_quant_type == "tensor_wise_fp8"):
|
||||
fd_suffix_map = tensor_wise_fp8_suffix_map
|
||||
for ckpt_suffix, fd_suffix in fd_suffix_map.items():
|
||||
if re.search(rf"{ckpt_suffix}$", loaded_weight_name):
|
||||
loaded_weight_name = loaded_weight_name.replace(ckpt_suffix, fd_suffix)
|
||||
|
||||
Reference in New Issue
Block a user