Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2026-04-23 00:17:25 +08:00)
[Intel HPU] enable MoE EP for hpu (#5855)
* enable HPU MoE EP
* MoE intermediate_scale stack
* enable loader_v1 esp. for tensor_wise_fp8 TP or EP
* modify activation_scale name
@@ -278,6 +278,8 @@ def load_ep_checkpoint(cls: PretrainedModel, model_path: str, fd_config: FDConfig
             down_proj_scale_key = f"ernie.{prefix_layer_name}.{i}.mlp.experts.{j}.down_proj.weight_scale"
             down_proj_in_scale_key = f"ernie.{prefix_layer_name}.{i}.mlp.experts.{j}.down_proj.activation_scale"
+            # single up_gate_proj.activation_scale for all mlp.experts
+            up_gate_proj_in_scale_key = f"ernie.layers.{i}.mlp.experts.up_gate_proj.activation_scale"
             num_local_ffn_keys.append(up_gate_proj_key)
             num_local_ffn_keys.append(down_proj_key)
             num_local_ffn_keys.append(up_gate_proj_quant_key)
@@ -285,6 +287,7 @@ def load_ep_checkpoint(cls: PretrainedModel, model_path: str, fd_config: FDConfig
             num_local_ffn_keys.append(up_gate_proj_scale_key)
             num_local_ffn_keys.append(down_proj_scale_key)
             num_local_ffn_keys.append(down_proj_in_scale_key)
+            num_local_ffn_keys.append(up_gate_proj_in_scale_key)

     # for EP w4a8, we need all expert's activation_scale for up_gate_proj
     num_experts = fd_config.model_config.moe_num_experts
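For context, the diff above extends the set of safetensors keys an expert-parallel (EP) rank loads: per-expert weight and scale keys are built inside the layer/expert loops, while the new up_gate_proj.activation_scale key is shared by all experts in a layer, so it is keyed per layer only. Below is a minimal sketch of that pattern, not FastDeploy's actual implementation; the function name build_local_ffn_keys and the parameters num_layers, ep_rank, and ep_size are illustrative assumptions, while the key strings mirror those in the diff.

# Sketch only: illustrates per-rank checkpoint-key selection for MoE EP.
# Assumes experts are partitioned contiguously and evenly across EP ranks.
def build_local_ffn_keys(num_layers: int, num_experts: int, ep_rank: int, ep_size: int) -> list[str]:
    """Collect only the checkpoint keys this EP rank needs to load."""
    assert num_experts % ep_size == 0, "experts must divide evenly across EP ranks"
    experts_per_rank = num_experts // ep_size
    start = ep_rank * experts_per_rank  # first expert owned by this rank
    keys: list[str] = []
    for i in range(num_layers):
        for j in range(start, start + experts_per_rank):
            prefix = f"ernie.layers.{i}.mlp.experts.{j}"
            keys.append(f"{prefix}.up_gate_proj.weight")
            keys.append(f"{prefix}.down_proj.weight")
            keys.append(f"{prefix}.down_proj.weight_scale")
            keys.append(f"{prefix}.down_proj.activation_scale")
        # one up_gate_proj.activation_scale shared by all experts in layer i
        keys.append(f"ernie.layers.{i}.mlp.experts.up_gate_proj.activation_scale")
    return keys

Under these assumptions, a rank with ep_rank=1 and ep_size=4 over 64 experts would load keys only for experts 16-31, plus the per-layer shared activation scale, which is what lets each EP rank read a slice of the checkpoint instead of the whole thing.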