mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Feature] support w4afp8 v1_loader and v0_loader(tp>1) (#5757)
* support
* fix
* support w4afp8 v1_loader and v0_loader
* fix
* fix test
* fix test
* fix test
* fix moe.py
* add test_ernie_4_5_w4afp8
* add test
* delete tensor
* fix test
* fix
* add
* fix test
This commit is contained in:
@@ -110,7 +110,11 @@ class Ernie4_5_MoE(nn.Layer):
         if hasattr(fd_config.quant_config, "moe_quant_type"):
             moe_quant_type = fd_config.quant_config.moe_quant_type

-            if moe_quant_type == "w4a8" or moe_quant_type == "w4afp8":
+            if moe_quant_type == "w4a8" or (
+                moe_quant_type == "w4afp8"
+                and fd_config.model_config.is_quantized
+                and not fd_config.quant_config.moe_dynamic_quant
+            ):
                 weight_key_map = {
                     "gate_weight_key": f"{prefix}.gate.weight",
                     "gate_correction_bias_key": f"{prefix}.moe_statics.e_score_correction_bias",
@@ -121,6 +125,19 @@ class Ernie4_5_MoE(nn.Layer):
                     "up_gate_proj_expert_in_scale_key": f"{prefix}.experts.{{}}.up_gate_proj.activation_scale",
                     "down_proj_expert_in_scale_key": f"{prefix}.experts.{{}}.down_proj.activation_scale",
                 }
+            elif (
+                moe_quant_type == "w4afp8"
+                and fd_config.model_config.is_quantized
+                and fd_config.quant_config.moe_dynamic_quant
+            ):
+                weight_key_map = {
+                    "gate_weight_key": f"{prefix}.gate.weight",
+                    "gate_correction_bias_key": f"{prefix}.moe_statics.e_score_correction_bias",
+                    "up_gate_proj_expert_weight_key": f"{prefix}.experts.{{}}.up_gate_proj.quant_weight",
+                    "down_proj_expert_weight_key": f"{prefix}.experts.{{}}.down_proj.quant_weight",
+                    "up_gate_proj_expert_weight_scale_key": f"{prefix}.experts.{{}}.up_gate_proj.weight_scale",
+                    "down_proj_expert_weight_scale_key": f"{prefix}.experts.{{}}.down_proj.weight_scale",
+                }
             elif moe_quant_type == "w4w2":
                 weight_key_map = {
                     "gate_weight_key": f"{prefix}.gate.weight",
@@ -223,6 +240,7 @@ class Ernie4_5_MoE(nn.Layer):
                 gate=self.gate,
+                forward_meta=forward_meta,
             )

             if self.num_shared_experts > 0:
                 s_x = self.shared_experts(hidden_states)
                 out = out + s_x
||||
Reference in New Issue
Block a user