mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Iluvatar GPU] Modify the names of some variables (#3273)
This commit is contained in:
@@ -79,25 +79,27 @@ def group_gemm(
|
||||
def iluvatar_moe_expert_ffn(
|
||||
permute_input: paddle.Tensor,
|
||||
tokens_expert_prefix_sum: paddle.Tensor,
|
||||
ffn1_weight: paddle.Tensor,
|
||||
ffn2_weight: paddle.Tensor,
|
||||
ffn1_bias: Optional[paddle.Tensor],
|
||||
ffn1_scale: Optional[paddle.Tensor],
|
||||
ffn2_scale: Optional[paddle.Tensor],
|
||||
ffn2_in_scale: Optional[paddle.Tensor],
|
||||
up_gate_proj_weight: paddle.Tensor,
|
||||
down_proj_weight: paddle.Tensor,
|
||||
up_gate_proj_bias: Optional[paddle.Tensor],
|
||||
up_gate_proj_scale: Optional[paddle.Tensor],
|
||||
down_proj_scale: Optional[paddle.Tensor],
|
||||
down_proj_in_scale: Optional[paddle.Tensor],
|
||||
expert_idx_per_token: Optional[paddle.Tensor],
|
||||
quant_method: str,
|
||||
used_in_ep_low_latency: bool,
|
||||
):
|
||||
assert ffn1_bias is None
|
||||
assert ffn1_scale is not None
|
||||
assert ffn2_scale is not None
|
||||
assert ffn2_in_scale is None
|
||||
assert up_gate_proj_bias is None
|
||||
assert up_gate_proj_scale is not None
|
||||
assert down_proj_scale is not None
|
||||
assert down_proj_in_scale is None
|
||||
assert expert_idx_per_token is None
|
||||
assert quant_method in ("weight_only_int8")
|
||||
assert not used_in_ep_low_latency
|
||||
tokens_expert_prefix_sum_cpu = tokens_expert_prefix_sum.to("cpu")
|
||||
ffn1_output = w8a16_group_gemm(permute_input, ffn1_weight, ffn1_scale, tokens_expert_prefix_sum_cpu, -1)
|
||||
ffn1_output = w8a16_group_gemm(
|
||||
permute_input, up_gate_proj_weight, up_gate_proj_scale, tokens_expert_prefix_sum_cpu, -1
|
||||
)
|
||||
act_out = swiglu(ffn1_output)
|
||||
output = w8a16_group_gemm(act_out, ffn2_weight, ffn2_scale, tokens_expert_prefix_sum_cpu, -1)
|
||||
output = w8a16_group_gemm(act_out, down_proj_weight, down_proj_scale, tokens_expert_prefix_sum_cpu, -1)
|
||||
return output
|
||||
|
||||
Reference in New Issue
Block a user