mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Iluvatar] fix ci error and update readme (#7453)
This commit is contained in:
+9
@@ -124,12 +124,18 @@ class IluvatarCutlassMoEMethod(UnquantizedFusedMoEMethod):
|
||||
x: paddle.Tensor,
|
||||
gate: nn.Layer,
|
||||
topk_ids_hookfunc: Callable = None,
|
||||
fc1_latent_proj: nn.Layer = None,
|
||||
fc2_latent_proj: nn.Layer = None,
|
||||
) -> paddle.Tensor:
|
||||
"""
|
||||
Paddle Cutlass compute Fused MoE.
|
||||
"""
|
||||
gate_out = gate(x)
|
||||
gate_out = gate_out.cast("float32")
|
||||
|
||||
if fc1_latent_proj is not None:
|
||||
x = fc1_latent_proj(x)
|
||||
|
||||
if layer.topk_method == "noaux_tc":
|
||||
gate_out, topk_weights, topk_idx = get_moe_scores(
|
||||
gate_out,
|
||||
@@ -206,6 +212,9 @@ class IluvatarCutlassMoEMethod(UnquantizedFusedMoEMethod):
|
||||
routed_scaling_factor=1.0,
|
||||
)
|
||||
|
||||
if fc2_latent_proj is not None:
|
||||
fused_moe_out = fc2_latent_proj(fused_moe_out)
|
||||
|
||||
return fused_moe_out
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user