mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix] Fix tbo nan (#6439)
This commit is contained in:
@@ -348,7 +348,6 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
|
||||
ffn_out,
|
||||
m_indices,
|
||||
)
|
||||
- del permute_input
|
||||
|
||||
# swiglu
|
||||
ffn_out = paddle.incubate.nn.functional.swiglu(ffn_out, None)
|
||||
@@ -367,7 +366,6 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
|
||||
)
|
||||
ffn_in_x_scale_tensor = ffn_in_x_scale_tensor.T[: ffn_in_x.shape[0]]
|
||||
|
||||
- del ffn_out
|
||||
ffn_out = paddle.empty(
|
||||
(token_all_num, getattr(layer, self.added_weight_attrs[1]).shape[1]),
|
||||
dtype=paddle.bfloat16,
|
||||
@@ -379,7 +377,6 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
|
||||
ffn_out,
|
||||
m_indices,
|
||||
)
|
||||
- del ffn_in_x
|
||||
|
||||
# prmt back per rank
|
||||
tmp_ffn_out = fastdeploy.model_executor.ops.gpu.ep_moe_expert_combine(
|
||||
@@ -391,7 +388,6 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
|
||||
False, # norm_topk_prob
|
||||
1.0,
|
||||
)
|
||||
- del ffn_out
|
||||
else:
|
||||
tmp_ffn_out = paddle.empty([0, hidden_size], paddle.bfloat16)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user