mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Others] add PADDLE_ENFORCE (#5288)
This commit is contained in:
@@ -964,7 +964,7 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
|
||||
} else {
|
||||
token_rows = input_dims[0];
|
||||
}
|
||||
const int num_rows = token_rows;
|
||||
|
||||
const int hidden_size = input.dims()[input_dims.size() - 1];
|
||||
const int num_experts_per_rank = num_experts_per_rank_tensor.dims()[0];
|
||||
|
||||
@@ -988,9 +988,9 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
|
||||
auto dst_weights = GetEmptyTensor(
|
||||
{token_nums_feed_to_ffn}, paddle::DataType::FLOAT32, place);
|
||||
auto dst_indices = GetEmptyTensor(
|
||||
{num_rows, num_experts_per_rank}, paddle::DataType::INT32, place);
|
||||
{token_rows, num_experts_per_rank}, paddle::DataType::INT32, place);
|
||||
auto permute_indices_per_token = paddle::full(
|
||||
{num_experts_per_rank, num_rows}, -1, paddle::DataType::INT32, place);
|
||||
{num_experts_per_rank, token_rows}, -1, paddle::DataType::INT32, place);
|
||||
auto cumsum_idx_gpu =
|
||||
paddle::full({num_experts_per_rank}, 0, paddle::DataType::INT32, place);
|
||||
|
||||
@@ -1001,7 +1001,7 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
|
||||
num_experts_per_rank_tensor,
|
||||
num_experts_per_rank_padded_tensor,
|
||||
moe_topk,
|
||||
num_rows,
|
||||
token_rows,
|
||||
-1,
|
||||
-1,
|
||||
hidden_size,
|
||||
|
||||
Reference in New Issue
Block a user