mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
This reverts commit eb80724b71.
This commit is contained in:
@@ -299,14 +299,14 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
|
||||
const bool use_in_ep,
|
||||
const int token_nums_this_rank_padded);
|
||||
|
||||
std::vector<paddle::Tensor> PerTokenQuant(paddle::Tensor& input,
|
||||
const int block_size);
|
||||
std::vector<paddle::Tensor> PerTokenQuantPadding(paddle::Tensor& input,
|
||||
const int block_size,
|
||||
const bool use_ue8m0);
|
||||
const int block_size);
|
||||
std::vector<paddle::Tensor> MaskedPerTokenQuant(
|
||||
paddle::Tensor& input,
|
||||
paddle::Tensor& recv_expert_count,
|
||||
const int block_size,
|
||||
const bool use_ue8m0);
|
||||
const int block_size);
|
||||
|
||||
std::vector<paddle::Tensor> EPMoeExpertCombine(
|
||||
const paddle::Tensor& ffn_out,
|
||||
@@ -1272,7 +1272,6 @@ PYBIND11_MODULE(fastdeploy_ops, m) {
|
||||
py::arg("input"),
|
||||
py::arg("recv_expert_count"),
|
||||
py::arg("block_size"),
|
||||
py::arg("use_ue8m0") = false,
|
||||
"per token per block quant");
|
||||
|
||||
#ifdef ENABLE_MACHETE
|
||||
|
||||
Reference in New Issue
Block a user