Revert "[Feature] Support Ernie FP8 on sm100 (#5593)" (#6275)

This reverts commit eb80724b71.
This commit is contained in:
JYChen
2026-01-30 11:22:01 +08:00
committed by GitHub
parent 292bab7e6d
commit 6c685c9474
11 changed files with 197 additions and 725 deletions
+4 -5
View File
@@ -299,14 +299,14 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
const bool use_in_ep,
const int token_nums_this_rank_padded);
std::vector<paddle::Tensor> PerTokenQuant(paddle::Tensor& input,
const int block_size);
 std::vector<paddle::Tensor> PerTokenQuantPadding(paddle::Tensor& input,
-const int block_size,
-const bool use_ue8m0);
+const int block_size);
 std::vector<paddle::Tensor> MaskedPerTokenQuant(
 paddle::Tensor& input,
 paddle::Tensor& recv_expert_count,
-const int block_size,
-const bool use_ue8m0);
+const int block_size);
std::vector<paddle::Tensor> EPMoeExpertCombine(
const paddle::Tensor& ffn_out,
@@ -1272,7 +1272,6 @@ PYBIND11_MODULE(fastdeploy_ops, m) {
 py::arg("input"),
 py::arg("recv_expert_count"),
 py::arg("block_size"),
-py::arg("use_ue8m0") = false,
 "per token per block quant");
#ifdef ENABLE_MACHETE