Revert "[Feature] Support Ernie FP8 on sm100 (#5593)" (#6275)

This reverts commit eb80724b71.
2026-04-23 00:17:25 +08:00 · 2026-01-30 11:22:01 +08:00
parent 292bab7e6d
commit 6c685c9474
11 changed files with 197 additions and 725 deletions
@@ -299,14 +299,14 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
    const bool use_in_ep,
    const int token_nums_this_rank_padded);

+std::vector<paddle::Tensor> PerTokenQuant(paddle::Tensor& input,
+                                          const int block_size);
 std::vector<paddle::Tensor> PerTokenQuantPadding(paddle::Tensor& input,
-                                                 const int block_size,
-                                                 const bool use_ue8m0);
+                                                 const int block_size);
 std::vector<paddle::Tensor> MaskedPerTokenQuant(
    paddle::Tensor& input,
    paddle::Tensor& recv_expert_count,
-    const int block_size,
-    const bool use_ue8m0);
+    const int block_size);

 std::vector<paddle::Tensor> EPMoeExpertCombine(
    const paddle::Tensor& ffn_out,
@@ -1272,7 +1272,6 @@ PYBIND11_MODULE(fastdeploy_ops, m) {
        py::arg("input"),
        py::arg("recv_expert_count"),
        py::arg("block_size"),
-        py::arg("use_ue8m0") = false,
        "per token per block quant");

 #ifdef ENABLE_MACHETE