[CVCUDA] PP-OCR detector preprocessor integrate CV-CUDA (#1382)

* move manager initialized_ flag to ppcls
* update dbdetector preprocess api
* declare processor op
* ppocr detector preprocessor support cvcuda
* move cvcuda op to class member
* ppcls use manager register api
* refactor det preprocessor init api
* add set preprocessor api
* add create processor macro
* new processor call api
* ppcls preprocessor init resize on cpu
* ppocr detector preprocessor set normalize api
* revert ppcls pybind
* remove dbdetector set preprocessor
* refine dbdetector preprocessor includes
* remove mean std in py constructor
* add comments
* update comment
* Update __init__.py
2026-04-23 00:17:25 +08:00 · 2023-02-22 19:39:11 +08:00
parent 2f8d9c9a57
commit 91a1c72f98
24 changed files with 448 additions and 330 deletions
@@ -12,80 +12,106 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include <pybind11/stl.h>
+
 #include "fastdeploy/pybind/main.h"

 namespace fastdeploy {
 void BindPPOCRModel(pybind11::module& m) {
  m.def("sort_boxes", [](std::vector<std::array<int, 8>>& boxes) {
-       vision::ocr::SortBoxes(&boxes);
-       return boxes;
+    vision::ocr::SortBoxes(&boxes);
+    return boxes;
  });
-  
+
  // DBDetector
-  pybind11::class_<vision::ocr::DBDetectorPreprocessor>(m, "DBDetectorPreprocessor")
+  pybind11::class_<vision::ocr::DBDetectorPreprocessor>(
+      m, "DBDetectorPreprocessor")
      .def(pybind11::init<>())
-      .def_property("max_side_len", &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen, &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen)
-      .def_property("mean", &vision::ocr::DBDetectorPreprocessor::GetMean, &vision::ocr::DBDetectorPreprocessor::SetMean)
-      .def_property("scale", &vision::ocr::DBDetectorPreprocessor::GetScale, &vision::ocr::DBDetectorPreprocessor::SetScale)
-      .def_property("is_scale", &vision::ocr::DBDetectorPreprocessor::GetIsScale, &vision::ocr::DBDetectorPreprocessor::SetIsScale)
-      .def("run", [](vision::ocr::DBDetectorPreprocessor& self, std::vector<pybind11::array>& im_list) {
+      .def_property("max_side_len",
+                    &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen,
+                    &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen)
+      .def("set_normalize",
+           [](vision::ocr::DBDetectorPreprocessor& self,
+              const std::vector<float>& mean, const std::vector<float>& std,
+              bool is_scale) { self.SetNormalize(mean, std, is_scale); })
+      .def("run", [](vision::ocr::DBDetectorPreprocessor& self,
+                     std::vector<pybind11::array>& im_list) {
        std::vector<vision::FDMat> images;
        for (size_t i = 0; i < im_list.size(); ++i) {
          images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
        }
        std::vector<FDTensor> outputs;
-        std::vector<std::array<int, 4>> batch_det_img_info;
-        self.Run(&images, &outputs, &batch_det_img_info);
-        for(size_t i = 0; i< outputs.size(); ++i){
+        self.Run(&images, &outputs);
+        auto batch_det_img_info = self.GetBatchImgInfo();
+        for (size_t i = 0; i < outputs.size(); ++i) {
          outputs[i].StopSharing();
        }
-        return std::make_pair(outputs, batch_det_img_info);
+        return std::make_pair(outputs, *batch_det_img_info);
      });

-  pybind11::class_<vision::ocr::DBDetectorPostprocessor>(m, "DBDetectorPostprocessor")
+  pybind11::class_<vision::ocr::DBDetectorPostprocessor>(
+      m, "DBDetectorPostprocessor")
      .def(pybind11::init<>())
-      .def_property("det_db_thresh", &vision::ocr::DBDetectorPostprocessor::GetDetDBThresh, &vision::ocr::DBDetectorPostprocessor::SetDetDBThresh) 
-      .def_property("det_db_box_thresh", &vision::ocr::DBDetectorPostprocessor::GetDetDBBoxThresh, &vision::ocr::DBDetectorPostprocessor::SetDetDBBoxThresh) 
-      .def_property("det_db_unclip_ratio", &vision::ocr::DBDetectorPostprocessor::GetDetDBUnclipRatio, &vision::ocr::DBDetectorPostprocessor::SetDetDBUnclipRatio) 
-      .def_property("det_db_score_mode", &vision::ocr::DBDetectorPostprocessor::GetDetDBScoreMode, &vision::ocr::DBDetectorPostprocessor::SetDetDBScoreMode) 
-      .def_property("use_dilation", &vision::ocr::DBDetectorPostprocessor::GetUseDilation, &vision::ocr::DBDetectorPostprocessor::SetUseDilation) 
+      .def_property("det_db_thresh",
+                    &vision::ocr::DBDetectorPostprocessor::GetDetDBThresh,
+                    &vision::ocr::DBDetectorPostprocessor::SetDetDBThresh)
+      .def_property("det_db_box_thresh",
+                    &vision::ocr::DBDetectorPostprocessor::GetDetDBBoxThresh,
+                    &vision::ocr::DBDetectorPostprocessor::SetDetDBBoxThresh)
+      .def_property("det_db_unclip_ratio",
+                    &vision::ocr::DBDetectorPostprocessor::GetDetDBUnclipRatio,
+                    &vision::ocr::DBDetectorPostprocessor::SetDetDBUnclipRatio)
+      .def_property("det_db_score_mode",
+                    &vision::ocr::DBDetectorPostprocessor::GetDetDBScoreMode,
+                    &vision::ocr::DBDetectorPostprocessor::SetDetDBScoreMode)
+      .def_property("use_dilation",
+                    &vision::ocr::DBDetectorPostprocessor::GetUseDilation,
+                    &vision::ocr::DBDetectorPostprocessor::SetUseDilation)

-      .def("run", [](vision::ocr::DBDetectorPostprocessor& self,
-                     std::vector<FDTensor>& inputs,
-                     const std::vector<std::array<int, 4>>& batch_det_img_info) {
-        std::vector<std::vector<std::array<int, 8>>> results;
+      .def("run",
+           [](vision::ocr::DBDetectorPostprocessor& self,
+              std::vector<FDTensor>& inputs,
+              const std::vector<std::array<int, 4>>& batch_det_img_info) {
+             std::vector<std::vector<std::array<int, 8>>> results;

-        if (!self.Run(inputs, &results, batch_det_img_info)) {
-          throw std::runtime_error("Failed to preprocess the input data in DBDetectorPostprocessor.");
-        }
-        return results;
-      })
-      .def("run", [](vision::ocr::DBDetectorPostprocessor& self,
-                     std::vector<pybind11::array>& input_array,
-                     const std::vector<std::array<int, 4>>& batch_det_img_info) {
-        std::vector<std::vector<std::array<int, 8>>> results;
-        std::vector<FDTensor> inputs;
-        PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
-        if (!self.Run(inputs, &results, batch_det_img_info)) {
-          throw std::runtime_error("Failed to preprocess the input data in DBDetectorPostprocessor.");
-        }
-        return results;
-      });
+             if (!self.Run(inputs, &results, batch_det_img_info)) {
+               throw std::runtime_error(
+                   "Failed to preprocess the input data in "
+                   "DBDetectorPostprocessor.");
+             }
+             return results;
+           })
+      .def("run",
+           [](vision::ocr::DBDetectorPostprocessor& self,
+              std::vector<pybind11::array>& input_array,
+              const std::vector<std::array<int, 4>>& batch_det_img_info) {
+             std::vector<std::vector<std::array<int, 8>>> results;
+             std::vector<FDTensor> inputs;
+             PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
+             if (!self.Run(inputs, &results, batch_det_img_info)) {
+               throw std::runtime_error(
+                   "Failed to preprocess the input data in "
+                   "DBDetectorPostprocessor.");
+             }
+             return results;
+           });

  pybind11::class_<vision::ocr::DBDetector, FastDeployModel>(m, "DBDetector")
      .def(pybind11::init<std::string, std::string, RuntimeOption,
                          ModelFormat>())
      .def(pybind11::init<>())
-      .def_property_readonly("preprocessor", &vision::ocr::DBDetector::GetPreprocessor)
-      .def_property_readonly("postprocessor", &vision::ocr::DBDetector::GetPostprocessor)
-      .def("predict", [](vision::ocr::DBDetector& self,
-                         pybind11::array& data) {
-        auto mat = PyArrayToCvMat(data);
-        std::vector<std::array<int, 8>> boxes_result;
-        self.Predict(mat, &boxes_result);
-        return boxes_result;
-      })
-      .def("batch_predict", [](vision::ocr::DBDetector& self, std::vector<pybind11::array>& data) {
+      .def_property_readonly("preprocessor",
+                             &vision::ocr::DBDetector::GetPreprocessor)
+      .def_property_readonly("postprocessor",
+                             &vision::ocr::DBDetector::GetPostprocessor)
+      .def("predict",
+           [](vision::ocr::DBDetector& self, pybind11::array& data) {
+             auto mat = PyArrayToCvMat(data);
+             std::vector<std::array<int, 8>> boxes_result;
+             self.Predict(mat, &boxes_result);
+             return boxes_result;
+           })
+      .def("batch_predict", [](vision::ocr::DBDetector& self,
+                               std::vector<pybind11::array>& data) {
        std::vector<cv::Mat> images;
        std::vector<std::vector<std::array<int, 8>>> det_results;
        for (size_t i = 0; i < data.size(); ++i) {
@@ -96,39 +122,54 @@ void BindPPOCRModel(pybind11::module& m) {
      });

  // Classifier
-  pybind11::class_<vision::ocr::ClassifierPreprocessor>(m, "ClassifierPreprocessor")
+  pybind11::class_<vision::ocr::ClassifierPreprocessor>(
+      m, "ClassifierPreprocessor")
      .def(pybind11::init<>())
-      .def_property("cls_image_shape", &vision::ocr::ClassifierPreprocessor::GetClsImageShape, &vision::ocr::ClassifierPreprocessor::SetClsImageShape)
-      .def_property("mean", &vision::ocr::ClassifierPreprocessor::GetMean, &vision::ocr::ClassifierPreprocessor::SetMean)
-      .def_property("scale", &vision::ocr::ClassifierPreprocessor::GetScale, &vision::ocr::ClassifierPreprocessor::SetScale)
-      .def_property("is_scale", &vision::ocr::ClassifierPreprocessor::GetIsScale, &vision::ocr::ClassifierPreprocessor::SetIsScale)
-      .def("run", [](vision::ocr::ClassifierPreprocessor& self, std::vector<pybind11::array>& im_list) {
+      .def_property("cls_image_shape",
+                    &vision::ocr::ClassifierPreprocessor::GetClsImageShape,
+                    &vision::ocr::ClassifierPreprocessor::SetClsImageShape)
+      .def_property("mean", &vision::ocr::ClassifierPreprocessor::GetMean,
+                    &vision::ocr::ClassifierPreprocessor::SetMean)
+      .def_property("scale", &vision::ocr::ClassifierPreprocessor::GetScale,
+                    &vision::ocr::ClassifierPreprocessor::SetScale)
+      .def_property("is_scale",
+                    &vision::ocr::ClassifierPreprocessor::GetIsScale,
+                    &vision::ocr::ClassifierPreprocessor::SetIsScale)
+      .def("run", [](vision::ocr::ClassifierPreprocessor& self,
+                     std::vector<pybind11::array>& im_list) {
        std::vector<vision::FDMat> images;
        for (size_t i = 0; i < im_list.size(); ++i) {
          images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
        }
        std::vector<FDTensor> outputs;
        if (!self.Run(&images, &outputs)) {
-          throw std::runtime_error("Failed to preprocess the input data in ClassifierPreprocessor.");
+          throw std::runtime_error(
+              "Failed to preprocess the input data in ClassifierPreprocessor.");
        }
-        for(size_t i = 0; i< outputs.size(); ++i){
+        for (size_t i = 0; i < outputs.size(); ++i) {
          outputs[i].StopSharing();
        }
        return outputs;
      });

-  pybind11::class_<vision::ocr::ClassifierPostprocessor>(m, "ClassifierPostprocessor")
+  pybind11::class_<vision::ocr::ClassifierPostprocessor>(
+      m, "ClassifierPostprocessor")
      .def(pybind11::init<>())
-      .def_property("cls_thresh", &vision::ocr::ClassifierPostprocessor::GetClsThresh, &vision::ocr::ClassifierPostprocessor::SetClsThresh) 
-      .def("run", [](vision::ocr::ClassifierPostprocessor& self,
-                     std::vector<FDTensor>& inputs) {
-        std::vector<int> cls_labels;
-        std::vector<float> cls_scores;
-        if (!self.Run(inputs, &cls_labels, &cls_scores)) {
-          throw std::runtime_error("Failed to preprocess the input data in ClassifierPostprocessor.");
-        }
-        return std::make_pair(cls_labels,cls_scores);
-      })
+      .def_property("cls_thresh",
+                    &vision::ocr::ClassifierPostprocessor::GetClsThresh,
+                    &vision::ocr::ClassifierPostprocessor::SetClsThresh)
+      .def("run",
+           [](vision::ocr::ClassifierPostprocessor& self,
+              std::vector<FDTensor>& inputs) {
+             std::vector<int> cls_labels;
+             std::vector<float> cls_scores;
+             if (!self.Run(inputs, &cls_labels, &cls_scores)) {
+               throw std::runtime_error(
+                   "Failed to preprocess the input data in "
+                   "ClassifierPostprocessor.");
+             }
+             return std::make_pair(cls_labels, cls_scores);
+           })
      .def("run", [](vision::ocr::ClassifierPostprocessor& self,
                     std::vector<pybind11::array>& input_array) {
        std::vector<FDTensor> inputs;
@@ -136,26 +177,31 @@ void BindPPOCRModel(pybind11::module& m) {
        std::vector<int> cls_labels;
        std::vector<float> cls_scores;
        if (!self.Run(inputs, &cls_labels, &cls_scores)) {
-          throw std::runtime_error("Failed to preprocess the input data in ClassifierPostprocessor.");
+          throw std::runtime_error(
+              "Failed to preprocess the input data in "
+              "ClassifierPostprocessor.");
        }
-        return std::make_pair(cls_labels,cls_scores);
+        return std::make_pair(cls_labels, cls_scores);
      });
-  
+
  pybind11::class_<vision::ocr::Classifier, FastDeployModel>(m, "Classifier")
      .def(pybind11::init<std::string, std::string, RuntimeOption,
                          ModelFormat>())
      .def(pybind11::init<>())
-      .def_property_readonly("preprocessor", &vision::ocr::Classifier::GetPreprocessor)
-      .def_property_readonly("postprocessor", &vision::ocr::Classifier::GetPostprocessor)
-      .def("predict", [](vision::ocr::Classifier& self,
-                         pybind11::array& data) {
-        auto mat = PyArrayToCvMat(data);
-        int32_t cls_label;
-        float cls_score;
-        self.Predict(mat, &cls_label, &cls_score);
-        return std::make_pair(cls_label, cls_score);
-      })
-      .def("batch_predict", [](vision::ocr::Classifier& self, std::vector<pybind11::array>& data) {
+      .def_property_readonly("preprocessor",
+                             &vision::ocr::Classifier::GetPreprocessor)
+      .def_property_readonly("postprocessor",
+                             &vision::ocr::Classifier::GetPostprocessor)
+      .def("predict",
+           [](vision::ocr::Classifier& self, pybind11::array& data) {
+             auto mat = PyArrayToCvMat(data);
+             int32_t cls_label;
+             float cls_score;
+             self.Predict(mat, &cls_label, &cls_score);
+             return std::make_pair(cls_label, cls_score);
+           })
+      .def("batch_predict", [](vision::ocr::Classifier& self,
+                               std::vector<pybind11::array>& data) {
        std::vector<cv::Mat> images;
        std::vector<int32_t> cls_labels;
        std::vector<float> cls_scores;
@@ -167,39 +213,54 @@ void BindPPOCRModel(pybind11::module& m) {
      });

  // Recognizer
-  pybind11::class_<vision::ocr::RecognizerPreprocessor>(m, "RecognizerPreprocessor")
-    .def(pybind11::init<>())
-    .def_property("static_shape_infer", &vision::ocr::RecognizerPreprocessor::GetStaticShapeInfer, &vision::ocr::RecognizerPreprocessor::SetStaticShapeInfer) 
-    .def_property("rec_image_shape", &vision::ocr::RecognizerPreprocessor::GetRecImageShape, &vision::ocr::RecognizerPreprocessor::SetRecImageShape)
-    .def_property("mean", &vision::ocr::RecognizerPreprocessor::GetMean, &vision::ocr::RecognizerPreprocessor::SetMean)
-    .def_property("scale", &vision::ocr::RecognizerPreprocessor::GetScale, &vision::ocr::RecognizerPreprocessor::SetScale)
-    .def_property("is_scale", &vision::ocr::RecognizerPreprocessor::GetIsScale, &vision::ocr::RecognizerPreprocessor::SetIsScale)
-    .def("run", [](vision::ocr::RecognizerPreprocessor& self, std::vector<pybind11::array>& im_list) {
-      std::vector<vision::FDMat> images;
-      for (size_t i = 0; i < im_list.size(); ++i) {
-        images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
-      }
-      std::vector<FDTensor> outputs;
-      if (!self.Run(&images, &outputs)) {
-        throw std::runtime_error("Failed to preprocess the input data in RecognizerPreprocessor.");
-      }
-      for(size_t i = 0; i< outputs.size(); ++i){
-        outputs[i].StopSharing();
-      }
-      return outputs;
-    });
-
-  pybind11::class_<vision::ocr::RecognizerPostprocessor>(m, "RecognizerPostprocessor")
-      .def(pybind11::init<std::string>())
-      .def("run", [](vision::ocr::RecognizerPostprocessor& self,
-                     std::vector<FDTensor>& inputs) {
-        std::vector<std::string> texts;
-        std::vector<float> rec_scores;
-        if (!self.Run(inputs, &texts, &rec_scores)) {
-          throw std::runtime_error("Failed to preprocess the input data in RecognizerPostprocessor.");
+  pybind11::class_<vision::ocr::RecognizerPreprocessor>(
+      m, "RecognizerPreprocessor")
+      .def(pybind11::init<>())
+      .def_property("static_shape_infer",
+                    &vision::ocr::RecognizerPreprocessor::GetStaticShapeInfer,
+                    &vision::ocr::RecognizerPreprocessor::SetStaticShapeInfer)
+      .def_property("rec_image_shape",
+                    &vision::ocr::RecognizerPreprocessor::GetRecImageShape,
+                    &vision::ocr::RecognizerPreprocessor::SetRecImageShape)
+      .def_property("mean", &vision::ocr::RecognizerPreprocessor::GetMean,
+                    &vision::ocr::RecognizerPreprocessor::SetMean)
+      .def_property("scale", &vision::ocr::RecognizerPreprocessor::GetScale,
+                    &vision::ocr::RecognizerPreprocessor::SetScale)
+      .def_property("is_scale",
+                    &vision::ocr::RecognizerPreprocessor::GetIsScale,
+                    &vision::ocr::RecognizerPreprocessor::SetIsScale)
+      .def("run", [](vision::ocr::RecognizerPreprocessor& self,
+                     std::vector<pybind11::array>& im_list) {
+        std::vector<vision::FDMat> images;
+        for (size_t i = 0; i < im_list.size(); ++i) {
+          images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
        }
-        return std::make_pair(texts, rec_scores);
-      })
+        std::vector<FDTensor> outputs;
+        if (!self.Run(&images, &outputs)) {
+          throw std::runtime_error(
+              "Failed to preprocess the input data in RecognizerPreprocessor.");
+        }
+        for (size_t i = 0; i < outputs.size(); ++i) {
+          outputs[i].StopSharing();
+        }
+        return outputs;
+      });
+
+  pybind11::class_<vision::ocr::RecognizerPostprocessor>(
+      m, "RecognizerPostprocessor")
+      .def(pybind11::init<std::string>())
+      .def("run",
+           [](vision::ocr::RecognizerPostprocessor& self,
+              std::vector<FDTensor>& inputs) {
+             std::vector<std::string> texts;
+             std::vector<float> rec_scores;
+             if (!self.Run(inputs, &texts, &rec_scores)) {
+               throw std::runtime_error(
+                   "Failed to preprocess the input data in "
+                   "RecognizerPostprocessor.");
+             }
+             return std::make_pair(texts, rec_scores);
+           })
      .def("run", [](vision::ocr::RecognizerPostprocessor& self,
                     std::vector<pybind11::array>& input_array) {
        std::vector<FDTensor> inputs;
@@ -207,7 +268,9 @@ void BindPPOCRModel(pybind11::module& m) {
        std::vector<std::string> texts;
        std::vector<float> rec_scores;
        if (!self.Run(inputs, &texts, &rec_scores)) {
-          throw std::runtime_error("Failed to preprocess the input data in RecognizerPostprocessor.");
+          throw std::runtime_error(
+              "Failed to preprocess the input data in "
+              "RecognizerPostprocessor.");
        }
        return std::make_pair(texts, rec_scores);
      });
@@ -216,17 +279,20 @@ void BindPPOCRModel(pybind11::module& m) {
      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
                          ModelFormat>())
      .def(pybind11::init<>())
-      .def_property_readonly("preprocessor", &vision::ocr::Recognizer::GetPreprocessor)
-      .def_property_readonly("postprocessor", &vision::ocr::Recognizer::GetPostprocessor)
-      .def("predict", [](vision::ocr::Recognizer& self,
-                         pybind11::array& data) {
-        auto mat = PyArrayToCvMat(data);
-        std::string text;
-        float rec_score;
-        self.Predict(mat, &text, &rec_score);
-        return std::make_pair(text, rec_score);
-      })
-      .def("batch_predict", [](vision::ocr::Recognizer& self, std::vector<pybind11::array>& data) {
+      .def_property_readonly("preprocessor",
+                             &vision::ocr::Recognizer::GetPreprocessor)
+      .def_property_readonly("postprocessor",
+                             &vision::ocr::Recognizer::GetPostprocessor)
+      .def("predict",
+           [](vision::ocr::Recognizer& self, pybind11::array& data) {
+             auto mat = PyArrayToCvMat(data);
+             std::string text;
+             float rec_score;
+             self.Predict(mat, &text, &rec_score);
+             return std::make_pair(text, rec_score);
+           })
+      .def("batch_predict", [](vision::ocr::Recognizer& self,
+                               std::vector<pybind11::array>& data) {
        std::vector<cv::Mat> images;
        std::vector<std::string> texts;
        std::vector<float> rec_scores;