mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-22 16:07:51 +08:00
[Hackthon_4th 242] Support en_ppstructure_mobile_v2.0_SLANet (#1816)
* first draft * update api name * fix bug * fix bug and * fix bug in c api * fix bug in c_api --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
This commit is contained in:
@@ -14,6 +14,11 @@ add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
|
||||
# 添加FastDeploy库依赖
|
||||
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
|
||||
|
||||
# PPStructure-V2-Table
|
||||
add_executable(infer_ppstructurev2_table ${PROJECT_SOURCE_DIR}/infer_ppstructurev2_table.cc)
|
||||
# 添加FastDeploy库依赖
|
||||
target_link_libraries(infer_ppstructurev2_table ${FASTDEPLOY_LIBS})
|
||||
|
||||
# Only Det
|
||||
add_executable(infer_det ${PROJECT_SOURCE_DIR}/infer_det.cc)
|
||||
# 添加FastDeploy库依赖
|
||||
@@ -28,3 +33,8 @@ target_link_libraries(infer_cls ${FASTDEPLOY_LIBS})
|
||||
add_executable(infer_rec ${PROJECT_SOURCE_DIR}/infer_rec.cc)
|
||||
# 添加FastDeploy库依赖
|
||||
target_link_libraries(infer_rec ${FASTDEPLOY_LIBS})
|
||||
|
||||
# Only Table
|
||||
add_executable(infer_structurev2_table ${PROJECT_SOURCE_DIR}/infer_structurev2_table.cc)
|
||||
# 添加FastDeploy库依赖
|
||||
target_link_libraries(infer_structurev2_table ${FASTDEPLOY_LIBS})
|
||||
|
||||
@@ -43,10 +43,15 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||
# 下载PP-OCRv3文字识别模型
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
|
||||
tar -xvf ch_PP-OCRv3_rec_infer.tar
|
||||
# 下载PPStructureV2表格识别模型
|
||||
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||
tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||
|
||||
# 下载预测图片与字典文件
|
||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
|
||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppstructure/docs/table/table.jpg
|
||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
|
||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/dict/table_structure_dict_ch.txt
|
||||
|
||||
# 运行部署示例
|
||||
# 在CPU上使用Paddle Inference推理
|
||||
@@ -77,6 +82,9 @@ wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_
|
||||
|
||||
# 在CPU上,单独使用文字识别模型部署
|
||||
./infer_rec ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 0
|
||||
|
||||
# 在CPU上,单独使用表格识别模型部署
|
||||
./infer_structurev2_table ./ch_ppstructure_mobile_v2.0_SLANet_infer ./table_structure_dict_ch.txt ./table.jpg 0
|
||||
```
|
||||
|
||||
运行完成可视化结果如下图所示
|
||||
|
||||
@@ -0,0 +1,177 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "fastdeploy/vision.h"
|
||||
#ifdef WIN32
|
||||
const char sep = '\\';
|
||||
#else
|
||||
const char sep = '/';
|
||||
#endif
|
||||
|
||||
// Build the det/rec/table models, assemble them into a PPStructureV2Table
// pipeline, run prediction on one image and save a visualization to
// ./vis_result.jpg.
//
// det_model_dir        - directory holding the DB text-detection model
// rec_model_dir        - directory holding the text-recognition model
// table_model_dir      - directory holding the SLANet table-structure model
// rec_label_file       - character dict for the recognition model
// table_char_dict_path - structure dict for the table model
// image_file           - path of the input image
// option               - base RuntimeOption, copied per model below
void InitAndInfer(const std::string &det_model_dir,
                  const std::string &rec_model_dir,
                  const std::string &table_model_dir,
                  const std::string &rec_label_file,
                  const std::string &table_char_dict_path,
                  const std::string &image_file,
                  const fastdeploy::RuntimeOption &option) {
  // Paddle inference models are always stored as these two fixed file names.
  auto det_model_file = det_model_dir + sep + "inference.pdmodel";
  auto det_params_file = det_model_dir + sep + "inference.pdiparams";

  auto rec_model_file = rec_model_dir + sep + "inference.pdmodel";
  auto rec_params_file = rec_model_dir + sep + "inference.pdiparams";

  auto table_model_file = table_model_dir + sep + "inference.pdmodel";
  auto table_params_file = table_model_dir + sep + "inference.pdiparams";

  // Each model gets its own copy of the runtime option so the TRT shapes
  // and cache files below can be configured independently.
  auto det_option = option;
  auto rec_option = option;
  auto table_option = option;

  // The rec model can inference a batch of images now.
  // User could initialize the inference batch size and set them after create
  // PP-OCR model.
  int rec_batch_size = 1;

  // If use TRT backend, the dynamic shape will be set as follow.
  // We recommend that users set the length and height of the detection model to
  // a multiple of 32.
  // We also recommend that users set the Trt input shape as follow.
  det_option.SetTrtInputShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
                              {1, 3, 960, 960});
  rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {rec_batch_size, 3, 48, 320},
                              {rec_batch_size, 3, 48, 2304});
  // The table model takes a fixed 488x488 input, so min/opt/max are the same.
  table_option.SetTrtInputShape("x", {1, 3, 488, 488}, {1, 3, 488, 488},
                                {1, 3, 488, 488});

  // Users could save TRT cache file to disk as follow.
  det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
  rec_option.SetTrtCacheFile(rec_model_dir + sep + "rec_trt_cache.trt");
  table_option.SetTrtCacheFile(table_model_dir + sep + "table_trt_cache.trt");

  auto det_model = fastdeploy::vision::ocr::DBDetector(
      det_model_file, det_params_file, det_option);
  auto rec_model = fastdeploy::vision::ocr::Recognizer(
      rec_model_file, rec_params_file, rec_label_file, rec_option);
  auto table_model = fastdeploy::vision::ocr::StructureV2Table(
      table_model_file, table_params_file, table_char_dict_path, table_option);

  assert(det_model.Initialized());
  assert(rec_model.Initialized());
  assert(table_model.Initialized());

  // Parameters settings for pre and post processing of Det/Cls/Rec Models.
  // All parameters are set to default values.
  det_model.GetPreprocessor().SetMaxSideLen(960);
  det_model.GetPostprocessor().SetDetDBThresh(0.3);
  det_model.GetPostprocessor().SetDetDBBoxThresh(0.6);
  det_model.GetPostprocessor().SetDetDBUnclipRatio(1.5);
  det_model.GetPostprocessor().SetDetDBScoreMode("slow");
  det_model.GetPostprocessor().SetUseDilation(0);

  rec_model.GetPreprocessor().SetStaticShapeInfer(true);
  rec_model.GetPreprocessor().SetRecImageShape({3, 48, 320});

  // Assemble detection, recognition and table models into the
  // PPStructureV2Table pipeline (no classification model is used here).
  auto ppstructurev2_table = fastdeploy::pipeline::PPStructureV2Table(
      &det_model, &rec_model, &table_model);

  // Set inference batch size for cls model and rec model, the value could be -1
  // and 1 to positive infinity.
  // When inference batch size is set to -1, it means that the inference batch
  // size of the rec models will be the same as the number of boxes detected
  // by the det model.
  ppstructurev2_table.SetRecBatchSize(rec_batch_size);

  if (!ppstructurev2_table.Initialized()) {
    std::cerr << "Failed to initialize PP-OCR-Table." << std::endl;
    return;
  }

  auto im = cv::imread(image_file);
  // Keep an untouched copy for visualization; Predict may modify `im`.
  auto im_bak = im.clone();

  fastdeploy::vision::OCRResult result;
  if (!ppstructurev2_table.Predict(&im, &result)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << result.Str() << std::endl;

  auto vis_im = fastdeploy::vision::VisOcr(im_bak, result);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 8) {
|
||||
std::cout << "Usage: infer_ppstructurev2_table path/to/det_model "
|
||||
"path/to/rec_model "
|
||||
"path/to/table_model path/to/rec_label_file "
|
||||
"path/to/table_char_dict_path path/to/image "
|
||||
"run_option, "
|
||||
"e.g ./infer_ppstructurev2_table ./ch_PP-OCRv3_det_infer "
|
||||
"./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer "
|
||||
"./ppocr_keys_v1.txt ./12.jpg 0"
|
||||
<< std::endl;
|
||||
std::cout << "The data type of run_option is int, e.g. 0: run with paddle "
|
||||
"inference on cpu;"
|
||||
<< std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
fastdeploy::RuntimeOption option;
|
||||
int flag = std::atoi(argv[7]);
|
||||
std::cout << "flag: " << flag << std::endl;
|
||||
|
||||
if (flag == 0) {
|
||||
option.UseCpu();
|
||||
option.UsePaddleBackend(); // Paddle Inference
|
||||
} else if (flag == 1) {
|
||||
option.UseCpu();
|
||||
option.UseOpenVINOBackend(); // OpenVINO
|
||||
} else if (flag == 2) {
|
||||
option.UseCpu();
|
||||
option.UseOrtBackend(); // ONNX Runtime
|
||||
} else if (flag == 3) {
|
||||
option.UseCpu();
|
||||
option.UseLiteBackend(); // Paddle Lite
|
||||
} else if (flag == 4) {
|
||||
option.UseGpu();
|
||||
option.UsePaddleBackend(); // Paddle Inference
|
||||
} else if (flag == 5) {
|
||||
option.UseGpu();
|
||||
option.UsePaddleInferBackend();
|
||||
option.paddle_infer_option.collect_trt_shape = true;
|
||||
option.paddle_infer_option.enable_trt = true; // Paddle-TensorRT
|
||||
} else if (flag == 6) {
|
||||
option.UseGpu();
|
||||
option.UseOrtBackend(); // ONNX Runtime
|
||||
} else if (flag == 7) {
|
||||
option.UseGpu();
|
||||
option.UseTrtBackend(); // TensorRT
|
||||
}
|
||||
|
||||
std::string det_model_dir = argv[1];
|
||||
std::string rec_model_dir = argv[2];
|
||||
std::string table_model_dir = argv[3];
|
||||
std::string rec_label_file = argv[4];
|
||||
std::string table_char_dict_path = argv[5];
|
||||
std::string test_image = argv[6];
|
||||
InitAndInfer(det_model_dir, rec_model_dir, table_model_dir, rec_label_file,
|
||||
table_char_dict_path, test_image, option);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "fastdeploy/vision.h"
|
||||
#ifdef WIN32
|
||||
const char sep = '\\';
|
||||
#else
|
||||
const char sep = '/';
|
||||
#endif
|
||||
|
||||
void InitAndInfer(const std::string &table_model_dir,
|
||||
const std::string &image_file,
|
||||
const std::string &table_char_dict_path,
|
||||
const fastdeploy::RuntimeOption &option) {
|
||||
auto table_model_file = table_model_dir + sep + "inference.pdmodel";
|
||||
auto table_params_file = table_model_dir + sep + "inference.pdiparams";
|
||||
auto table_option = option;
|
||||
|
||||
auto table_model = fastdeploy::vision::ocr::StructureV2Table(
|
||||
table_model_file, table_params_file, table_char_dict_path, table_option);
|
||||
assert(table_model.Initialized());
|
||||
|
||||
auto im = cv::imread(image_file);
|
||||
auto im_bak = im.clone();
|
||||
|
||||
fastdeploy::vision::OCRResult result;
|
||||
if (!table_model.Predict(im, &result)) {
|
||||
std::cerr << "Failed to predict." << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout << result.Str() << std::endl;
|
||||
}
|
||||
|
||||
// Entry point: parse the 4 positional arguments (table model dir, image,
// table dict, run_option flag) and run the standalone table-recognition demo.
int main(int argc, char *argv[]) {
  if (argc < 5) {
    // Fixed two defects in the usage text: a missing space between
    // "table_dict_path" and "run_option" (adjacent string literals were
    // concatenated without a separator), and an example that pointed at the
    // cls model instead of the SLANet table model this demo requires.
    std::cout << "Usage: infer_demo path/to/table_model path/to/image "
                 "path/to/table_dict_path "
                 "run_option, "
                 "e.g ./infer_structurev2_table "
                 "ch_ppstructure_mobile_v2.0_SLANet_infer "
                 "table.jpg table_structure_dict_ch.txt 0"
              << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
                 "with gpu;."
              << std::endl;
    return -1;
  }

  fastdeploy::RuntimeOption option;
  int flag = std::atoi(argv[4]);

  // 0 -> CPU, 1 -> GPU; any other value keeps the default option.
  if (flag == 0) {
    option.UseCpu();
  } else if (flag == 1) {
    option.UseGpu();
  }

  std::string table_model_dir = argv[1];
  std::string test_image = argv[2];
  std::string table_char_dict_path = argv[3];
  InitAndInfer(table_model_dir, test_image, table_char_dict_path, option);
  return 0;
}
|
||||
@@ -36,10 +36,15 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||
# 下载PP-OCRv3文字识别模型
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
|
||||
tar -xvf ch_PP-OCRv3_rec_infer.tar
|
||||
# 下载PPStructureV2表格识别模型
|
||||
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||
tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||
|
||||
# 下载预测图片与字典文件
|
||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
|
||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppstructure/docs/table/table.jpg
|
||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
|
||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/dict/table_structure_dict_ch.txt
|
||||
|
||||
# 运行部署示例
|
||||
# 在CPU上使用Paddle Inference推理
|
||||
@@ -71,6 +76,8 @@ python infer_cls.py --cls_model ch_ppocr_mobile_v2.0_cls_infer --image 12.jpg --
|
||||
# 在CPU上,单独使用文字识别模型部署
|
||||
python infer_rec.py --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device cpu
|
||||
|
||||
# 在CPU上,单独使用表格识别模型部署
|
||||
python infer_structurev2_table.py --table_model ./ch_ppstructure_mobile_v2.0_SLANet_infer --table_char_dict_path ./table_structure_dict_ch.txt --image table.jpg --device cpu
|
||||
```
|
||||
|
||||
运行完成可视化结果如下图所示
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import fastdeploy as fd
|
||||
import cv2
|
||||
import os
|
||||
|
||||
|
||||
def parse_arguments():
    """Parse CLI arguments for the PP-StructureV2 table pipeline demo.

    Returns:
        argparse.Namespace with det/rec/table model paths, dict paths,
        batch size, image path and device/backend selection.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--det_model", required=True, help="Path of Detection model of PPOCR.")
    parser.add_argument(
        "--rec_model",
        required=True,
        # Fixed typo: "Recognization" -> "Recognition".
        help="Path of Recognition model of PPOCR.")
    parser.add_argument(
        "--table_model",
        required=True,
        help="Path of Table recognition model of PPOCR.")
    parser.add_argument(
        "--rec_label_file",
        required=True,
        # Fixed help text: this is the recognition label/dict file,
        # not a model path as previously stated.
        help="Path of Recognition label file of PPOCR.")
    parser.add_argument(
        "--table_char_dict_path",
        type=str,
        required=True,
        # Fixed typo: "tabel" -> "table".
        help="table recognition dict path.")
    parser.add_argument(
        "--rec_bs",
        type=int,
        default=6,
        help="Recognition model inference batch size")
    parser.add_argument(
        "--image", type=str, required=True, help="Path of test image file.")
    parser.add_argument(
        "--device",
        type=str,
        default='cpu',
        help="Type of inference device, support 'cpu' or 'gpu'.")
    parser.add_argument(
        "--device_id",
        type=int,
        default=0,
        help="Define which GPU card used to run model.")
    parser.add_argument(
        "--backend",
        type=str,
        default="default",
        help="Type of inference backend, support ort/trt/paddle/openvino, default 'openvino' for cpu, 'tensorrt' for gpu"
    )

    return parser.parse_args()
|
||||
|
||||
|
||||
def build_option(args):
    """Build one RuntimeOption per model (det, rec, table) from CLI args.

    Device and backend selection is applied uniformly to all three options;
    the TRT branch additionally configures per-model dynamic shapes and
    cache files.

    Returns:
        (det_option, rec_option, table_option) tuple of fd.RuntimeOption.
    """
    det_option = fd.RuntimeOption()
    rec_option = fd.RuntimeOption()
    table_option = fd.RuntimeOption()
    all_options = (det_option, rec_option, table_option)

    if args.device.lower() == "gpu":
        for opt in all_options:
            opt.use_gpu(args.device_id)

    backend = args.backend.lower()
    if backend == "trt":
        assert args.device.lower(
        ) == "gpu", "TensorRT backend require inference on device GPU."
        for opt in all_options:
            opt.use_trt_backend()

        # If use TRT backend, the dynamic shape will be set as follow.
        # We recommend that users set the length and height of the detection
        # model to a multiple of 32, and set the Trt input shape as follow.
        det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640],
                                       [1, 3, 960, 960])
        rec_option.set_trt_input_shape("x", [1, 3, 48, 10],
                                       [args.rec_bs, 3, 48, 320],
                                       [args.rec_bs, 3, 48, 2304])
        table_option.set_trt_input_shape("x", [1, 3, 488, 488])

        # Users could save TRT cache file to disk as follow.
        det_option.set_trt_cache_file(args.det_model + "/det_trt_cache.trt")
        rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
        table_option.set_trt_cache_file(args.table_model +
                                        "/table_trt_cache.trt")

    elif backend == "ort":
        for opt in all_options:
            opt.use_ort_backend()

    elif backend == "paddle":
        for opt in all_options:
            opt.use_paddle_infer_backend()

    elif backend == "openvino":
        assert args.device.lower(
        ) == "cpu", "OpenVINO backend require inference on device CPU."
        for opt in all_options:
            opt.use_openvino_backend()

    return det_option, rec_option, table_option
|
||||
|
||||
|
||||
args = parse_arguments()

# Paddle inference models always use these two fixed file names.
det_model_file = os.path.join(args.det_model, "inference.pdmodel")
det_params_file = os.path.join(args.det_model, "inference.pdiparams")

rec_model_file = os.path.join(args.rec_model, "inference.pdmodel")
rec_params_file = os.path.join(args.rec_model, "inference.pdiparams")
rec_label_file = args.rec_label_file

table_model_file = os.path.join(args.table_model, "inference.pdmodel")
table_params_file = os.path.join(args.table_model, "inference.pdiparams")
table_char_dict_path = args.table_char_dict_path

# Set the runtime option (one per model: det, rec, table)
det_option, rec_option, table_option = build_option(args)

det_model = fd.vision.ocr.DBDetector(
    det_model_file, det_params_file, runtime_option=det_option)

rec_model = fd.vision.ocr.Recognizer(
    rec_model_file, rec_params_file, rec_label_file, runtime_option=rec_option)

table_model = fd.vision.ocr.StructureV2Table(
    table_model_file,
    table_params_file,
    table_char_dict_path,
    runtime_option=table_option)

# Detection pre/post-processing parameters; these are the default values,
# shown here so users know which knobs are available.
det_model.preprocessor.max_side_len = 960
det_model.postprocessor.det_db_thresh = 0.3
det_model.postprocessor.det_db_box_thresh = 0.6
det_model.postprocessor.det_db_unclip_ratio = 1.5
det_model.postprocessor.det_db_score_mode = "slow"
det_model.postprocessor.use_dilation = False

# Assemble the detection, recognition and table models into the pipeline.
ppstructurev2_table = fd.vision.ocr.PPStructureV2Table(
    det_model=det_model, rec_model=rec_model, table_model=table_model)

# Batch size used by the recognition stage of the pipeline.
ppstructurev2_table.rec_batch_size = args.rec_bs

# Read the input image
im = cv2.imread(args.image)

# Predict and return the results
result = ppstructurev2_table.predict(im)

print(result)

# Visualize the results.
vis_im = fd.vision.vis_ppocr(im, result)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")
|
||||
@@ -0,0 +1,77 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import fastdeploy as fd
|
||||
import cv2
|
||||
import os
|
||||
|
||||
|
||||
def parse_arguments():
    """Parse CLI arguments for the standalone table-recognition demo.

    Returns:
        argparse.Namespace with the table model path, dict path, image path
        and device selection.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--table_model",
        required=True,
        help="Path of Table recognition model of PPOCR.")
    parser.add_argument(
        "--table_char_dict_path",
        type=str,
        required=True,
        # Fixed typo: "tabel" -> "table".
        help="table recognition dict path.")
    parser.add_argument(
        "--image", type=str, required=True, help="Path of test image file.")
    parser.add_argument(
        "--device",
        type=str,
        default='cpu',
        help="Type of inference device, support 'cpu' or 'gpu'.")
    parser.add_argument(
        "--device_id",
        type=int,
        default=0,
        help="Define which GPU card used to run model.")

    return parser.parse_args()
|
||||
|
||||
|
||||
def build_option(args):
    """Create the RuntimeOption for the table model from CLI args.

    Selects the GPU (with the requested card id) when --device is 'gpu';
    otherwise the default (CPU) option is returned unchanged.
    """
    runtime_opt = fd.RuntimeOption()
    if args.device.lower() == "gpu":
        runtime_opt.use_gpu(args.device_id)
    return runtime_opt
|
||||
|
||||
|
||||
args = parse_arguments()

# Paddle inference models always use these two fixed file names.
table_model_file = os.path.join(args.table_model, "inference.pdmodel")
table_params_file = os.path.join(args.table_model, "inference.pdiparams")

# Set the runtime option
table_option = build_option(args)

# Create the table_model
table_model = fd.vision.ocr.StructureV2Table(
    table_model_file, table_params_file, args.table_char_dict_path,
    table_option)

# Read the image
im = cv2.imread(args.image)

# Predict and return the results
result = table_model.predict(im)

print(result)
|
||||
Reference in New Issue
Block a user