diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2dd5a150a2..7c2c4707d0 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -205,6 +205,7 @@ if(ENABLE_LITE_BACKEND)
endif()
if(ENABLE_PADDLE_BACKEND)
+ set(ENABLE_PADDLE_FRONTEND ON)
add_definitions(-DENABLE_PADDLE_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS})
include(${PROJECT_SOURCE_DIR}/cmake/paddle_inference.cmake)
diff --git a/README_CN.md b/README_CN.md
index 0b74d1f02b..96b7a382fa 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -18,9 +18,9 @@
**⚡️FastDeploy** is an **easy-to-use and efficient** inference deployment toolkit. It covers the industry's 🔥**popular CV, NLP and Speech AI models** and provides an 📦**out-of-the-box** deployment experience, including image classification, object detection, image segmentation, face detection, face recognition, human keypoint detection, OCR, semantic understanding and other tasks, meeting developers' **multi-scenario**, **multi-hardware** and **multi-platform** industrial deployment needs.
-| [Object Detection](examples/vision/detection) | [3D Object Detection](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Potrait Segmentation](examples/vision/segmentation/paddleseg) |
+| [Image Classification](examples/vision/classification) | [Object Detection](examples/vision/detection) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Portrait Segmentation](examples/vision/segmentation/paddleseg) |
|:---------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| [**Image Matting**](examples/vision/matting) | [**Real-Time Matting**](examples/vision/matting) | [**OCR**](examples/vision/ocr) |[**Face Alignment**](examples/vision/facealign)
| [**Pose Estimation**](examples/vision/keypointdetection) | [**Behavior Recognition**](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [**NLP**](examples/text) |[**Speech**](examples/audio/pp-tts)
@@ -81,8 +81,8 @@
- [4. Tips for using TensorRT](docs/cn/faq/tensorrt_tricks.md)
- [5. How to add a new model](docs/cn/faq/develop_a_new_model.md) (in progress)
- More FastDeploy deployment modules
- - [Serving deployment](../serving)
- - [Benchmark](../benchmark)
+ - [Serving deployment](./serving)
+ - [Benchmark](./benchmark)
* **🖥️ Server-side deployment**
diff --git a/README_EN.md b/README_EN.md
index 5fdfe0c554..f3b6e8184a 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -20,9 +20,9 @@ English | [简体中文](README_CN.md)
**⚡️FastDeploy** is an **accessible and efficient** deployment development toolkit. It covers the industry's 🔥**popular CV, NLP and Speech AI models** and provides an 📦**out-of-the-box** deployment experience, spanning image classification, object detection, image segmentation, face detection, face recognition, human keypoint detection, OCR, semantic understanding and other tasks, to meet developers' industrial deployment needs for **multi-scenario**, **multi-hardware** and **multi-platform**.
-| [Object Detection](examples/vision/detection) | [3D Object Detection](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Potrait Segmentation](examples/vision/segmentation/paddleseg) |
+| [Image Classification](examples/vision/classification) | [Object Detection](examples/vision/detection) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Portrait Segmentation](examples/vision/segmentation/paddleseg) |
|:---------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| [**Image Matting**](examples/vision/matting) | [**Real-Time Matting**](examples/vision/matting) | [**OCR**](examples/vision/ocr) |[**Face Alignment**](examples/vision/facealign)
| [**Pose Estimation**](examples/vision/keypointdetection) | [**Behavior Recognition**](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [**NLP**](examples/text) |[**Speech**](examples/audio/pp-tts)
diff --git a/docs/cn/build_and_install/third_libraries.md b/docs/cn/build_and_install/third_libraries.md
new file mode 100644
index 0000000000..90d9108c95
--- /dev/null
+++ b/docs/cn/build_and_install/third_libraries.md
@@ -0,0 +1,14 @@
+# Third-Party Library Dependencies
+
+Depending on its build options, FastDeploy currently relies on the following third-party libraries:
+
+- OpenCV: when ENABLE_VISION=ON, a prebuilt OpenCV 3.4.16 library is downloaded automatically
+- ONNX Runtime: when ENABLE_ORT_BACKEND=ON, the ONNX Runtime library is downloaded automatically
+- OpenVINO: when ENABLE_OPENVINO_BACKEND=ON, the OpenVINO library is downloaded automatically
+
+When building, you may instead want to integrate third-party libraries that already exist in your environment; this can be configured through the following switches:
+
+- OPENCV_DIRECTORY: path to OpenCV in your environment, e.g. `-DOPENCV_DIRECTORY=/usr/lib/aarch64-linux-gnu/cmake/opencv4/`
+- ORT_DIRECTORY: path to ONNX Runtime in your environment, e.g. `-DORT_DIRECTORY=/download/onnxruntime-linux-x64-1.0.0`
+- OPENVINO_DIRECTORY: path to OpenVINO in your environment, e.g. `-DOPENVINO_DIRECTORY=/download/openvino`
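+
+For example, a configure command that points FastDeploy at libraries already installed in the environment might look like the following (a minimal sketch; the paths and the set of enabled backends are placeholder assumptions, not required values):
+
+```bash
+# Reuse preinstalled OpenCV and ONNX Runtime instead of the auto-downloaded copies
+# (substitute your own install locations for the example paths).
+cmake .. -DENABLE_VISION=ON \
+         -DENABLE_ORT_BACKEND=ON \
+         -DOPENCV_DIRECTORY=/usr/lib/aarch64-linux-gnu/cmake/opencv4/ \
+         -DORT_DIRECTORY=/download/onnxruntime-linux-x64-1.0.0
+```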
diff --git a/examples/text/ernie-3.0/README.md b/examples/text/ernie-3.0/README.md
new file mode 100644
index 0000000000..36d76ee817
--- /dev/null
+++ b/examples/text/ernie-3.0/README.md
@@ -0,0 +1,39 @@
+# ERNIE 3.0 Model Deployment
+
+## Detailed Model Description
+- [PaddleNLP ERNIE 3.0 model description](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+## Supported Models
+
+| Model | Architecture | Language |
+| :---: | :--------: | :--------: |
+| `ERNIE 3.0-Base`| 12-layers, 768-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Medium`| 6-layers, 768-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Mini`| 6-layers, 384-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Micro`| 4-layers, 384-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Nano`| 4-layers, 312-hidden, 12-heads | Chinese |
+
+## Supported NLP Tasks
+
+| Task | Supported |
+| :--------------- | ------- |
+| Text classification | ✅ |
+| Sequence labeling | ❌ |
+
+## Export the Deployment Model
+
+Before deployment, the trained ERNIE model must be exported as a deployment model; see [Export the model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0) for the export steps.
+
+## Download Fine-Tuned Models
+
+### Classification Task
+
+For developers' convenience, the ERNIE 3.0-Medium model below has been fine-tuned on the text classification [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) and can be downloaded and tried directly.
+
+- [ERNIE 3.0 Medium AFQMC](https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz)
+
+## Detailed Deployment Documents
+
+- [Python deployment](python)
+- [C++ deployment](cpp)
+- [Serving deployment](serving)
diff --git a/examples/text/ernie-3.0/cpp/CMakeLists.txt b/examples/text/ernie-3.0/cpp/CMakeLists.txt
new file mode 100644
index 0000000000..fe15b14ff8
--- /dev/null
+++ b/examples/text/ernie-3.0/cpp/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
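+# The SDK location is supplied on the command line,
+# e.g. cmake .. -DFASTDEPLOY_INSTALL_DIR=/path/to/fastdeploy-sdk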
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+set(THIRD_LIBS "")
+include(gflags.cmake)
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(seq_cls_infer_demo ${PROJECT_SOURCE_DIR}/seq_cls_infer.cc)
+target_link_libraries(seq_cls_infer_demo ${FASTDEPLOY_LIBS} ${THIRD_LIBS})
diff --git a/examples/text/ernie-3.0/cpp/README.md b/examples/text/ernie-3.0/cpp/README.md
new file mode 100644
index 0000000000..5c2c854edf
--- /dev/null
+++ b/examples/text/ernie-3.0/cpp/README.md
@@ -0,0 +1,70 @@
+# ERNIE 3.0 Model C++ Deployment Example
+
+Before deployment, confirm the following two steps:
+
+- 1. The hardware and software environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+- 2. Download the prebuilt deployment library and samples code matching your development environment; see [FastDeploy prebuilt libraries](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+
+This directory provides `seq_cls_infer.cc` to quickly accomplish C++ deployment of a text classification task on CPU/GPU.
+
+
+## Text Classification Task
+
+### Quick Start
+
+The following example shows how to use the FastDeploy library to run C++ text classification inference with an ERNIE 3.0 Medium model fine-tuned on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) of the CLUE Benchmark.
+
+```bash
+# Download the SDK and build the model examples code (the SDK contains the examples code)
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-0.4.0.tgz
+tar xvf fastdeploy-linux-x64-gpu-0.4.0.tgz
+
+cd fastdeploy-linux-x64-gpu-0.4.0/examples/text/ernie-3.0/cpp
+mkdir build
+cd build
+# Run cmake; FASTDEPLOY_INSTALL_DIR must be set to the FastDeploy SDK directory.
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/../../../../../../fastdeploy-linux-x64-gpu-0.4.0
+make -j
+
+# Download the ERNIE 3.0 model fine-tuned on the AFQMC dataset, along with the vocab file
+wget https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz
+tar xvfz ernie-3.0-medium-zh-afqmc.tgz
+
+# CPU inference
+./seq_cls_infer_demo --device cpu --model_dir ernie-3.0-medium-zh-afqmc
+
+# GPU inference
+./seq_cls_infer_demo --device gpu --model_dir ernie-3.0-medium-zh-afqmc
+
+```
+
+The result returned after running is as follows:
+```bash
+[INFO] /paddle/FastDeploy/examples/text/ernie-3.0/cpp/seq_cls_infer.cc(93)::CreateRuntimeOption model_path = ernie-3.0-medium-zh-afqmc/infer.pdmodel, param_path = ernie-3.0-medium-zh-afqmc/infer.pdiparams
+[INFO] fastdeploy/runtime.cc(469)::Init Runtime initialized with Backend::ORT in Device::CPU.
+Batch id: 0, example id: 0, sentence 1: 花呗收款额度限制, sentence 2: 收钱码,对花呗支付的金额有限制吗, label: 1, confidence: 0.581852
+Batch id: 1, example id: 0, sentence 1: 花呗支持高铁票支付吗, sentence 2: 为什么友付宝不支持花呗付款, label: 0, confidence: 0.997921
+```
+
+
+
+### Parameter Description
+
+Besides the command line parameters in the example above, `seq_cls_infer_demo` supports more settings. The parameters are described below.
+
+| Parameter | Description |
+|----------|--------------|
+|--model_dir | Directory of the deployed model |
+|--batch_size | Maximum batch size for inference, 1 by default |
+|--max_length | Maximum sequence length, 128 by default |
+|--device | Device to run on; one of ['cpu', 'gpu'], 'cpu' by default |
+|--backend | Inference backend; one of ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'], 'onnx_runtime' by default |
+|--use_fp16 | Whether to run inference in FP16 mode; available with the tensorrt and paddle_tensorrt backends, False by default |
+
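+For instance, a GPU run with the Paddle-TensorRT backend and FP16 enabled could look like the following (an illustrative flag combination; the batch size of 2 is an arbitrary example value):
+
+```bash
+# GPU inference via Paddle-TensorRT with FP16; flag values are examples only
+./seq_cls_infer_demo --device gpu --backend paddle_tensorrt --use_fp16=true \
+    --batch_size 2 --max_length 128 --model_dir ernie-3.0-medium-zh-afqmc
+```
+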
+## Related Documents
+
+[Detailed introduction to ERNIE 3.0 models](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[How to export ERNIE 3.0 models](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[ERNIE 3.0 model Python deployment](../python/README.md)
diff --git a/examples/text/ernie-3.0/cpp/gflags.cmake b/examples/text/ernie-3.0/cpp/gflags.cmake
new file mode 100644
index 0000000000..9fede6c5fe
--- /dev/null
+++ b/examples/text/ernie-3.0/cpp/gflags.cmake
@@ -0,0 +1,76 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+INCLUDE(ExternalProject)
+SET(GIT_URL "https://github.com")
+SET(GFLAGS_PREFIX_DIR ${CMAKE_CURRENT_BINARY_DIR}/gflags)
+SET(GFLAGS_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/install/gflags)
+SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE)
+set(GFLAGS_REPOSITORY ${GIT_URL}/gflags/gflags.git)
+set(GFLAGS_TAG "v2.2.2")
+IF(WIN32)
+ set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
+ELSE(WIN32)
+ set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
+ set(BUILD_COMMAND $(MAKE) --silent)
+ set(INSTALL_COMMAND $(MAKE) install)
+ENDIF(WIN32)
+
+INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})
+
+ExternalProject_Add(
+ extern_gflags
+ ${EXTERNAL_PROJECT_LOG_ARGS}
+ ${SHALLOW_CLONE}
+ GIT_REPOSITORY ${GFLAGS_REPOSITORY}
+ GIT_TAG ${GFLAGS_TAG}
+ PREFIX ${GFLAGS_PREFIX_DIR}
+ UPDATE_COMMAND ""
+ BUILD_COMMAND ${BUILD_COMMAND}
+ INSTALL_COMMAND ${INSTALL_COMMAND}
+ CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+ -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+ -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+ -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+ -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
+ -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+ -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
+ -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
+ -DBUILD_STATIC_LIBS=ON
+ -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+ -DBUILD_TESTING=OFF
+ -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
+ ${EXTERNAL_OPTIONAL_ARGS}
+ CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
+ -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+ -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+ BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}
+)
+
+ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
+ADD_DEPENDENCIES(gflags extern_gflags)
+LIST(APPEND THIRD_LIBS gflags)
+if (UNIX)
+ LIST(APPEND THIRD_LIBS pthread)
+endif()
+# On Windows (including MinGW), the Shlwapi library is used by gflags if available.
+if (WIN32)
+ include(CheckIncludeFileCXX)
+ check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI)
+ if (HAVE_SHLWAPI)
+ set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib)
+ endif(HAVE_SHLWAPI)
+endif (WIN32)
diff --git a/examples/text/ernie-3.0/cpp/seq_cls_infer.cc b/examples/text/ernie-3.0/cpp/seq_cls_infer.cc
new file mode 100644
index 0000000000..01ef403a0b
--- /dev/null
+++ b/examples/text/ernie-3.0/cpp/seq_cls_infer.cc
@@ -0,0 +1,269 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <algorithm>
+#include <iostream>
+#include <vector>
+#include "fastdeploy/function/reduce.h"
+#include "fastdeploy/function/softmax.h"
+#include "fastdeploy/runtime.h"
+#include "fastdeploy/utils/path.h"
+#include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h"
+#include "gflags/gflags.h"
+
+using namespace paddlenlp;
+using namespace faster_tokenizer::tokenizers_impl;
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+DEFINE_string(model_dir, "", "Directory of the inference model.");
+DEFINE_string(vocab_path, "", "Path of the vocab file.");
+DEFINE_string(device, "cpu",
+              "Type of inference device, support 'cpu' or 'gpu'.");
+DEFINE_string(backend, "onnx_runtime",
+              "The inference runtime backend, support: ['onnx_runtime', "
+              "'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']");
+DEFINE_int32(batch_size, 1, "The batch size of data.");
+DEFINE_int32(max_length, 128, "The max length of sequence.");
+DEFINE_bool(use_fp16, false, "Whether to use FP16 mode.");
+
+void PrintUsage() {
+  fastdeploy::FDINFO
+      << "Usage: seq_cls_infer_demo --model_dir dir --device [cpu|gpu] "
+         "--backend "
+         "[onnx_runtime|paddle|openvino|tensorrt|paddle_tensorrt] "
+         "--batch_size size --max_length len --use_fp16 false"
+      << std::endl;
+  fastdeploy::FDINFO << "Default value of device: cpu" << std::endl;
+  fastdeploy::FDINFO << "Default value of backend: onnx_runtime" << std::endl;
+  fastdeploy::FDINFO << "Default value of batch_size: 1" << std::endl;
+  fastdeploy::FDINFO << "Default value of max_length: 128" << std::endl;
+  fastdeploy::FDINFO << "Default value of use_fp16: false" << std::endl;
+}
+
+bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
+  if (FLAGS_device == "gpu") {
+    option->UseGpu();
+  } else if (FLAGS_device == "cpu") {
+    option->UseCpu();
+  } else {
+    fastdeploy::FDERROR << "The available device should be one of the list "
+                           "['cpu', 'gpu']. But receive '"
+                        << FLAGS_device << "'" << std::endl;
+    return false;
+  }
+
+  if (FLAGS_backend == "onnx_runtime") {
+    option->UseOrtBackend();
+  } else if (FLAGS_backend == "paddle") {
+    option->UsePaddleBackend();
+  } else if (FLAGS_backend == "openvino") {
+    option->UseOpenVINOBackend();
+  } else if (FLAGS_backend == "tensorrt" ||
+             FLAGS_backend == "paddle_tensorrt") {
+    option->UseTrtBackend();
+    if (FLAGS_backend == "paddle_tensorrt") {
+      option->EnablePaddleToTrt();
+      option->EnablePaddleTrtCollectShape();
+    }
+    std::string trt_file = FLAGS_model_dir + sep + "infer.trt";
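+    // Declare min/opt/max dynamic-shape ranges for the two int64 inputs so
+    // TensorRT can build an engine covering batch sizes 1..FLAGS_batch_size.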
+ option->SetTrtInputShape("input_ids", {1, FLAGS_max_length},
+ {FLAGS_batch_size, FLAGS_max_length},
+ {FLAGS_batch_size, FLAGS_max_length});
+ option->SetTrtInputShape("token_type_ids", {1, FLAGS_max_length},
+ {FLAGS_batch_size, FLAGS_max_length},
+ {FLAGS_batch_size, FLAGS_max_length});
+ if (FLAGS_use_fp16) {
+ option->EnableTrtFP16();
+ trt_file = trt_file + ".fp16";
+ }
+ } else {
+ fastdeploy::FDERROR << "The avilable backend should be one of the list "
+ "['paddle', 'openvino', 'tensorrt', "
+ "'paddle_tensorrt']. But receive '"
+ << FLAGS_backend << "'" << std::endl;
+ return false;
+ }
+ std::string model_path = FLAGS_model_dir + sep + "infer.pdmodel";
+ std::string param_path = FLAGS_model_dir + sep + "infer.pdiparams";
+ fastdeploy::FDINFO << "model_path = " << model_path
+ << ", param_path = " << param_path << std::endl;
+ option->SetModelPath(model_path, param_path);
+ return true;
+}
+
+bool BatchFyTexts(const std::vector<std::string>& texts, int batch_size,
+                  std::vector<std::vector<std::string>>* batch_texts) {
+  for (int idx = 0; idx < texts.size(); idx += batch_size) {
+    int rest = texts.size() - idx;
+    int curr_size = std::min(batch_size, rest);
+    std::vector<std::string> batch_text(curr_size);
+    std::copy_n(texts.begin() + idx, curr_size, batch_text.begin());
+    batch_texts->emplace_back(std::move(batch_text));
+  }
+  return true;
+}
+
+struct SeqClsResult {
+  int label;
+  float confidence;
+};
+
+struct ErnieForSequenceClassificationPredictor {
+  fastdeploy::Runtime runtime_;
+  ErnieFasterTokenizer tokenizer_;
+  ErnieForSequenceClassificationPredictor(
+      const fastdeploy::RuntimeOption& option,
+      const ErnieFasterTokenizer& tokenizer)
+      : tokenizer_(tokenizer) {
+    runtime_.Init(option);
+  }
+
+  bool Preprocess(const std::vector<std::string>& texts,
+                  const std::vector<std::string>& texts_pair,
+                  std::vector<fastdeploy::FDTensor>* inputs) {
+    std::vector<faster_tokenizer::core::Encoding> encodings;
+    std::vector<faster_tokenizer::core::EncodeInput> text_pair_input;
+    // 1. Tokenize the text or (text, text_pair)
+    if (texts_pair.empty()) {
+      for (int i = 0; i < texts.size(); ++i) {
+        text_pair_input.emplace_back(texts[i]);
+      }
+    } else {
+      if (texts.size() != texts_pair.size()) {
+        return false;
+      }
+      for (int i = 0; i < texts.size(); ++i) {
+        text_pair_input.emplace_back(
+            std::pair<std::string, std::string>(texts[i], texts_pair[i]));
+      }
+    }
+    tokenizer_.EncodeBatchStrings(text_pair_input, &encodings);
+    // 2. Construct the input vector tensor
+    // 2.1 Allocate input tensor
+    int64_t batch_size = texts.size();
+    int64_t seq_len = 0;
+    if (batch_size > 0) {
+      seq_len = encodings[0].GetIds().size();
+    }
+    inputs->resize(runtime_.NumInputs());
+    for (int i = 0; i < runtime_.NumInputs(); ++i) {
+      (*inputs)[i].Allocate({batch_size, seq_len},
+                            fastdeploy::FDDataType::INT64,
+                            runtime_.GetInputInfo(i).name);
+    }
+    // 2.2 Set the value of data
+    size_t start = 0;
+    int64_t* input_ids_ptr =
+        reinterpret_cast<int64_t*>((*inputs)[0].MutableData());
+    int64_t* type_ids_ptr =
+        reinterpret_cast<int64_t*>((*inputs)[1].MutableData());
+    for (int i = 0; i < encodings.size(); ++i) {
+      auto&& curr_input_ids = encodings[i].GetIds();
+      auto&& curr_type_ids = encodings[i].GetTypeIds();
+      std::copy(curr_input_ids.begin(), curr_input_ids.end(),
+                input_ids_ptr + start);
+      std::copy(curr_type_ids.begin(), curr_type_ids.end(),
+                type_ids_ptr + start);
+      start += seq_len;
+    }
+    return true;
+  }
+
+  bool Postprocess(const std::vector<fastdeploy::FDTensor>& outputs,
+                   std::vector<SeqClsResult>* seq_cls_results) {
+    const auto& logits = outputs[0];
+    fastdeploy::FDTensor probs;
+    fastdeploy::Softmax(logits, &probs);
+
+    fastdeploy::FDTensor labels, confidences;
+    fastdeploy::Max(probs, &confidences, {-1});
+    fastdeploy::ArgMax(probs, &labels, -1);
+    if (labels.Numel() != confidences.Numel()) {
+      return false;
+    }
+
+    seq_cls_results->resize(labels.Numel());
+    int64_t* label_ptr = reinterpret_cast<int64_t*>(labels.Data());
+    float* confidence_ptr = reinterpret_cast<float*>(confidences.Data());
+    for (int i = 0; i < labels.Numel(); ++i) {
+      (*seq_cls_results)[i].label = label_ptr[i];
+      (*seq_cls_results)[i].confidence = confidence_ptr[i];
+    }
+    return true;
+  }
+
+  bool Predict(const std::vector<std::string>& texts,
+               const std::vector<std::string>& texts_pair,
+               std::vector<SeqClsResult>* seq_cls_results) {
+    std::vector<fastdeploy::FDTensor> inputs;
+    if (!Preprocess(texts, texts_pair, &inputs)) {
+      return false;
+    }
+
+    std::vector<fastdeploy::FDTensor> outputs(runtime_.NumOutputs());
+    runtime_.Infer(inputs, &outputs);
+
+    if (!Postprocess(outputs, seq_cls_results)) {
+      return false;
+    }
+    return true;
+  }
+};
+
+int main(int argc, char* argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  auto option = fastdeploy::RuntimeOption();
+  if (!CreateRuntimeOption(&option)) {
+    PrintUsage();
+    return -1;
+  }
+
+  std::string vocab_path = FLAGS_vocab_path;
+  if (!fastdeploy::CheckFileExists(vocab_path)) {
+    vocab_path = fastdeploy::PathJoin(FLAGS_model_dir, "vocab.txt");
+    if (!fastdeploy::CheckFileExists(vocab_path)) {
+      fastdeploy::FDERROR << "The path of vocab " << vocab_path
+                          << " doesn't exist" << std::endl;
+      PrintUsage();
+      return -1;
+    }
+  }
+  ErnieFasterTokenizer tokenizer(vocab_path);
+
+  ErnieForSequenceClassificationPredictor predictor(option, tokenizer);
+
+  std::vector<SeqClsResult> seq_cls_results;
+  std::vector<std::string> texts_ds = {"花呗收款额度限制",
+                                       "花呗支持高铁票支付吗"};
+  std::vector<std::string> texts_pair_ds = {
+      "收钱码,对花呗支付的金额有限制吗", "为什么友付宝不支持花呗付款"};
+  std::vector<std::vector<std::string>> batch_texts, batch_texts_pair;
+  BatchFyTexts(texts_ds, FLAGS_batch_size, &batch_texts);
+  BatchFyTexts(texts_pair_ds, FLAGS_batch_size, &batch_texts_pair);
+  for (int bs = 0; bs < batch_texts.size(); ++bs) {
+    predictor.Predict(batch_texts[bs], batch_texts_pair[bs], &seq_cls_results);
+    for (int i = 0; i < batch_texts[bs].size(); ++i) {
+      std::cout << "Batch id: " << bs << ", example id: " << i
+                << ", sentence 1: " << batch_texts[bs][i]
+                << ", sentence 2: " << batch_texts_pair[bs][i]
+                << ", label: " << seq_cls_results[i].label
+                << ", confidence: " << seq_cls_results[i].confidence
+                << std::endl;
+    }
+  }
+  return 0;
+}
diff --git a/examples/text/ernie-3.0/python/README.md b/examples/text/ernie-3.0/python/README.md
new file mode 100644
index 0000000000..12487aa7ad
--- /dev/null
+++ b/examples/text/ernie-3.0/python/README.md
@@ -0,0 +1,71 @@
+# ERNIE 3.0 Model Python Deployment Example
+
+Before deployment, confirm the following two steps:
+
+- 1. The hardware and software environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+- 2. Install the FastDeploy Python whl package; see [FastDeploy Python installation](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+
+This directory provides `seq_cls_infer.py` to quickly accomplish deployment of a text classification task on CPU/GPU.
+
+## Installing Dependencies
+
+The Python Predictor provided by this project tokenizes with the AutoTokenizer from PaddleNLP and uses fast_tokenizer to speed up tokenization. Run the following command to install the dependencies.
+
+```bash
+pip install -r requirements.txt
+```
+
+
+## Text Classification Task
+
+### Quick Start
+
+The following example shows how to use the FastDeploy library to run Python text classification inference with an ERNIE 3.0 Medium model fine-tuned on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) of the CLUE Benchmark.
+
+```bash
+
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/examples/text/ernie-3.0/python
+
+# Download the ERNIE 3.0 model fine-tuned on the AFQMC dataset
+wget https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz
+tar xvfz ernie-3.0-medium-zh-afqmc.tgz
+
+# CPU inference
+python seq_cls_infer.py --device cpu --model_dir ernie-3.0-medium-zh-afqmc
+
+# GPU inference
+python seq_cls_infer.py --device gpu --model_dir ernie-3.0-medium-zh-afqmc
+
+```
+
+The result returned after running is as follows:
+
+```bash
+[INFO] fastdeploy/runtime.cc(469)::Init Runtime initialized with Backend::ORT in Device::CPU.
+Batch id:0, example id:0, sentence1:花呗收款额度限制, sentence2:收钱码,对花呗支付的金额有限制吗, label:1, similarity:0.5819
+Batch id:1, example id:0, sentence1:花呗支持高铁票支付吗, sentence2:为什么友付宝不支持花呗付款, label:0, similarity:0.9979
+```
+
+### Parameter Description
+
+Besides the command line parameters in the example above, `seq_cls_infer.py` supports more settings. The parameters are described below.
+
+| Parameter | Description |
+|----------|--------------|
+|--model_dir | Directory of the deployed model |
+|--batch_size | Maximum batch size for inference, 1 by default |
+|--max_length | Maximum sequence length, 128 by default |
+|--device | Device to run on; one of ['cpu', 'gpu'], 'cpu' by default |
+|--backend | Inference backend; one of ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'], 'onnx_runtime' by default |
+|--use_fp16 | Whether to run inference in FP16 mode; available with the tensorrt and paddle_tensorrt backends, False by default |
+|--use_fast | Whether to use FastTokenizer to speed up the tokenization stage; False by default, matching the script |
+
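+For instance, a GPU run with the Paddle-TensorRT backend and FP16 enabled could look like the following (an illustrative flag combination; the batch size of 2 is an arbitrary example value):
+
+```bash
+# GPU inference via Paddle-TensorRT with FP16; flag values are examples only
+python seq_cls_infer.py --device gpu --backend paddle_tensorrt --use_fp16 True \
+    --batch_size 2 --max_length 128 --model_dir ernie-3.0-medium-zh-afqmc
+```
+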
+## Related Documents
+
+[Detailed introduction to ERNIE 3.0 models](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[How to export ERNIE 3.0 models](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[ERNIE 3.0 model C++ deployment](../cpp/README.md)
diff --git a/examples/text/ernie-3.0/python/requirements.txt b/examples/text/ernie-3.0/python/requirements.txt
new file mode 100644
index 0000000000..204cf718cd
--- /dev/null
+++ b/examples/text/ernie-3.0/python/requirements.txt
@@ -0,0 +1,2 @@
+faster_tokenizer
+paddlenlp
diff --git a/examples/text/ernie-3.0/python/seq_cls_infer.py b/examples/text/ernie-3.0/python/seq_cls_infer.py
new file mode 100644
index 0000000000..de67884a1d
--- /dev/null
+++ b/examples/text/ernie-3.0/python/seq_cls_infer.py
@@ -0,0 +1,182 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import distutils.util
+
+import numpy as np
+import faster_tokenizer
+from paddlenlp.transformers import AutoTokenizer
+import fastdeploy as fd
+
+
+def parse_arguments():
+    import argparse
+    import ast
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_dir", required=True, help="The directory of model.")
+    parser.add_argument(
+        "--vocab_path",
+        type=str,
+        default="",
+        help="The path of tokenizer vocab.")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        choices=['gpu', 'cpu'],
+        help="Type of inference device, support 'cpu' or 'gpu'.")
+    parser.add_argument(
+        "--backend",
+        type=str,
+        default='onnx_runtime',
+        choices=[
+            'onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'
+        ],
+        help="The inference runtime backend.")
+    parser.add_argument(
+        "--batch_size", type=int, default=1, help="The batch size of data.")
+    parser.add_argument(
+        "--max_length",
+        type=int,
+        default=128,
+        help="The max length of sequence.")
+    parser.add_argument(
+        "--log_interval",
+        type=int,
+        default=10,
+        help="The interval of logging.")
+    parser.add_argument(
+        "--use_fp16",
+        type=distutils.util.strtobool,
+        default=False,
+        help="Whether to use FP16 mode")
+    parser.add_argument(
+        "--use_fast",
+        type=distutils.util.strtobool,
+        default=False,
+        help="Whether to use fast_tokenizer to accelerate the tokenization.")
+    return parser.parse_args()
+
+
+def batchfy_text(texts, batch_size):
+    batch_texts = []
+    batch_start = 0
+    while batch_start < len(texts):
+        batch_texts += [
+            texts[batch_start:min(batch_start + batch_size, len(texts))]
+        ]
+        batch_start += batch_size
+    return batch_texts
+
+
+class ErnieForSequenceClassificationPredictor(object):
+    def __init__(self, args):
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            'ernie-3.0-medium-zh', use_faster=args.use_fast)
+        self.runtime = self.create_fd_runtime(args)
+        self.batch_size = args.batch_size
+        self.max_length = args.max_length
+
+    def create_fd_runtime(self, args):
+        option = fd.RuntimeOption()
+        model_path = os.path.join(args.model_dir, "infer.pdmodel")
+        params_path = os.path.join(args.model_dir, "infer.pdiparams")
+        option.set_model_path(model_path, params_path)
+        if args.device == 'cpu':
+            option.use_cpu()
+        else:
+            option.use_gpu()
+        if args.backend == 'paddle':
+            option.use_paddle_backend()
+        elif args.backend == 'onnx_runtime':
+            option.use_ort_backend()
+        elif args.backend == 'openvino':
+            option.use_openvino_backend()
+        else:
+            option.use_trt_backend()
+            if args.backend == 'paddle_tensorrt':
+                option.enable_paddle_to_trt()
+                option.enable_paddle_trt_collect_shape()
+            trt_file = os.path.join(args.model_dir, "infer.trt")
+            option.set_trt_input_shape(
+                'input_ids',
+                min_shape=[1, args.max_length],
+                opt_shape=[args.batch_size, args.max_length],
+                max_shape=[args.batch_size, args.max_length])
+            option.set_trt_input_shape(
+                'token_type_ids',
+                min_shape=[1, args.max_length],
+                opt_shape=[args.batch_size, args.max_length],
+                max_shape=[args.batch_size, args.max_length])
+            if args.use_fp16:
+                option.enable_trt_fp16()
+                trt_file = trt_file + ".fp16"
+            option.set_trt_cache_file(trt_file)
+        return fd.Runtime(option)
+
+    def preprocess(self, texts, texts_pair):
+        data = self.tokenizer(
+            texts,
+            texts_pair,
+            max_length=self.max_length,
+            padding=True,
+            truncation=True)
+        input_ids_name = self.runtime.get_input_info(0).name
+        token_type_ids_name = self.runtime.get_input_info(1).name
+        input_map = {
+            input_ids_name: np.array(
+                data["input_ids"], dtype="int64"),
+            token_type_ids_name: np.array(
+                data["token_type_ids"], dtype="int64")
+        }
+        return input_map
+
+    def infer(self, input_map):
+        results = self.runtime.infer(input_map)
+        return results
+
+    def postprocess(self, infer_data):
+        logits = np.array(infer_data[0])
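+        # Numerically stable softmax: shift logits by their row-wise max
+        # before exponentiating so exp() cannot overflow.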
+        max_value = np.max(logits, axis=1, keepdims=True)
+        exp_data = np.exp(logits - max_value)
+        probs = exp_data / np.sum(exp_data, axis=1, keepdims=True)
+        out_dict = {
+            "label": probs.argmax(axis=-1),
+            "confidence": probs.max(axis=-1)
+        }
+        return out_dict
+
+    def predict(self, texts, texts_pair=None):
+        input_map = self.preprocess(texts, texts_pair)
+        infer_result = self.infer(input_map)
+        output = self.postprocess(infer_result)
+        return output
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    predictor = ErnieForSequenceClassificationPredictor(args)
+    texts_ds = ["花呗收款额度限制", "花呗支持高铁票支付吗"]
+    texts_pair_ds = ["收钱码,对花呗支付的金额有限制吗", "为什么友付宝不支持花呗付款"]
+    batch_texts = batchfy_text(texts_ds, args.batch_size)
+    batch_texts_pair = batchfy_text(texts_pair_ds, args.batch_size)
+
+    for bs, (texts,
+             texts_pair) in enumerate(zip(batch_texts, batch_texts_pair)):
+        outputs = predictor.predict(texts, texts_pair)
+        for i, (sentence1, sentence2) in enumerate(zip(texts, texts_pair)):
+            print(
+                f"Batch id:{bs}, example id:{i}, sentence1:{sentence1}, sentence2:{sentence2}, label:{outputs['label'][i]}, similarity:{outputs['confidence'][i]:.4f}"
+            )
diff --git a/examples/text/ernie-3.0/serving/README.md b/examples/text/ernie-3.0/serving/README.md
index 487a5eddca..fcf3b720ec 100644
--- a/examples/text/ernie-3.0/serving/README.md
+++ b/examples/text/ernie-3.0/serving/README.md
@@ -1,4 +1,4 @@
-# Ernie-3.0 Serving Deployment Example
+# ERNIE 3.0 Serving Deployment Example
## Prepare the Models
diff --git a/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md b/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md
index aaca8a9ec9..b3ce2c1ae2 100644
--- a/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md
+++ b/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md
@@ -1 +1 @@
-This directory stores the Ernie-3.0 model
+This directory stores the ERNIE 3.0 model
diff --git a/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md b/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md
index aaca8a9ec9..b3ce2c1ae2 100644
--- a/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md
+++ b/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md
@@ -1 +1 @@
-This directory stores the Ernie-3.0 model
+This directory stores the ERNIE 3.0 model