Merge branch 'PaddlePaddle:develop' into gbd_android

This commit is contained in:
Winter
2022-11-08 15:41:27 +08:00
committed by GitHub
15 changed files with 759 additions and 9 deletions
+1
@@ -205,6 +205,7 @@ if(ENABLE_LITE_BACKEND)
endif()
if(ENABLE_PADDLE_BACKEND)
set(ENABLE_PADDLE_FRONTEND ON)
add_definitions(-DENABLE_PADDLE_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS})
include(${PROJECT_SOURCE_DIR}/cmake/paddle_inference.cmake)
+4 -4
@@ -18,9 +18,9 @@
**⚡️FastDeploy** is an **easy-to-use and efficient** inference deployment toolkit. It covers the industry's 🔥**popular CV, NLP and Speech AI models** and provides an 📦**out-of-the-box** deployment experience, covering image classification, object detection, image segmentation, face detection, face recognition, human keypoint detection, OCR, semantic understanding and other tasks, to meet developers' industrial deployment needs across **multiple scenarios**, **multiple hardware targets** and **multiple platforms**.
| [Object Detection](examples/vision/detection) | [3D Object Detection](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Portrait Segmentation](examples/vision/segmentation/paddleseg) |
| [Image Classification](examples/vision/classification) | [Object Detection](examples/vision/detection) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Portrait Segmentation](examples/vision/segmentation/paddleseg) |
|:---------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| <img src='https://user-images.githubusercontent.com/54695910/188054680-2f8d1952-c120-4b67-88fc-7d2d7d2378b4.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188270227-1a4671b3-0123-46ab-8d0f-0e4132ae8ec0.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054711-6119f0e7-d741-43b1-b273-9493d103d49f.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054718-6395321c-8937-4fa0-881c-5b20deb92aaa.gif' height="126px" width="190px"> |
| <img src='https://user-images.githubusercontent.com/54695910/200465949-da478e1b-21ce-43b8-9f3f-287460e786bd.png' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054680-2f8d1952-c120-4b67-88fc-7d2d7d2378b4.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054711-6119f0e7-d741-43b1-b273-9493d103d49f.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054718-6395321c-8937-4fa0-881c-5b20deb92aaa.gif' height="126px" width="190px"> |
| [**Image Matting**](examples/vision/matting) | [**Real-Time Matting**](examples/vision/matting) | [**OCR**](examples/vision/ocr) |[**Face Alignment**](examples/vision/facealign)
| <img src='https://user-images.githubusercontent.com/54695910/188058231-a5fe1ce1-0a38-460f-9582-e0b881514908.gif' height="126px" width="190px"> |<img src='https://user-images.githubusercontent.com/54695910/188054691-e4cb1a70-09fe-4691-bc62-5552d50bd853.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054669-a85996ba-f7f3-4646-ae1f-3b7e3e353e7d.gif' height="126px" width="190px" > |<img src='https://user-images.githubusercontent.com/54695910/188059460-9845e717-c30a-4252-bd80-b7f6d4cf30cb.png' height="126px" width="190px"> |
| [**Pose Estimation**](examples/vision/keypointdetection) | [**Behavior Recognition**](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [**NLP**](examples/text) |[**Speech**](examples/audio/pp-tts)
@@ -81,8 +81,8 @@
- [4. Tips and tricks for using TensorRT](docs/cn/faq/tensorrt_tricks.md)
- [5. How to add a new model](docs/cn/faq/develop_a_new_model.md) (in progress)
- More FastDeploy deployment modules
- [Serving deployment](../serving)
- [Benchmark tests](../benchmark)
- [Serving deployment](./serving)
- [Benchmark tests](./benchmark)
</details>
* **🖥️ Server-side deployment**
+2 -2
@@ -20,9 +20,9 @@ English | [简体中文](README_CN.md)
**⚡️FastDeploy** is an **accessible and efficient** deployment development toolkit. It covers 🔥**popular CV, NLP and Speech AI models** in the industry and provides an 📦**out-of-the-box** deployment experience, covering image classification, object detection, image segmentation, face detection, face recognition, human keypoint detection, OCR, semantic understanding and other tasks, to meet developers' industrial deployment needs for **multi-scenario**, **multi-hardware** and **multi-platform** use.
| [Object Detection](examples/vision/detection) | [3D Object Detection](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Portrait Segmentation](examples/vision/segmentation/paddleseg) |
| [Image Classification](examples/vision/classification) | [Object Detection](examples/vision/detection) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Portrait Segmentation](examples/vision/segmentation/paddleseg) |
|:---------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| <img src='https://user-images.githubusercontent.com/54695910/188054680-2f8d1952-c120-4b67-88fc-7d2d7d2378b4.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188270227-1a4671b3-0123-46ab-8d0f-0e4132ae8ec0.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054711-6119f0e7-d741-43b1-b273-9493d103d49f.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054718-6395321c-8937-4fa0-881c-5b20deb92aaa.gif' height="126px" width="190px"> |
| <img src='https://user-images.githubusercontent.com/54695910/200465949-da478e1b-21ce-43b8-9f3f-287460e786bd.png' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054680-2f8d1952-c120-4b67-88fc-7d2d7d2378b4.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054711-6119f0e7-d741-43b1-b273-9493d103d49f.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054718-6395321c-8937-4fa0-881c-5b20deb92aaa.gif' height="126px" width="190px"> |
| [**Image Matting**](examples/vision/matting) | [**Real-Time Matting**](examples/vision/matting) | [**OCR**](examples/vision/ocr) |[**Face Alignment**](examples/vision/facealign)
| <img src='https://user-images.githubusercontent.com/54695910/188058231-a5fe1ce1-0a38-460f-9582-e0b881514908.gif' height="126px" width="190px"> |<img src='https://user-images.githubusercontent.com/54695910/188054691-e4cb1a70-09fe-4691-bc62-5552d50bd853.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054669-a85996ba-f7f3-4646-ae1f-3b7e3e353e7d.gif' height="126px" width="190px" > |<img src='https://user-images.githubusercontent.com/54695910/188059460-9845e717-c30a-4252-bd80-b7f6d4cf30cb.png' height="126px" width="190px"> |
| [**Pose Estimation**](examples/vision/keypointdetection) | [**Behavior Recognition**](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [**NLP**](examples/text) |[**Speech**](examples/audio/pp-tts)
@@ -0,0 +1,14 @@
# Third-Party Dependencies
Depending on the build options, FastDeploy currently relies on the following third-party libraries:
- OpenCV: when ENABLE_VISION=ON, a prebuilt OpenCV 3.4.16 library is downloaded automatically
- ONNX Runtime: when ENABLE_ORT_BACKEND=ON, the ONNX Runtime library is downloaded automatically
- OpenVINO: when ENABLE_OPENVINO_BACKEND=ON, the OpenVINO library is downloaded automatically
When building, users may instead integrate third-party libraries already present in their environment, configured through the following switches:
- OPENCV_DIRECTORY: path to the OpenCV installation in your environment, e.g. `-DOPENCV_DIRECTORY=/usr/lib/aarch64-linux-gnu/cmake/opencv4/`
- ORT_DIRECTORY: path to the ONNX Runtime installation in your environment, e.g. `-DORT_DIRECTORY=/download/onnxruntime-linux-x64-1.0.0`
- OPENVINO_DIRECTORY: path to the OpenVINO installation in your environment, e.g. `-DOPENVINO_DIRECTORY=/download/openvino`
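Combining these switches, a configure step that reuses libraries already on the system might look like the following (a minimal sketch; the paths are placeholders for your own environment):
```bash
# Configure FastDeploy to reuse a system OpenCV and a pre-downloaded
# ONNX Runtime instead of downloading them during the build.
cmake .. \
  -DENABLE_VISION=ON \
  -DENABLE_ORT_BACKEND=ON \
  -DOPENCV_DIRECTORY=/usr/lib/aarch64-linux-gnu/cmake/opencv4/ \
  -DORT_DIRECTORY=/download/onnxruntime-linux-x64-1.0.0
```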
+39
@@ -0,0 +1,39 @@
# ERNIE 3.0 Model Deployment
## Model Details
- [PaddleNLP ERNIE 3.0 model description](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
## Supported Models
| Model | Architecture | Language |
| :---: | :--------: | :--------: |
| `ERNIE 3.0-Base`| 12-layers, 768-hidden, 12-heads | Chinese |
| `ERNIE 3.0-Medium`| 6-layers, 768-hidden, 12-heads | Chinese |
| `ERNIE 3.0-Mini`| 6-layers, 384-hidden, 12-heads | Chinese |
| `ERNIE 3.0-Micro`| 4-layers, 384-hidden, 12-heads | Chinese |
| `ERNIE 3.0-Nano`| 4-layers, 312-hidden, 12-heads | Chinese |
## Supported NLP Tasks
| Task | Supported |
| :--------------- | ------- |
| Text classification | ✅ |
| Sequence labeling | ❌ |
## Exporting the Deployment Model
Before deployment, the trained ERNIE model must first be exported as a deployment model; see [Export model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0) for the steps, and the sketch below.
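The export runs inside the PaddleNLP ERNIE 3.0 model zoo. The following is a hypothetical sketch only; the actual script name, paths, and flags are defined in the document linked above and may differ by version:
```bash
# Hypothetical sketch: export a fine-tuned ERNIE 3.0 checkpoint to a static
# inference model (infer.pdmodel / infer.pdiparams). Consult the PaddleNLP
# ERNIE 3.0 model zoo for the authoritative script and flags.
cd PaddleNLP/model_zoo/ernie-3.0
python export_model.py --model_path ./finetuned_checkpoint --output_path ./infer_model
```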
## Downloading Fine-tuned Models
### Classification Task
For easy testing, the ERNIE 3.0-Medium model fine-tuned for text classification on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) is provided below for direct download.
- [ERNIE 3.0 Medium AFQMC](https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz)
## Detailed Deployment Docs
- [Python deployment](python)
- [C++ deployment](cpp)
- [Serving deployment](serving)
@@ -0,0 +1,26 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
cmake_minimum_required(VERSION 3.10)
project(infer_demo C CXX)
set(FASTDEPLOY_INSTALL_DIR "" CACHE PATH "Path of the downloaded FastDeploy SDK.")
set(THIRD_LIBS "")
include(gflags.cmake)
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(seq_cls_infer_demo ${PROJECT_SOURCE_DIR}/seq_cls_infer.cc)
target_link_libraries(seq_cls_infer_demo ${FASTDEPLOY_LIBS} ${THIRD_LIBS})
+70
@@ -0,0 +1,70 @@
# ERNIE 3.0 Model C++ Deployment Example
Before deployment, confirm the following two steps:
- 1. The software and hardware environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the prebuilt deployment library and samples code for your development environment; see [FastDeploy prebuilt libraries](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
This directory provides `seq_cls_infer.cc` to quickly complete a C++ deployment example for text classification on CPU/GPU.
## Text Classification Task
### Quick Start
The following example shows how to use the FastDeploy library to run C++ inference for text classification with the ERNIE 3.0 Medium model on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) of the CLUE Benchmark.
```bash
# Download the SDK and build the example code (the SDK ships with the examples)
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-0.4.0.tgz
tar xvf fastdeploy-linux-x64-gpu-0.4.0.tgz
cd fastdeploy-linux-x64-gpu-0.4.0/examples/text/ernie-3.0/cpp
mkdir build
cd build
# Run cmake; FASTDEPLOY_INSTALL_DIR must point to the FastDeploy SDK directory
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/../../../../../../fastdeploy-linux-x64-gpu-0.4.0
make -j
# Download the ERNIE 3.0 model fine-tuned on AFQMC, together with its vocab
wget https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz
tar xvfz ernie-3.0-medium-zh-afqmc.tgz
# CPU inference
./seq_cls_infer_demo --device cpu --model_dir ernie-3.0-medium-zh-afqmc
# GPU inference
./seq_cls_infer_demo --device gpu --model_dir ernie-3.0-medium-zh-afqmc
```
The following results are returned after running:
```bash
[INFO] /paddle/FastDeploy/examples/text/ernie-3.0/cpp/seq_cls_infer.cc(93)::CreateRuntimeOption model_path = ernie-3.0-medium-zh-afqmc/infer.pdmodel, param_path = ernie-3.0-medium-zh-afqmc/infer.pdiparams
[INFO] fastdeploy/runtime.cc(469)::Init Runtime initialized with Backend::ORT in Device::CPU.
Batch id: 0, example id: 0, sentence 1: 花呗收款额度限制, sentence 2: 收钱码,对花呗支付的金额有限制吗, label: 1, confidence: 0.581852
Batch id: 1, example id: 0, sentence 1: 花呗支持高铁票支付吗, sentence 2: 为什么友付宝不支持花呗付款, label: 0, confidence: 0.997921
```
### Parameters
Besides the flags shown in the example above, `seq_cls_infer_demo` supports additional command-line flags, described below; a fuller invocation is sketched after the table.
| Flag | Description |
|----------|--------------|
|--model_dir | Directory of the deployment model |
|--batch_size | Maximum batch size, 1 by default |
|--max_length | Maximum sequence length, 128 by default |
|--device | Device to run on; one of ['cpu', 'gpu'], 'cpu' by default |
|--backend | Inference backend; one of ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'], 'onnx_runtime' by default |
|--use_fp16 | Whether to run inference in FP16; available with the tensorrt and paddle_tensorrt backends, False by default |
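For example, the following invocation (a sketch built only from the flags above, assuming a GPU machine and a TensorRT-enabled SDK) runs the demo with the paddle_tensorrt backend in FP16:
```bash
# GPU inference through Paddle-TensorRT with FP16 enabled; batch size and
# sequence length bound the TensorRT input shapes set up by the demo.
./seq_cls_infer_demo --device gpu --backend paddle_tensorrt --use_fp16=true \
    --batch_size 2 --max_length 128 --model_dir ernie-3.0-medium-zh-afqmc
```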
## Related Docs
[Detailed introduction to ERNIE 3.0 models](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
[How to export ERNIE 3.0 models](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
[Python deployment for ERNIE 3.0 models](../python/README.md)
+76
@@ -0,0 +1,76 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(GIT_URL "https://github.com")
SET(GFLAGS_PREFIX_DIR ${CMAKE_CURRENT_BINARY_DIR}/gflags)
SET(GFLAGS_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/install/gflags)
SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE)
set(GFLAGS_REPOSITORY ${GIT_URL}/gflags/gflags.git)
set(GFLAGS_TAG "v2.2.2")
IF(WIN32)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
ELSE(WIN32)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
set(BUILD_COMMAND $(MAKE) --silent)
set(INSTALL_COMMAND $(MAKE) install)
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})
ExternalProject_Add(
extern_gflags
${EXTERNAL_PROJECT_LOG_ARGS}
${SHALLOW_CLONE}
GIT_REPOSITORY ${GFLAGS_REPOSITORY}
GIT_TAG ${GFLAGS_TAG}
PREFIX ${GFLAGS_PREFIX_DIR}
UPDATE_COMMAND ""
BUILD_COMMAND ${BUILD_COMMAND}
INSTALL_COMMAND ${INSTALL_COMMAND}
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DBUILD_STATIC_LIBS=ON
-DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}
)
ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
ADD_DEPENDENCIES(gflags extern_gflags)
LIST(APPEND THIRD_LIBS gflags)
if (UNIX)
LIST(APPEND THIRD_LIBS pthread)
endif()
# On Windows (including MinGW), the Shlwapi library is used by gflags if available.
if (WIN32)
include(CheckIncludeFileCXX)
check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI)
if (HAVE_SHLWAPI)
set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib)
endif(HAVE_SHLWAPI)
endif (WIN32)
@@ -0,0 +1,269 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <iostream>
#include <sstream>
#include <utility>
#include <vector>
#include "fastdeploy/function/reduce.h"
#include "fastdeploy/function/softmax.h"
#include "fastdeploy/runtime.h"
#include "fastdeploy/utils/path.h"
#include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h"
#include "gflags/gflags.h"
using namespace paddlenlp;
using namespace faster_tokenizer::tokenizers_impl;
#ifdef _WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_string(vocab_path, "", "Path of the vocab file.");
DEFINE_string(device, "cpu",
"Type of inference device, support 'cpu' or 'gpu'.");
DEFINE_string(backend, "onnx_runtime",
"The inference runtime backend, support: ['onnx_runtime', "
"'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']");
DEFINE_int32(batch_size, 1, "The batch size of data.");
DEFINE_int32(max_length, 128, "The max length of sequence.");
DEFINE_bool(use_fp16, false, "Whether to use FP16 mode.");
void PrintUsage() {
fastdeploy::FDINFO
<< "Usage: seq_cls_infer_demo --model_dir dir --device [cpu|gpu] "
"--backend "
"[onnx_runtime|paddle|openvino|tensorrt|paddle_tensorrt] "
"--batch_size size --max_length len --use_fp16 false"
<< std::endl;
fastdeploy::FDINFO << "Default value of device: cpu" << std::endl;
fastdeploy::FDINFO << "Default value of backend: onnx_runtime" << std::endl;
fastdeploy::FDINFO << "Default value of batch_size: 1" << std::endl;
fastdeploy::FDINFO << "Default value of max_length: 128" << std::endl;
fastdeploy::FDINFO << "Default value of use_fp16: false" << std::endl;
}
bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
if (FLAGS_device == "gpu") {
option->UseGpu();
} else if (FLAGS_device == "cpu") {
option->UseCpu();
} else {
fastdeploy::FDERROR << "The avilable device should be one of the list "
"['cpu', 'gpu']. But receive '"
<< FLAGS_device << "'" << std::endl;
return false;
}
if (FLAGS_backend == "onnx_runtime") {
option->UseOrtBackend();
} else if (FLAGS_backend == "paddle") {
option->UsePaddleBackend();
} else if (FLAGS_backend == "openvino") {
option->UseOpenVINOBackend();
} else if (FLAGS_backend == "tensorrt" ||
FLAGS_backend == "paddle_tensorrt") {
option->UseTrtBackend();
if (FLAGS_backend == "paddle_tensorrt") {
option->EnablePaddleToTrt();
option->EnablePaddleTrtCollectShape();
}
std::string trt_file = FLAGS_model_dir + sep + "infer.trt";
option->SetTrtInputShape("input_ids", {1, FLAGS_max_length},
{FLAGS_batch_size, FLAGS_max_length},
{FLAGS_batch_size, FLAGS_max_length});
option->SetTrtInputShape("token_type_ids", {1, FLAGS_max_length},
{FLAGS_batch_size, FLAGS_max_length},
{FLAGS_batch_size, FLAGS_max_length});
    if (FLAGS_use_fp16) {
      option->EnableTrtFP16();
      trt_file = trt_file + ".fp16";
    }
    // Cache the serialized TensorRT engine so it is not rebuilt on every run,
    // mirroring set_trt_cache_file in the Python example.
    option->SetTrtCacheFile(trt_file);
} else {
fastdeploy::FDERROR << "The avilable backend should be one of the list "
"['paddle', 'openvino', 'tensorrt', "
"'paddle_tensorrt']. But receive '"
<< FLAGS_backend << "'" << std::endl;
return false;
}
std::string model_path = FLAGS_model_dir + sep + "infer.pdmodel";
std::string param_path = FLAGS_model_dir + sep + "infer.pdiparams";
fastdeploy::FDINFO << "model_path = " << model_path
<< ", param_path = " << param_path << std::endl;
option->SetModelPath(model_path, param_path);
return true;
}
bool BatchFyTexts(const std::vector<std::string>& texts, int batch_size,
std::vector<std::vector<std::string>>* batch_texts) {
for (int idx = 0; idx < texts.size(); idx += batch_size) {
int rest = texts.size() - idx;
int curr_size = std::min(batch_size, rest);
std::vector<std::string> batch_text(curr_size);
std::copy_n(texts.begin() + idx, curr_size, batch_text.begin());
batch_texts->emplace_back(std::move(batch_text));
}
return true;
}
struct SeqClsResult {
int label;
float confidence;
};
struct ErnieForSequenceClassificationPredictor {
fastdeploy::Runtime runtime_;
ErnieFasterTokenizer tokenizer_;
ErnieForSequenceClassificationPredictor(
const fastdeploy::RuntimeOption& option,
const ErnieFasterTokenizer& tokenizer)
: tokenizer_(tokenizer) {
runtime_.Init(option);
}
bool Preprocess(const std::vector<std::string>& texts,
const std::vector<std::string>& texts_pair,
std::vector<fastdeploy::FDTensor>* inputs) {
std::vector<faster_tokenizer::core::Encoding> encodings;
std::vector<faster_tokenizer::core::EncodeInput> text_pair_input;
// 1. Tokenize the text or (text, text_pair)
if (texts_pair.empty()) {
for (int i = 0; i < texts.size(); ++i) {
text_pair_input.emplace_back(texts[i]);
}
} else {
if (texts.size() != texts_pair.size()) {
return false;
}
for (int i = 0; i < texts.size(); ++i) {
text_pair_input.emplace_back(
std::pair<std::string, std::string>(texts[i], texts_pair[i]));
}
}
tokenizer_.EncodeBatchStrings(text_pair_input, &encodings);
// 2. Construct the input vector tensor
// 2.1 Allocate input tensor
int64_t batch_size = texts.size();
int64_t seq_len = 0;
if (batch_size > 0) {
seq_len = encodings[0].GetIds().size();
}
inputs->resize(runtime_.NumInputs());
for (int i = 0; i < runtime_.NumInputs(); ++i) {
(*inputs)[i].Allocate({batch_size, seq_len},
fastdeploy::FDDataType::INT64,
runtime_.GetInputInfo(i).name);
}
// 2.2 Set the value of data
size_t start = 0;
int64_t* input_ids_ptr =
reinterpret_cast<int64_t*>((*inputs)[0].MutableData());
int64_t* type_ids_ptr =
reinterpret_cast<int64_t*>((*inputs)[1].MutableData());
for (int i = 0; i < encodings.size(); ++i) {
auto&& curr_input_ids = encodings[i].GetIds();
auto&& curr_type_ids = encodings[i].GetTypeIds();
std::copy(curr_input_ids.begin(), curr_input_ids.end(),
input_ids_ptr + start);
std::copy(curr_type_ids.begin(), curr_type_ids.end(),
type_ids_ptr + start);
start += seq_len;
}
return true;
}
bool Postprocess(const std::vector<fastdeploy::FDTensor>& outputs,
std::vector<SeqClsResult>* seq_cls_results) {
const auto& logits = outputs[0];
fastdeploy::FDTensor probs;
fastdeploy::Softmax(logits, &probs);
fastdeploy::FDTensor labels, confidences;
fastdeploy::Max(probs, &confidences, {-1});
fastdeploy::ArgMax(probs, &labels, -1);
if (labels.Numel() != confidences.Numel()) {
return false;
}
seq_cls_results->resize(labels.Numel());
int64_t* label_ptr = reinterpret_cast<int64_t*>(labels.Data());
float* confidence_ptr = reinterpret_cast<float*>(confidences.Data());
for (int i = 0; i < labels.Numel(); ++i) {
(*seq_cls_results)[i].label = label_ptr[i];
(*seq_cls_results)[i].confidence = confidence_ptr[i];
}
return true;
}
bool Predict(const std::vector<std::string>& texts,
const std::vector<std::string>& texts_pair,
std::vector<SeqClsResult>* seq_cls_results) {
std::vector<fastdeploy::FDTensor> inputs;
if (!Preprocess(texts, texts_pair, &inputs)) {
return false;
}
std::vector<fastdeploy::FDTensor> outputs(runtime_.NumOutputs());
runtime_.Infer(inputs, &outputs);
if (!Postprocess(outputs, seq_cls_results)) {
return false;
}
return true;
}
};
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option)) {
PrintUsage();
return -1;
}
std::string vocab_path = FLAGS_vocab_path;
if (!fastdeploy::CheckFileExists(vocab_path)) {
vocab_path = fastdeploy::PathJoin(FLAGS_model_dir, "vocab.txt");
if (!fastdeploy::CheckFileExists(vocab_path)) {
fastdeploy::FDERROR << "The path of vocab " << vocab_path
<< " doesn't exist" << std::endl;
PrintUsage();
return -1;
}
}
ErnieFasterTokenizer tokenizer(vocab_path);
ErnieForSequenceClassificationPredictor predictor(option, tokenizer);
std::vector<SeqClsResult> seq_cls_results;
std::vector<std::string> texts_ds = {"花呗收款额度限制",
"花呗支持高铁票支付吗"};
std::vector<std::string> texts_pair_ds = {"收钱码,对花呗支付的金额有限制吗",
"为什么友付宝不支持花呗付款"};
std::vector<std::vector<std::string>> batch_texts, batch_texts_pair;
BatchFyTexts(texts_ds, FLAGS_batch_size, &batch_texts);
BatchFyTexts(texts_pair_ds, FLAGS_batch_size, &batch_texts_pair);
for (int bs = 0; bs < batch_texts.size(); ++bs) {
predictor.Predict(batch_texts[bs], batch_texts_pair[bs], &seq_cls_results);
for (int i = 0; i < batch_texts[bs].size(); ++i) {
std::cout << "Batch id: " << bs << ", example id: " << i
<< ", sentence 1: " << batch_texts[bs][i]
<< ", sentence 2: " << batch_texts_pair[bs][i]
<< ", label: " << seq_cls_results[i].label
<< ", confidence: " << seq_cls_results[i].confidence
<< std::endl;
}
}
return 0;
}
+71
@@ -0,0 +1,71 @@
# ERNIE 3.0 Model Python Deployment Example
Before deployment, confirm the following two steps:
- 1. The software and hardware environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Install the FastDeploy Python wheel; see [FastDeploy Python installation](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
This directory provides `seq_cls_infer.py` to quickly complete a deployment example for text classification on CPU/GPU.
## Installing Dependencies
The Python Predictor provided by this project tokenizes with the AutoTokenizer from PaddleNLP and uses fast_tokenizer to speed up tokenization. Install the dependencies with:
```bash
pip install -r requirements.txt
```
## Text Classification Task
### Quick Start
The following example shows how to use the FastDeploy library to run Python inference for text classification with the ERNIE 3.0 Medium model on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) of the CLUE Benchmark.
```bash
# Download the deployment example code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/text/ernie-3.0/python
# Download the ERNIE 3.0 model fine-tuned on the AFQMC dataset
wget https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz
tar xvfz ernie-3.0-medium-zh-afqmc.tgz
# CPU inference
python seq_cls_infer.py --device cpu --model_dir ernie-3.0-medium-zh-afqmc
# GPU inference
python seq_cls_infer.py --device gpu --model_dir ernie-3.0-medium-zh-afqmc
```
The following results are returned after running:
```bash
[INFO] fastdeploy/runtime.cc(469)::Init Runtime initialized with Backend::ORT in Device::CPU.
Batch id:0, example id:0, sentence1:花呗收款额度限制, sentence2:收钱码,对花呗支付的金额有限制吗, label:1, similarity:0.5819
Batch id:1, example id:0, sentence1:花呗支持高铁票支付吗, sentence2:为什么友付宝不支持花呗付款, label:0, similarity:0.9979
```
### Parameters
Besides the flags shown in the example above, `seq_cls_infer.py` supports additional command-line flags, described below; a fuller invocation is sketched after the table.
| Flag | Description |
|----------|--------------|
|--model_dir | Directory of the deployment model |
|--batch_size | Maximum batch size, 1 by default |
|--max_length | Maximum sequence length, 128 by default |
|--device | Device to run on; one of ['cpu', 'gpu'], 'cpu' by default |
|--backend | Inference backend; one of ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'], 'onnx_runtime' by default |
|--use_fp16 | Whether to run inference in FP16; available with the tensorrt and paddle_tensorrt backends, False by default |
|--use_fast | Whether to use FastTokenizer to speed up the tokenization stage; False by default |
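For example, the following invocation (a sketch built only from the flags above, assuming a GPU machine and a TensorRT-enabled FastDeploy build) runs the script with the paddle_tensorrt backend in FP16 and FastTokenizer enabled:
```bash
# GPU inference through Paddle-TensorRT with FP16 and FastTokenizer enabled;
# batch size and sequence length bound the TensorRT input shapes.
python seq_cls_infer.py --device gpu --backend paddle_tensorrt --use_fp16 True \
    --use_fast True --batch_size 2 --max_length 128 --model_dir ernie-3.0-medium-zh-afqmc
```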
## Related Docs
[Detailed introduction to ERNIE 3.0 models](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
[How to export ERNIE 3.0 models](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
[C++ deployment for ERNIE 3.0 models](../cpp/README.md)
@@ -0,0 +1,2 @@
faster_tokenizer
paddlenlp
@@ -0,0 +1,182 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import distutils.util
import numpy as np
import faster_tokenizer
from paddlenlp.transformers import AutoTokenizer
import fastdeploy as fd
def parse_arguments():
import argparse
import ast
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_dir", required=True, help="The directory of model.")
parser.add_argument(
"--vocab_path",
type=str,
default="",
help="The path of tokenizer vocab.")
parser.add_argument(
"--device",
type=str,
default='cpu',
choices=['gpu', 'cpu'],
help="Type of inference device, support 'cpu' or 'gpu'.")
parser.add_argument(
"--backend",
type=str,
default='onnx_runtime',
choices=[
'onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'
],
help="The inference runtime backend.")
parser.add_argument(
"--batch_size", type=int, default=1, help="The batch size of data.")
parser.add_argument(
"--max_length",
type=int,
default=128,
help="The max length of sequence.")
parser.add_argument(
"--log_interval",
type=int,
default=10,
help="The interval of logging.")
parser.add_argument(
"--use_fp16",
type=distutils.util.strtobool,
default=False,
help="Wheter to use FP16 mode")
parser.add_argument(
"--use_fast",
type=distutils.util.strtobool,
default=False,
help="Whether to use fast_tokenizer to accelarate the tokenization.")
return parser.parse_args()
def batchfy_text(texts, batch_size):
batch_texts = []
batch_start = 0
while batch_start < len(texts):
batch_texts += [
texts[batch_start:min(batch_start + batch_size, len(texts))]
]
batch_start += batch_size
return batch_texts
class ErnieForSequenceClassificationPredictor(object):
def __init__(self, args):
self.tokenizer = AutoTokenizer.from_pretrained(
'ernie-3.0-medium-zh', use_faster=args.use_fast)
self.runtime = self.create_fd_runtime(args)
self.batch_size = args.batch_size
self.max_length = args.max_length
def create_fd_runtime(self, args):
option = fd.RuntimeOption()
model_path = os.path.join(args.model_dir, "infer.pdmodel")
params_path = os.path.join(args.model_dir, "infer.pdiparams")
option.set_model_path(model_path, params_path)
if args.device == 'cpu':
option.use_cpu()
else:
option.use_gpu()
if args.backend == 'paddle':
option.use_paddle_backend()
elif args.backend == 'onnx_runtime':
option.use_ort_backend()
elif args.backend == 'openvino':
option.use_openvino_backend()
else:
option.use_trt_backend()
if args.backend == 'paddle_tensorrt':
option.enable_paddle_to_trt()
option.enable_paddle_trt_collect_shape()
trt_file = os.path.join(args.model_dir, "infer.trt")
option.set_trt_input_shape(
'input_ids',
min_shape=[1, args.max_length],
opt_shape=[args.batch_size, args.max_length],
max_shape=[args.batch_size, args.max_length])
option.set_trt_input_shape(
'token_type_ids',
min_shape=[1, args.max_length],
opt_shape=[args.batch_size, args.max_length],
max_shape=[args.batch_size, args.max_length])
if args.use_fp16:
option.enable_trt_fp16()
trt_file = trt_file + ".fp16"
option.set_trt_cache_file(trt_file)
return fd.Runtime(option)
def preprocess(self, texts, texts_pair):
data = self.tokenizer(
texts,
texts_pair,
max_length=self.max_length,
padding=True,
truncation=True)
input_ids_name = self.runtime.get_input_info(0).name
token_type_ids_name = self.runtime.get_input_info(1).name
input_map = {
input_ids_name: np.array(
data["input_ids"], dtype="int64"),
token_type_ids_name: np.array(
data["token_type_ids"], dtype="int64")
}
return input_map
def infer(self, input_map):
results = self.runtime.infer(input_map)
return results
def postprocess(self, infer_data):
logits = np.array(infer_data[0])
max_value = np.max(logits, axis=1, keepdims=True)
exp_data = np.exp(logits - max_value)
probs = exp_data / np.sum(exp_data, axis=1, keepdims=True)
out_dict = {
"label": probs.argmax(axis=-1),
"confidence": probs.max(axis=-1)
}
return out_dict
def predict(self, texts, texts_pair=None):
input_map = self.preprocess(texts, texts_pair)
infer_result = self.infer(input_map)
output = self.postprocess(infer_result)
return output
if __name__ == "__main__":
args = parse_arguments()
predictor = ErnieForSequenceClassificationPredictor(args)
texts_ds = ["花呗收款额度限制", "花呗支持高铁票支付吗"]
texts_pair_ds = ["收钱码,对花呗支付的金额有限制吗", "为什么友付宝不支持花呗付款"]
batch_texts = batchfy_text(texts_ds, args.batch_size)
batch_texts_pair = batchfy_text(texts_pair_ds, args.batch_size)
for bs, (texts,
texts_pair) in enumerate(zip(batch_texts, batch_texts_pair)):
outputs = predictor.predict(texts, texts_pair)
for i, (sentence1, sentence2) in enumerate(zip(texts, texts_pair)):
print(
f"Batch id:{bs}, example id:{i}, sentence1:{sentence1}, sentence2:{sentence2}, label:{outputs['label'][i]}, similarity:{outputs['confidence'][i]:.4f}"
)
+1 -1
@@ -1,4 +1,4 @@
# Ernie-3.0 Serving Deployment Example
# ERNIE 3.0 Serving Deployment Example
## Prepare the Model
@@ -1 +1 @@
This directory stores the Ernie-3.0 model
This directory stores the ERNIE 3.0 model
@@ -1 +1 @@
This directory stores the Ernie-3.0 model
This directory stores the ERNIE 3.0 model