Merge branch 'PaddlePaddle:develop' into gbd_android
@@ -205,6 +205,7 @@ if(ENABLE_LITE_BACKEND)
endif()

if(ENABLE_PADDLE_BACKEND)
+  set(ENABLE_PADDLE_FRONTEND ON)
  add_definitions(-DENABLE_PADDLE_BACKEND)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS})
  include(${PROJECT_SOURCE_DIR}/cmake/paddle_inference.cmake)
@@ -18,9 +18,9 @@

**⚡️FastDeploy** is an **easy-to-use and efficient** inference deployment toolkit. It covers the industry's 🔥**popular CV, NLP and Speech AI models** and provides 📦**out-of-the-box** deployment, including image classification, object detection, image segmentation, face detection, face recognition, human keypoint detection, OCR, semantic understanding and other tasks, meeting developers' **multi-scenario**, **multi-hardware** and **multi-platform** industrial deployment needs.

-| [Object Detection](examples/vision/detection) | [3D Object Detection](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Portrait Segmentation](examples/vision/segmentation/paddleseg) |
+| [Image Classification](examples/vision/classification) | [Object Detection](examples/vision/detection) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Portrait Segmentation](examples/vision/segmentation/paddleseg) |
|:---:|:---:|:---:|:---:|
-| <img src='https://user-images.githubusercontent.com/54695910/188054680-2f8d1952-c120-4b67-88fc-7d2d7d2378b4.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188270227-1a4671b3-0123-46ab-8d0f-0e4132ae8ec0.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054711-6119f0e7-d741-43b1-b273-9493d103d49f.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054718-6395321c-8937-4fa0-881c-5b20deb92aaa.gif' height="126px" width="190px"> |
+| <img src='https://user-images.githubusercontent.com/54695910/200465949-da478e1b-21ce-43b8-9f3f-287460e786bd.png' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054680-2f8d1952-c120-4b67-88fc-7d2d7d2378b4.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054711-6119f0e7-d741-43b1-b273-9493d103d49f.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054718-6395321c-8937-4fa0-881c-5b20deb92aaa.gif' height="126px" width="190px"> |
| [**Image Matting**](examples/vision/matting) | [**Real-Time Matting**](examples/vision/matting) | [**OCR**](examples/vision/ocr) | [**Face Alignment**](examples/vision/facealign) |
| <img src='https://user-images.githubusercontent.com/54695910/188058231-a5fe1ce1-0a38-460f-9582-e0b881514908.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054691-e4cb1a70-09fe-4691-bc62-5552d50bd853.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054669-a85996ba-f7f3-4646-ae1f-3b7e3e353e7d.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188059460-9845e717-c30a-4252-bd80-b7f6d4cf30cb.png' height="126px" width="190px"> |
| [**Pose Estimation**](examples/vision/keypointdetection) | [**Behavior Recognition**](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [**NLP**](examples/text) | [**Speech**](examples/audio/pp-tts) |
@@ -81,8 +81,8 @@

- [4. Tips and tricks for using TensorRT](docs/cn/faq/tensorrt_tricks.md)
- [5. How to add a new model](docs/cn/faq/develop_a_new_model.md) (in progress)
- More FastDeploy deployment modules
-  - [Serving deployment](../serving)
-  - [Benchmark tests](../benchmark)
+  - [Serving deployment](./serving)
+  - [Benchmark tests](./benchmark)
</details>

* **🖥️ Server-side deployment**
@@ -20,9 +20,9 @@ English | [简体中文](README_CN.md)

**⚡️FastDeploy** is an **accessible and efficient** deployment development toolkit. It covers 🔥**critical CV, NLP and Speech AI models** in the industry and provides 📦**out-of-the-box** deployment experience, including image classification, object detection, image segmentation, face detection, face recognition, human keypoint detection, OCR, semantic understanding and other tasks, to meet developers' industrial deployment needs for **multi-scenario**, **multi-hardware** and **multi-platform**.

-| [Object Detection](examples/vision/detection) | [3D Object Detection](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Portrait Segmentation](examples/vision/segmentation/paddleseg) |
+| [Image Classification](examples/vision/classification) | [Object Detection](examples/vision/detection) | [Semantic Segmentation](examples/vision/segmentation/paddleseg) | [Portrait Segmentation](examples/vision/segmentation/paddleseg) |
|:---:|:---:|:---:|:---:|
-| <img src='https://user-images.githubusercontent.com/54695910/188054680-2f8d1952-c120-4b67-88fc-7d2d7d2378b4.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188270227-1a4671b3-0123-46ab-8d0f-0e4132ae8ec0.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054711-6119f0e7-d741-43b1-b273-9493d103d49f.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054718-6395321c-8937-4fa0-881c-5b20deb92aaa.gif' height="126px" width="190px"> |
+| <img src='https://user-images.githubusercontent.com/54695910/200465949-da478e1b-21ce-43b8-9f3f-287460e786bd.png' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054680-2f8d1952-c120-4b67-88fc-7d2d7d2378b4.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054711-6119f0e7-d741-43b1-b273-9493d103d49f.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054718-6395321c-8937-4fa0-881c-5b20deb92aaa.gif' height="126px" width="190px"> |
| [**Image Matting**](examples/vision/matting) | [**Real-Time Matting**](examples/vision/matting) | [**OCR**](examples/vision/ocr) | [**Face Alignment**](examples/vision/facealign) |
| <img src='https://user-images.githubusercontent.com/54695910/188058231-a5fe1ce1-0a38-460f-9582-e0b881514908.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054691-e4cb1a70-09fe-4691-bc62-5552d50bd853.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188054669-a85996ba-f7f3-4646-ae1f-3b7e3e353e7d.gif' height="126px" width="190px"> | <img src='https://user-images.githubusercontent.com/54695910/188059460-9845e717-c30a-4252-bd80-b7f6d4cf30cb.png' height="126px" width="190px"> |
| [**Pose Estimation**](examples/vision/keypointdetection) | [**Behavior Recognition**](https://github.com/PaddlePaddle/FastDeploy/issues/6) | [**NLP**](examples/text) | [**Speech**](examples/audio/pp-tts) |
||||
@@ -0,0 +1,14 @@
|
||||
# 第三方库依赖
|
||||
|
||||
FastDeploy当前根据编译选项,会依赖如下第三方依赖
|
||||
|
||||
- OpenCV: 当ENABLE_VISION=ON时,会自动下载预编译OpenCV 3.4.16库
|
||||
- ONNX Runimte: 当ENABLE_ORT_BACKEND=ON时,会自动下载ONNX Runtime库
|
||||
- OpenVINO: 当ENABLE_OPENVINO_BACKEND=ON时,会自动下载OpenVINO库
|
||||
|
||||
用户在实际编译时,可能会根据自身需求集成环境中已有的第三方库,可通出如下开关来配置
|
||||
|
||||
|
||||
- OPENCV_DIRECTORY: 指定环境中的OpenCV路径,如 `-DOPENCV_DIRECTORY=/usr/lib/aarch64-linux-gnu/cmake/opencv4/`
|
||||
- ORT_DIRECTORY: 指定环境中的ONNX Runtime路径, 如`-DORT_DIRECTORY=/download/onnxruntime-linux-x64-1.0.0`
|
||||
- OPENVINO_DIRECTORY: 指定环境中的OpenVINO路径, 如`-DOPENVINO_DIRECTORY=//download/openvino`
|
||||
@@ -0,0 +1,39 @@

# ERNIE 3.0 Model Deployment

## Detailed model description

- [PaddleNLP ERNIE 3.0 model description](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)

## Supported models

| Model | Architecture | Language |
| :---: | :--------: | :--------: |
| `ERNIE 3.0-Base` | 12-layers, 768-hidden, 12-heads | Chinese |
| `ERNIE 3.0-Medium` | 6-layers, 768-hidden, 12-heads | Chinese |
| `ERNIE 3.0-Mini` | 6-layers, 384-hidden, 12-heads | Chinese |
| `ERNIE 3.0-Micro` | 4-layers, 384-hidden, 12-heads | Chinese |
| `ERNIE 3.0-Nano` | 4-layers, 312-hidden, 12-heads | Chinese |

## Supported NLP tasks

| Task | Supported |
| :--------------- | ------- |
| Text classification | ✅ |
| Sequence labeling | ❌ |

## Export the deployment model

Before deployment, the trained ERNIE model must first be exported as a deployment model; see [Export model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0) for the export steps. A quick completeness check is sketched below.
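The export step produces a directory of static-graph deployment files. As a quick check that an exported directory is complete, here is a small illustrative Python sketch; the expected file names are taken from the examples in this repository, and `check_exported_model` is a hypothetical helper, not part of any released API.

```python
import os

# Deployment files the FastDeploy examples in this repository expect
# to find inside the model directory.
EXPECTED_FILES = ["infer.pdmodel", "infer.pdiparams", "vocab.txt"]


def check_exported_model(model_dir):
    """Return the expected deployment files missing from model_dir."""
    return [
        name for name in EXPECTED_FILES
        if not os.path.isfile(os.path.join(model_dir, name))
    ]


missing = check_exported_model("ernie-3.0-medium-zh-afqmc")
print("missing files:", missing or "none")
```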
## Download fine-tuned models

### Classification task

For easy testing, an ERNIE 3.0-Medium model fine-tuned on the text classification [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) is provided below; developers can download it and try it directly.

- [ERNIE 3.0 Medium AFQMC](https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz)

## Detailed deployment documentation

- [Python deployment](python)
- [C++ deployment](cpp)
- [Serving deployment](serving)
@@ -0,0 +1,26 @@

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

PROJECT(infer_demo C CXX)
CMAKE_MINIMUM_REQUIRED(VERSION 3.10)

option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
set(THIRD_LIBS "")
include(gflags.cmake)
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)

include_directories(${FASTDEPLOY_INCS})

add_executable(seq_cls_infer_demo ${PROJECT_SOURCE_DIR}/seq_cls_infer.cc)
target_link_libraries(seq_cls_infer_demo ${FASTDEPLOY_LIBS} ${THIRD_LIBS})
@@ -0,0 +1,70 @@

# ERNIE 3.0 Model C++ Deployment Example

Before deployment, confirm the following two steps:

- 1. The software and hardware environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the prebuilt deployment library and samples code matching your development environment; see [FastDeploy prebuilt libraries](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)

This directory provides `seq_cls_infer.cc`, a C++ deployment example that quickly runs the text classification task on CPU/GPU.

## Text classification task

### Quick start

The following example shows how to use the FastDeploy library to run C++ text classification inference with an ERNIE 3.0 Medium model on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) from the CLUE Benchmark.

```bash
# Download the SDK and build the model examples (the SDK already contains the examples code)
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-0.4.0.tgz
tar xvf fastdeploy-linux-x64-gpu-0.4.0.tgz

cd fastdeploy-linux-x64-gpu-0.4.0/examples/text/ernie-3.0/cpp
mkdir build
cd build
# Run cmake; FASTDEPLOY_INSTALL_DIR must point to the directory of the FastDeploy SDK.
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/../../../../../../fastdeploy-linux-x64-gpu-0.4.0
make -j

# Download the ERNIE 3.0 model fine-tuned on the AFQMC dataset, together with its vocabulary
wget https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz
tar xvfz ernie-3.0-medium-zh-afqmc.tgz

# CPU inference
./seq_cls_infer_demo --device cpu --model_dir ernie-3.0-medium-zh-afqmc

# GPU inference
./seq_cls_infer_demo --device gpu --model_dir ernie-3.0-medium-zh-afqmc
```

The run produces output like the following:

```bash
[INFO] /paddle/FastDeploy/examples/text/ernie-3.0/cpp/seq_cls_infer.cc(93)::CreateRuntimeOption model_path = ernie-3.0-medium-zh-afqmc/infer.pdmodel, param_path = ernie-3.0-medium-zh-afqmc/infer.pdiparams
[INFO] fastdeploy/runtime.cc(469)::Init Runtime initialized with Backend::ORT in Device::CPU.
Batch id: 0, example id: 0, sentence 1: 花呗收款额度限制, sentence 2: 收钱码,对花呗支付的金额有限制吗, label: 1, confidence: 0.581852
Batch id: 1, example id: 0, sentence 1: 花呗支持高铁票支付吗, sentence 2: 为什么友付宝不支持花呗付款, label: 0, confidence: 0.997921
```

### Parameter description

Besides the parameters shown in the example above, `seq_cls_infer_demo` supports more command-line parameters, described below.

| Parameter | Description |
|----------|--------------|
|--model_dir | Directory of the deployment model |
|--batch_size | Maximum batch size for inference; defaults to 1 |
|--max_length | Maximum sequence length; defaults to 128 |
|--device | Device to run on; one of ['cpu', 'gpu']; defaults to 'cpu' |
|--backend | Inference backend; one of ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']; defaults to 'onnx_runtime' |
|--use_fp16 | Whether to run inference in FP16 mode; can be enabled with the tensorrt and paddle_tensorrt backends; defaults to False |

## Related documentation

[Detailed introduction of the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)

[How to export the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)

[Python deployment of the ERNIE 3.0 model](../python/README.md)
@@ -0,0 +1,76 @@

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

INCLUDE(ExternalProject)

SET(GIT_URL "https://github.com")
SET(GFLAGS_PREFIX_DIR ${CMAKE_CURRENT_BINARY_DIR}/gflags)
SET(GFLAGS_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/install/gflags)
SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE)
set(GFLAGS_REPOSITORY ${GIT_URL}/gflags/gflags.git)
set(GFLAGS_TAG "v2.2.2")
IF(WIN32)
  set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
ELSE(WIN32)
  set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
  set(BUILD_COMMAND $(MAKE) --silent)
  set(INSTALL_COMMAND $(MAKE) install)
ENDIF(WIN32)

INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})

ExternalProject_Add(
  extern_gflags
  ${EXTERNAL_PROJECT_LOG_ARGS}
  ${SHALLOW_CLONE}
  GIT_REPOSITORY ${GFLAGS_REPOSITORY}
  GIT_TAG ${GFLAGS_TAG}
  PREFIX ${GFLAGS_PREFIX_DIR}
  UPDATE_COMMAND ""
  BUILD_COMMAND ${BUILD_COMMAND}
  INSTALL_COMMAND ${INSTALL_COMMAND}
  CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
             -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
             -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
             -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
             -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
             -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
             -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
             -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
             -DBUILD_STATIC_LIBS=ON
             -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
             -DCMAKE_POSITION_INDEPENDENT_CODE=ON
             -DBUILD_TESTING=OFF
             -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
             ${EXTERNAL_OPTIONAL_ARGS}
  CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
                   -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
                   -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
  BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}
)

ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
ADD_DEPENDENCIES(gflags extern_gflags)
LIST(APPEND THIRD_LIBS gflags)
if (UNIX)
  LIST(APPEND THIRD_LIBS pthread)
endif()
# On Windows (including MinGW), the Shlwapi library is used by gflags if available.
if (WIN32)
  include(CheckIncludeFileCXX)
  check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI)
  if (HAVE_SHLWAPI)
    set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib)
  endif(HAVE_SHLWAPI)
endif (WIN32)
@@ -0,0 +1,269 @@

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <iostream>
#include <sstream>
#include <vector>

#include "fastdeploy/function/reduce.h"
#include "fastdeploy/function/softmax.h"
#include "fastdeploy/runtime.h"
#include "fastdeploy/utils/path.h"
#include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h"
#include "gflags/gflags.h"

using namespace paddlenlp;
using namespace faster_tokenizer::tokenizers_impl;

#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif

DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_string(vocab_path, "", "Path of the vocab file.");
DEFINE_string(device, "cpu",
              "Type of inference device, support 'cpu' or 'gpu'.");
DEFINE_string(backend, "onnx_runtime",
              "The inference runtime backend, support: ['onnx_runtime', "
              "'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']");
DEFINE_int32(batch_size, 1, "The batch size of data.");
DEFINE_int32(max_length, 128, "The max length of sequence.");
DEFINE_bool(use_fp16, false, "Whether to use FP16 mode.");

void PrintUsage() {
  fastdeploy::FDINFO
      << "Usage: seq_cls_infer_demo --model_dir dir --device [cpu|gpu] "
         "--backend "
         "[onnx_runtime|paddle|openvino|tensorrt|paddle_tensorrt] "
         "--batch_size size --max_length len --use_fp16 false"
      << std::endl;
  fastdeploy::FDINFO << "Default value of device: cpu" << std::endl;
  fastdeploy::FDINFO << "Default value of backend: onnx_runtime" << std::endl;
  fastdeploy::FDINFO << "Default value of batch_size: 1" << std::endl;
  fastdeploy::FDINFO << "Default value of max_length: 128" << std::endl;
  fastdeploy::FDINFO << "Default value of use_fp16: false" << std::endl;
}

bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
  if (FLAGS_device == "gpu") {
    option->UseGpu();
  } else if (FLAGS_device == "cpu") {
    option->UseCpu();
  } else {
    fastdeploy::FDERROR << "The available device should be one of the list "
                           "['cpu', 'gpu']. But receive '"
                        << FLAGS_device << "'" << std::endl;
    return false;
  }

  if (FLAGS_backend == "onnx_runtime") {
    option->UseOrtBackend();
  } else if (FLAGS_backend == "paddle") {
    option->UsePaddleBackend();
  } else if (FLAGS_backend == "openvino") {
    option->UseOpenVINOBackend();
  } else if (FLAGS_backend == "tensorrt" ||
             FLAGS_backend == "paddle_tensorrt") {
    option->UseTrtBackend();
    if (FLAGS_backend == "paddle_tensorrt") {
      option->EnablePaddleToTrt();
      option->EnablePaddleTrtCollectShape();
    }
    std::string trt_file = FLAGS_model_dir + sep + "infer.trt";
    // TensorRT needs the dynamic shape range of each input declared up front.
    option->SetTrtInputShape("input_ids", {1, FLAGS_max_length},
                             {FLAGS_batch_size, FLAGS_max_length},
                             {FLAGS_batch_size, FLAGS_max_length});
    option->SetTrtInputShape("token_type_ids", {1, FLAGS_max_length},
                             {FLAGS_batch_size, FLAGS_max_length},
                             {FLAGS_batch_size, FLAGS_max_length});
    if (FLAGS_use_fp16) {
      option->EnableTrtFP16();
      trt_file = trt_file + ".fp16";
    }
  } else {
    fastdeploy::FDERROR << "The available backend should be one of the list "
                           "['onnx_runtime', 'paddle', 'openvino', "
                           "'tensorrt', 'paddle_tensorrt']. But receive '"
                        << FLAGS_backend << "'" << std::endl;
    return false;
  }
  std::string model_path = FLAGS_model_dir + sep + "infer.pdmodel";
  std::string param_path = FLAGS_model_dir + sep + "infer.pdiparams";
  fastdeploy::FDINFO << "model_path = " << model_path
                     << ", param_path = " << param_path << std::endl;
  option->SetModelPath(model_path, param_path);
  return true;
}

bool BatchFyTexts(const std::vector<std::string>& texts, int batch_size,
                  std::vector<std::vector<std::string>>* batch_texts) {
  for (int idx = 0; idx < texts.size(); idx += batch_size) {
    int rest = texts.size() - idx;
    int curr_size = std::min(batch_size, rest);
    std::vector<std::string> batch_text(curr_size);
    std::copy_n(texts.begin() + idx, curr_size, batch_text.begin());
    batch_texts->emplace_back(std::move(batch_text));
  }
  return true;
}

struct SeqClsResult {
  int label;
  float confidence;
};

struct ErnieForSequenceClassificationPredictor {
  fastdeploy::Runtime runtime_;
  ErnieFasterTokenizer tokenizer_;
  ErnieForSequenceClassificationPredictor(
      const fastdeploy::RuntimeOption& option,
      const ErnieFasterTokenizer& tokenizer)
      : tokenizer_(tokenizer) {
    runtime_.Init(option);
  }

  bool Preprocess(const std::vector<std::string>& texts,
                  const std::vector<std::string>& texts_pair,
                  std::vector<fastdeploy::FDTensor>* inputs) {
    std::vector<faster_tokenizer::core::Encoding> encodings;
    std::vector<faster_tokenizer::core::EncodeInput> text_pair_input;
    // 1. Tokenize the text or (text, text_pair)
    if (texts_pair.empty()) {
      for (int i = 0; i < texts.size(); ++i) {
        text_pair_input.emplace_back(texts[i]);
      }
    } else {
      if (texts.size() != texts_pair.size()) {
        return false;
      }
      for (int i = 0; i < texts.size(); ++i) {
        text_pair_input.emplace_back(
            std::pair<std::string, std::string>(texts[i], texts_pair[i]));
      }
    }
    tokenizer_.EncodeBatchStrings(text_pair_input, &encodings);
    // 2. Construct the input vector tensor
    // 2.1 Allocate input tensor
    int64_t batch_size = texts.size();
    int64_t seq_len = 0;
    if (batch_size > 0) {
      seq_len = encodings[0].GetIds().size();
    }
    inputs->resize(runtime_.NumInputs());
    for (int i = 0; i < runtime_.NumInputs(); ++i) {
      (*inputs)[i].Allocate({batch_size, seq_len},
                            fastdeploy::FDDataType::INT64,
                            runtime_.GetInputInfo(i).name);
    }
    // 2.2 Set the value of data
    size_t start = 0;
    int64_t* input_ids_ptr =
        reinterpret_cast<int64_t*>((*inputs)[0].MutableData());
    int64_t* type_ids_ptr =
        reinterpret_cast<int64_t*>((*inputs)[1].MutableData());
    for (int i = 0; i < encodings.size(); ++i) {
      auto&& curr_input_ids = encodings[i].GetIds();
      auto&& curr_type_ids = encodings[i].GetTypeIds();
      std::copy(curr_input_ids.begin(), curr_input_ids.end(),
                input_ids_ptr + start);
      std::copy(curr_type_ids.begin(), curr_type_ids.end(),
                type_ids_ptr + start);
      start += seq_len;
    }
    return true;
  }

  bool Postprocess(const std::vector<fastdeploy::FDTensor>& outputs,
                   std::vector<SeqClsResult>* seq_cls_results) {
    const auto& logits = outputs[0];
    fastdeploy::FDTensor probs;
    fastdeploy::Softmax(logits, &probs);

    fastdeploy::FDTensor labels, confidences;
    fastdeploy::Max(probs, &confidences, {-1});
    fastdeploy::ArgMax(probs, &labels, -1);
    if (labels.Numel() != confidences.Numel()) {
      return false;
    }

    seq_cls_results->resize(labels.Numel());
    int64_t* label_ptr = reinterpret_cast<int64_t*>(labels.Data());
    float* confidence_ptr = reinterpret_cast<float*>(confidences.Data());
    for (int i = 0; i < labels.Numel(); ++i) {
      (*seq_cls_results)[i].label = label_ptr[i];
      (*seq_cls_results)[i].confidence = confidence_ptr[i];
    }
    return true;
  }

  bool Predict(const std::vector<std::string>& texts,
               const std::vector<std::string>& texts_pair,
               std::vector<SeqClsResult>* seq_cls_results) {
    std::vector<fastdeploy::FDTensor> inputs;
    if (!Preprocess(texts, texts_pair, &inputs)) {
      return false;
    }

    std::vector<fastdeploy::FDTensor> outputs(runtime_.NumOutputs());
    runtime_.Infer(inputs, &outputs);

    if (!Postprocess(outputs, seq_cls_results)) {
      return false;
    }
    return true;
  }
};

int main(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option)) {
    PrintUsage();
    return -1;
  }

  std::string vocab_path = FLAGS_vocab_path;
  if (!fastdeploy::CheckFileExists(vocab_path)) {
    vocab_path = fastdeploy::PathJoin(FLAGS_model_dir, "vocab.txt");
    if (!fastdeploy::CheckFileExists(vocab_path)) {
      fastdeploy::FDERROR << "The path of vocab " << vocab_path
                          << " doesn't exist" << std::endl;
      PrintUsage();
      return -1;
    }
  }
  ErnieFasterTokenizer tokenizer(vocab_path);

  ErnieForSequenceClassificationPredictor predictor(option, tokenizer);

  std::vector<SeqClsResult> seq_cls_results;
  std::vector<std::string> texts_ds = {"花呗收款额度限制",
                                       "花呗支持高铁票支付吗"};
  std::vector<std::string> texts_pair_ds = {"收钱码,对花呗支付的金额有限制吗",
                                            "为什么友付宝不支持花呗付款"};
  std::vector<std::vector<std::string>> batch_texts, batch_texts_pair;
  BatchFyTexts(texts_ds, FLAGS_batch_size, &batch_texts);
  BatchFyTexts(texts_pair_ds, FLAGS_batch_size, &batch_texts_pair);
  for (int bs = 0; bs < batch_texts.size(); ++bs) {
    predictor.Predict(batch_texts[bs], batch_texts_pair[bs], &seq_cls_results);
    for (int i = 0; i < batch_texts[bs].size(); ++i) {
      std::cout << "Batch id: " << bs << ", example id: " << i
                << ", sentence 1: " << batch_texts[bs][i]
                << ", sentence 2: " << batch_texts_pair[bs][i]
                << ", label: " << seq_cls_results[i].label
                << ", confidence: " << seq_cls_results[i].confidence
                << std::endl;
    }
  }
  return 0;
}
@@ -0,0 +1,71 @@

# ERNIE 3.0 Model Python Deployment Example

Before deployment, confirm the following two steps:

- 1. The software and hardware environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Install the FastDeploy Python wheel; see [FastDeploy Python installation](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)

This directory provides `seq_cls_infer.py`, a deployment example that quickly runs the text classification task on CPU/GPU.

## Installing dependencies

The Python Predictor provided by this project tokenizes input with PaddleNLP's AutoTokenizer and uses fast_tokenizer to speed up tokenization. Run the following command to install the dependencies.

```bash
pip install -r requirements.txt
```
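As a quick sanity check of the tokenization step, the illustrative sketch below (not part of the example scripts) tokenizes one AFQMC-style text pair with PaddleNLP's AutoTokenizer; passing `use_faster=True` switches on the fast_tokenizer backend, mirroring what `seq_cls_infer.py` does.

```python
import numpy as np
from paddlenlp.transformers import AutoTokenizer

# Load the ERNIE 3.0 tokenizer; use_faster=True enables fast_tokenizer.
tokenizer = AutoTokenizer.from_pretrained(
    "ernie-3.0-medium-zh", use_faster=True)

# Tokenize a (text, text_pair) example the same way seq_cls_infer.py does.
data = tokenizer(
    ["花呗收款额度限制"],
    ["收钱码,对花呗支付的金额有限制吗"],
    max_length=128,
    padding=True,
    truncation=True)

# The runtime consumes two int64 arrays: input_ids and token_type_ids.
input_ids = np.array(data["input_ids"], dtype="int64")
token_type_ids = np.array(data["token_type_ids"], dtype="int64")
print(input_ids.shape, token_type_ids.shape)
```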
## Text classification task

### Quick start

The following example shows how to use the FastDeploy library to run Python text classification inference with an ERNIE 3.0 Medium model on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) from the CLUE Benchmark.

```bash
# Download the deployment example code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/text/ernie-3.0/python

# Download the ERNIE 3.0 model fine-tuned on the AFQMC dataset
wget https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz
tar xvfz ernie-3.0-medium-zh-afqmc.tgz

# CPU inference
python seq_cls_infer.py --device cpu --model_dir ernie-3.0-medium-zh-afqmc

# GPU inference
python seq_cls_infer.py --device gpu --model_dir ernie-3.0-medium-zh-afqmc
```

The run produces output like the following:

```bash
[INFO] fastdeploy/runtime.cc(469)::Init Runtime initialized with Backend::ORT in Device::CPU.
Batch id:0, example id:0, sentence1:花呗收款额度限制, sentence2:收钱码,对花呗支付的金额有限制吗, label:1, similarity:0.5819
Batch id:1, example id:0, sentence1:花呗支持高铁票支付吗, sentence2:为什么友付宝不支持花呗付款, label:0, similarity:0.9979
```
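The predictor can also be driven programmatically instead of through the CLI. Below is a minimal sketch, assuming it runs from this directory with the model extracted as above; `SimpleNamespace` simply stands in for the parsed command-line arguments.

```python
from types import SimpleNamespace

from seq_cls_infer import ErnieForSequenceClassificationPredictor

# Mirror the defaults that parse_arguments() would produce.
args = SimpleNamespace(
    model_dir="ernie-3.0-medium-zh-afqmc",
    vocab_path="",
    device="cpu",
    backend="onnx_runtime",
    batch_size=1,
    max_length=128,
    log_interval=10,
    use_fp16=False,
    use_fast=True)

predictor = ErnieForSequenceClassificationPredictor(args)
outputs = predictor.predict(["花呗收款额度限制"], ["收钱码,对花呗支付的金额有限制吗"])
# predict() returns a dict of arrays keyed by "label" and "confidence",
# one entry per input pair.
print(outputs["label"][0], outputs["confidence"][0])
```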
### Parameter description

Besides the parameters shown in the example above, `seq_cls_infer.py` supports more command-line parameters, described below.

| Parameter | Description |
|----------|--------------|
|--model_dir | Directory of the deployment model |
|--batch_size | Maximum batch size for inference; defaults to 1 |
|--max_length | Maximum sequence length; defaults to 128 |
|--device | Device to run on; one of ['cpu', 'gpu']; defaults to 'cpu' |
|--backend | Inference backend; one of ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']; defaults to 'onnx_runtime' |
|--use_fp16 | Whether to run inference in FP16 mode; can be enabled with the tensorrt and paddle_tensorrt backends; defaults to False |
|--use_fast | Whether to use FastTokenizer to speed up the tokenization stage; defaults to False (matching the default in `seq_cls_infer.py`) |
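To see how the `--backend` and `--use_fp16` flags translate into FastDeploy API calls, here is a condensed sketch of the `paddle_tensorrt` branch of `create_fd_runtime` in `seq_cls_infer.py`; the `build_runtime` wrapper itself is illustrative, not part of the script.

```python
import os

import fastdeploy as fd


def build_runtime(model_dir, device="gpu", use_fp16=True,
                  batch_size=1, max_length=128):
    """Condensed sketch of the paddle_tensorrt path in create_fd_runtime."""
    option = fd.RuntimeOption()
    option.set_model_path(
        os.path.join(model_dir, "infer.pdmodel"),
        os.path.join(model_dir, "infer.pdiparams"))
    if device == "gpu":
        option.use_gpu()
    else:
        option.use_cpu()
    option.use_trt_backend()
    option.enable_paddle_to_trt()
    option.enable_paddle_trt_collect_shape()
    # TensorRT needs the dynamic shape range of every input declared up front.
    for name in ("input_ids", "token_type_ids"):
        option.set_trt_input_shape(
            name,
            min_shape=[1, max_length],
            opt_shape=[batch_size, max_length],
            max_shape=[batch_size, max_length])
    trt_file = os.path.join(model_dir, "infer.trt")
    if use_fp16:
        option.enable_trt_fp16()
        trt_file = trt_file + ".fp16"
    # Cache the built engine so later runs skip the slow TensorRT build.
    option.set_trt_cache_file(trt_file)
    return fd.Runtime(option)
```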
## Related documentation

[Detailed introduction of the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)

[How to export the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)

[C++ deployment of the ERNIE 3.0 model](../cpp/README.md)
@@ -0,0 +1,2 @@

faster_tokenizer
paddlenlp
@@ -0,0 +1,182 @@

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import distutils.util

import numpy as np
import faster_tokenizer
from paddlenlp.transformers import AutoTokenizer
import fastdeploy as fd


def parse_arguments():
    import argparse
    import ast
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_dir", required=True, help="The directory of model.")
    parser.add_argument(
        "--vocab_path",
        type=str,
        default="",
        help="The path of tokenizer vocab.")
    parser.add_argument(
        "--device",
        type=str,
        default='cpu',
        choices=['gpu', 'cpu'],
        help="Type of inference device, support 'cpu' or 'gpu'.")
    parser.add_argument(
        "--backend",
        type=str,
        default='onnx_runtime',
        choices=[
            'onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'
        ],
        help="The inference runtime backend.")
    parser.add_argument(
        "--batch_size", type=int, default=1, help="The batch size of data.")
    parser.add_argument(
        "--max_length",
        type=int,
        default=128,
        help="The max length of sequence.")
    parser.add_argument(
        "--log_interval",
        type=int,
        default=10,
        help="The interval of logging.")
    parser.add_argument(
        "--use_fp16",
        type=distutils.util.strtobool,
        default=False,
        help="Whether to use FP16 mode")
    parser.add_argument(
        "--use_fast",
        type=distutils.util.strtobool,
        default=False,
        help="Whether to use fast_tokenizer to accelerate the tokenization.")
    return parser.parse_args()


def batchfy_text(texts, batch_size):
    batch_texts = []
    batch_start = 0
    while batch_start < len(texts):
        batch_texts += [
            texts[batch_start:min(batch_start + batch_size, len(texts))]
        ]
        batch_start += batch_size
    return batch_texts


class ErnieForSequenceClassificationPredictor(object):
    def __init__(self, args):
        self.tokenizer = AutoTokenizer.from_pretrained(
            'ernie-3.0-medium-zh', use_faster=args.use_fast)
        self.runtime = self.create_fd_runtime(args)
        self.batch_size = args.batch_size
        self.max_length = args.max_length

    def create_fd_runtime(self, args):
        option = fd.RuntimeOption()
        model_path = os.path.join(args.model_dir, "infer.pdmodel")
        params_path = os.path.join(args.model_dir, "infer.pdiparams")
        option.set_model_path(model_path, params_path)
        if args.device == 'cpu':
            option.use_cpu()
        else:
            option.use_gpu()
        if args.backend == 'paddle':
            option.use_paddle_backend()
        elif args.backend == 'onnx_runtime':
            option.use_ort_backend()
        elif args.backend == 'openvino':
            option.use_openvino_backend()
        else:
            option.use_trt_backend()
            if args.backend == 'paddle_tensorrt':
                option.enable_paddle_to_trt()
                option.enable_paddle_trt_collect_shape()
            trt_file = os.path.join(args.model_dir, "infer.trt")
            option.set_trt_input_shape(
                'input_ids',
                min_shape=[1, args.max_length],
                opt_shape=[args.batch_size, args.max_length],
                max_shape=[args.batch_size, args.max_length])
            option.set_trt_input_shape(
                'token_type_ids',
                min_shape=[1, args.max_length],
                opt_shape=[args.batch_size, args.max_length],
                max_shape=[args.batch_size, args.max_length])
            if args.use_fp16:
                option.enable_trt_fp16()
                trt_file = trt_file + ".fp16"
            option.set_trt_cache_file(trt_file)
        return fd.Runtime(option)

    def preprocess(self, texts, texts_pair):
        data = self.tokenizer(
            texts,
            texts_pair,
            max_length=self.max_length,
            padding=True,
            truncation=True)
        input_ids_name = self.runtime.get_input_info(0).name
        token_type_ids_name = self.runtime.get_input_info(1).name
        input_map = {
            input_ids_name: np.array(
                data["input_ids"], dtype="int64"),
            token_type_ids_name: np.array(
                data["token_type_ids"], dtype="int64")
        }
        return input_map

    def infer(self, input_map):
        results = self.runtime.infer(input_map)
        return results

    def postprocess(self, infer_data):
        logits = np.array(infer_data[0])
        # Numerically stable softmax: subtract the row-wise max before exp.
        max_value = np.max(logits, axis=1, keepdims=True)
        exp_data = np.exp(logits - max_value)
        probs = exp_data / np.sum(exp_data, axis=1, keepdims=True)
        out_dict = {
            "label": probs.argmax(axis=-1),
            "confidence": probs.max(axis=-1)
        }
        return out_dict

    def predict(self, texts, texts_pair=None):
        input_map = self.preprocess(texts, texts_pair)
        infer_result = self.infer(input_map)
        output = self.postprocess(infer_result)
        return output


if __name__ == "__main__":
    args = parse_arguments()
    predictor = ErnieForSequenceClassificationPredictor(args)
    texts_ds = ["花呗收款额度限制", "花呗支持高铁票支付吗"]
    texts_pair_ds = ["收钱码,对花呗支付的金额有限制吗", "为什么友付宝不支持花呗付款"]
    batch_texts = batchfy_text(texts_ds, args.batch_size)
    batch_texts_pair = batchfy_text(texts_pair_ds, args.batch_size)

    for bs, (texts,
             texts_pair) in enumerate(zip(batch_texts, batch_texts_pair)):
        outputs = predictor.predict(texts, texts_pair)
        for i, (sentence1, sentence2) in enumerate(zip(texts, texts_pair)):
            print(
                f"Batch id:{bs}, example id:{i}, sentence1:{sentence1}, sentence2:{sentence2}, label:{outputs['label'][i]}, similarity:{outputs['confidence'][i]:.4f}"
            )
@@ -1,4 +1,4 @@

-# Ernie-3.0 Serving Deployment Example
+# ERNIE 3.0 Serving Deployment Example

## Prepare the model


@@ -1 +1 @@

-This directory stores the Ernie-3.0 model
+This directory stores the ERNIE 3.0 model

@@ -1 +1 @@

-This directory stores the Ernie-3.0 model
+This directory stores the ERNIE 3.0 model