Add some comments for runtime (#309)

* Add some notes for runtime

* modify note

* modify note

* add some comment

* Add comment for runtime

* Add comment for runtime

* Add comment for runtime

* Add comment for runtime
This commit is contained in:
Jason
2022-09-30 14:43:44 +08:00
committed by GitHub
parent dfb83a889e
commit 8a1e2c6143
5 changed files with 344 additions and 239 deletions
+180 -178
View File
@@ -1,178 +1,180 @@
--- # ---
Language: Cpp # Language: Cpp
# BasedOnStyle: LLVM # # BasedOnStyle: LLVM
AccessModifierOffset: -1 # AccessModifierOffset: -1
AlignAfterOpenBracket: Align # AlignAfterOpenBracket: Align
AlignArrayOfStructures: None # AlignArrayOfStructures: None
AlignConsecutiveMacros: None # AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None # AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None # AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None # AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Right # AlignEscapedNewlines: Right
AlignOperands: Align # AlignOperands: Align
AlignTrailingComments: true # AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true # AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true # AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true # AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true # AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Never # AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false # AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All # AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All # AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never # AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false # AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None # AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None # AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false # AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: MultiLine # AlwaysBreakTemplateDeclarations: MultiLine
AttributeMacros: # AttributeMacros:
- __capability # - __capability
BinPackArguments: true # BinPackArguments: true
BinPackParameters: true # BinPackParameters: true
BraceWrapping: # BraceWrapping:
AfterCaseLabel: false # AfterCaseLabel: false
AfterClass: false # AfterClass: false
AfterControlStatement: Never # AfterControlStatement: Never
AfterEnum: false # AfterEnum: false
AfterFunction: false # AfterFunction: false
AfterNamespace: false # AfterNamespace: false
AfterObjCDeclaration: false # AfterObjCDeclaration: false
AfterStruct: false # AfterStruct: false
AfterUnion: false # AfterUnion: false
AfterExternBlock: false # AfterExternBlock: false
BeforeCatch: false # BeforeCatch: false
BeforeElse: false # BeforeElse: false
BeforeLambdaBody: false # BeforeLambdaBody: false
BeforeWhile: false # BeforeWhile: false
IndentBraces: false # IndentBraces: false
SplitEmptyFunction: true # SplitEmptyFunction: true
SplitEmptyRecord: true # SplitEmptyRecord: true
SplitEmptyNamespace: true # SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None # BreakBeforeBinaryOperators: None
BreakBeforeConceptDeclarations: true # BreakBeforeConceptDeclarations: true
BreakBeforeBraces: Attach # BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false # BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon # BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true # BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false # BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon # BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false # BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true # BreakStringLiterals: true
ColumnLimit: 80 # ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:' # # CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false # # CommentPragmas: '^[^ ]'
ConstructorInitializerAllOnOneLineOrOnePerLine: false # CommentPragmas: '^\\.+'
ConstructorInitializerIndentWidth: 4 # CompactNamespaces: false
ContinuationIndentWidth: 4 # ConstructorInitializerAllOnOneLineOrOnePerLine: false
Cpp11BracedListStyle: true # ConstructorInitializerIndentWidth: 4
DeriveLineEnding: true # ContinuationIndentWidth: 4
DerivePointerAlignment: false # Cpp11BracedListStyle: true
DisableFormat: false # DeriveLineEnding: true
EmptyLineAfterAccessModifier: Never # DerivePointerAlignment: false
EmptyLineBeforeAccessModifier: LogicalBlock # DisableFormat: false
ExperimentalAutoDetectBinPacking: false # EmptyLineAfterAccessModifier: Never
FixNamespaceComments: true # EmptyLineBeforeAccessModifier: LogicalBlock
ForEachMacros: # ExperimentalAutoDetectBinPacking: false
- foreach # FixNamespaceComments: true
- Q_FOREACH # ForEachMacros:
- BOOST_FOREACH # - foreach
IfMacros: # - Q_FOREACH
- KJ_IF_MAYBE # - BOOST_FOREACH
IncludeBlocks: Preserve # IfMacros:
IncludeCategories: # - KJ_IF_MAYBE
- Regex: '^"(llvm|llvm-c|clang|clang-c)/' # IncludeBlocks: Preserve
Priority: 2 # IncludeCategories:
SortPriority: 0 # - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
CaseSensitive: false # Priority: 2
- Regex: '^(<|"(gtest|gmock|isl|json)/)' # SortPriority: 0
Priority: 3 # CaseSensitive: false
SortPriority: 0 # - Regex: '^(<|"(gtest|gmock|isl|json)/)'
CaseSensitive: false # Priority: 3
- Regex: '.*' # SortPriority: 0
Priority: 1 # CaseSensitive: false
SortPriority: 0 # - Regex: '.*'
CaseSensitive: false # Priority: 1
IncludeIsMainRegex: '(Test)?$' # SortPriority: 0
IncludeIsMainSourceRegex: '' # CaseSensitive: false
IndentAccessModifiers: false # IncludeIsMainRegex: '(Test)?$'
IndentCaseLabels: false # IncludeIsMainSourceRegex: ''
IndentCaseBlocks: false # IndentAccessModifiers: false
IndentGotoLabels: true # IndentCaseLabels: false
IndentPPDirectives: None # IndentCaseBlocks: false
IndentExternBlock: AfterExternBlock # IndentGotoLabels: true
IndentRequires: false # IndentPPDirectives: None
IndentWidth: 2 # IndentExternBlock: AfterExternBlock
IndentWrappedFunctionNames: false # IndentRequires: false
InsertTrailingCommas: None # IndentWidth: 2
JavaScriptQuotes: Leave # IndentWrappedFunctionNames: false
JavaScriptWrapImports: true # InsertTrailingCommas: None
KeepEmptyLinesAtTheStartOfBlocks: true # JavaScriptQuotes: Leave
LambdaBodyIndentation: Signature # JavaScriptWrapImports: true
MacroBlockBegin: '' # KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockEnd: '' # LambdaBodyIndentation: Signature
MaxEmptyLinesToKeep: 1 # MacroBlockBegin: ''
NamespaceIndentation: None # MacroBlockEnd: ''
ObjCBinPackProtocolList: Auto # MaxEmptyLinesToKeep: 1
ObjCBlockIndentWidth: 2 # NamespaceIndentation: None
ObjCBreakBeforeNestedBlockParam: true # ObjCBinPackProtocolList: Auto
ObjCSpaceAfterProperty: false # ObjCBlockIndentWidth: 2
ObjCSpaceBeforeProtocolList: true # ObjCBreakBeforeNestedBlockParam: true
PenaltyBreakAssignment: 2 # ObjCSpaceAfterProperty: false
PenaltyBreakBeforeFirstCallParameter: 19 # ObjCSpaceBeforeProtocolList: true
PenaltyBreakComment: 300 # PenaltyBreakAssignment: 2
PenaltyBreakFirstLessLess: 120 # PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakString: 1000 # PenaltyBreakComment: 300
PenaltyBreakTemplateDeclaration: 10 # PenaltyBreakFirstLessLess: 120
PenaltyExcessCharacter: 1000000 # PenaltyBreakString: 1000
PenaltyReturnTypeOnItsOwnLine: 60 # PenaltyBreakTemplateDeclaration: 10
PenaltyIndentedWhitespace: 0 # PenaltyExcessCharacter: 1000000
PointerAlignment: Left # PenaltyReturnTypeOnItsOwnLine: 60
PPIndentWidth: -1 # PenaltyIndentedWhitespace: 0
ReferenceAlignment: Pointer # PointerAlignment: Left
ReflowComments: true # PPIndentWidth: -1
ShortNamespaceLines: 1 # ReferenceAlignment: Pointer
SortIncludes: CaseSensitive # ReflowComments: false
SortJavaStaticImport: Before # ShortNamespaceLines: 1
SortUsingDeclarations: true # SortIncludes: CaseSensitive
SpaceAfterCStyleCast: false # SortJavaStaticImport: Before
SpaceAfterLogicalNot: false # SortUsingDeclarations: true
SpaceAfterTemplateKeyword: true # SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true # SpaceAfterLogicalNot: false
SpaceBeforeCaseColon: false # SpaceAfterTemplateKeyword: true
SpaceBeforeCpp11BracedList: false # SpaceBeforeAssignmentOperators: true
SpaceBeforeCtorInitializerColon: true # SpaceBeforeCaseColon: false
SpaceBeforeInheritanceColon: true # SpaceBeforeCpp11BracedList: false
SpaceBeforeParens: ControlStatements # SpaceBeforeCtorInitializerColon: true
SpaceAroundPointerQualifiers: Default # SpaceBeforeInheritanceColon: true
SpaceBeforeRangeBasedForLoopColon: true # SpaceBeforeParens: ControlStatements
SpaceInEmptyBlock: false # SpaceAroundPointerQualifiers: Default
SpaceInEmptyParentheses: false # SpaceBeforeRangeBasedForLoopColon: true
SpacesBeforeTrailingComments: 1 # SpaceInEmptyBlock: false
SpacesInAngles: Never # SpaceInEmptyParentheses: false
SpacesInConditionalStatement: false # SpacesBeforeTrailingComments: 1
SpacesInContainerLiterals: true # SpacesInAngles: Never
SpacesInCStyleCastParentheses: false # SpacesInConditionalStatement: false
SpacesInLineCommentPrefix: # SpacesInContainerLiterals: true
Minimum: 1 # SpacesInCStyleCastParentheses: false
Maximum: -1 # SpacesInLineCommentPrefix:
SpacesInParentheses: false # Minimum: 1
SpacesInSquareBrackets: false # Maximum: -1
SpaceBeforeSquareBrackets: false # SpacesInParentheses: false
BitFieldColonSpacing: Both # SpacesInSquareBrackets: false
Standard: Latest # SpaceBeforeSquareBrackets: false
StatementAttributeLikeMacros: # BitFieldColonSpacing: Both
- Q_EMIT # Standard: Latest
StatementMacros: # StatementAttributeLikeMacros:
- Q_UNUSED # - Q_EMIT
- QT_REQUIRE_VERSION # StatementMacros:
TabWidth: 8 # - Q_UNUSED
UseCRLF: false # - QT_REQUIRE_VERSION
UseTab: Never # TabWidth: 8
WhitespaceSensitiveMacros: # UseCRLF: false
- STRINGIZE # UseTab: Never
- PP_STRINGIZE # WhitespaceSensitiveMacros:
- BOOST_PP_STRINGIZE # - STRINGIZE
- NS_SWIFT_NAME # - PP_STRINGIZE
- CF_SWIFT_NAME # - BOOST_PP_STRINGIZE
... # - NS_SWIFT_NAME
# - CF_SWIFT_NAME
# ...
#
+9 -9
View File
@@ -24,15 +24,15 @@ repos:
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
exclude: (?!.*third_party)^.*$ exclude: (?!.*third_party)^.*$
- repo: local # - repo: local
hooks: # hooks:
- id: clang-format-with-version-check # - id: clang-format-with-version-check
name: clang-format # name: clang-format
description: Format files with ClangFormat. # description: Format files with ClangFormat.
entry: bash .clang_format.hook -i # entry: bash .clang_format.hook -i
language: system # language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ # files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
#
- repo: local - repo: local
hooks: hooks:
- id: cpplint-cpp-source - id: cpplint-cpp-source
+5 -3
View File
@@ -24,10 +24,12 @@
namespace fastdeploy { namespace fastdeploy {
/*! @brief Information of Tensor
*/
struct TensorInfo { struct TensorInfo {
std::string name; std::string name; ///< Name of tensor
std::vector<int> shape; std::vector<int> shape; ///< Shape of tensor
FDDataType dtype; FDDataType dtype; ///< Data type of tensor
friend std::ostream& operator<<(std::ostream& output, friend std::ostream& operator<<(std::ostream& output,
const TensorInfo& info) { const TensorInfo& info) {
+30 -4
View File
@@ -93,6 +93,32 @@ std::string Str(const ModelFormat& f) {
return "UNKNOWN-ModelFormat"; return "UNKNOWN-ModelFormat";
} }
/// Stream-print a human-readable name for a Backend enum value.
/// \param out The output stream to write to.
/// \param backend The backend enumerator to print.
/// \return The same stream, to allow chaining.
std::ostream& operator<<(std::ostream& out, const Backend& backend) {
  if (backend == Backend::ORT) {
    out << "Backend::ORT";
  } else if (backend == Backend::TRT) {
    out << "Backend::TRT";
  } else if (backend == Backend::PDINFER) {
    out << "Backend::PDINFER";
  } else if (backend == Backend::OPENVINO) {
    out << "Backend::OPENVINO";
  } else if (backend == Backend::LITE) {
    out << "Backend::LITE";
  } else {
    // Fallback for unrecognized values only. Previously this was appended
    // unconditionally, so known backends printed e.g.
    // "Backend::ORTUNKNOWN-Backend".
    out << "UNKNOWN-Backend";
  }
  return out;
}
/// Stream-print a human-readable name for a ModelFormat enum value.
/// \param out The output stream to write to.
/// \param format The model format enumerator to print.
/// \return The same stream, to allow chaining.
std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
  if (format == ModelFormat::PADDLE) {
    out << "ModelFormat::PADDLE";
  } else if (format == ModelFormat::ONNX) {
    out << "ModelFormat::ONNX";
  } else {
    // Fallback for unrecognized values only. Previously this was appended
    // unconditionally, so known formats printed e.g.
    // "ModelFormat::PADDLEUNKNOWN-ModelFormat".
    out << "UNKNOWN-ModelFormat";
  }
  return out;
}
bool CheckModelFormat(const std::string& model_file, bool CheckModelFormat(const std::string& model_file,
const ModelFormat& model_format) { const ModelFormat& model_format) {
if (model_format == ModelFormat::PADDLE) { if (model_format == ModelFormat::PADDLE) {
@@ -255,6 +281,10 @@ void RuntimeOption::SetTrtInputShape(const std::string& input_name,
} }
} }
void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) {
trt_max_workspace_size = max_workspace_size;
}
void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; } void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }
void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; } void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; }
@@ -263,10 +293,6 @@ void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
trt_serialize_file = cache_file_path; trt_serialize_file = cache_file_path;
} }
void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) {
trt_max_workspace_size = max_workspace_size;
}
bool Runtime::Init(const RuntimeOption& _option) { bool Runtime::Init(const RuntimeOption& _option) {
option = _option; option = _option;
if (option.model_format == ModelFormat::AUTOREC) { if (option.model_format == ModelFormat::AUTOREC) {
+120 -45
View File
@@ -11,6 +11,13 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
/*! \file runtime.h
\brief A brief file description.
More details
*/
#pragma once #pragma once
#include <map> #include <map>
@@ -19,95 +26,149 @@
#include "fastdeploy/backends/backend.h" #include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h" #include "fastdeploy/utils/perf.h"
/** \brief All C++ FastDeploy APIs are defined inside this namespace
*
*/
namespace fastdeploy { namespace fastdeploy {
enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER, OPENVINO, LITE }; /*! Inference backend supported in FastDeploy */
// AUTOREC will according to the name of model file enum Backend {
// to decide which ModelFormat is UNKNOWN, ///< Unknown inference backend
enum FASTDEPLOY_DECL ModelFormat { AUTOREC, PADDLE, ONNX }; ORT, ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only
};
/*! Deep learning model format */
enum ModelFormat {
AUTOREC, ///< Auto recognize the model format by model file name
PADDLE, ///< Model with paddlepaddle format
ONNX, ///< Model with ONNX format
};
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
const Backend& backend);
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
const ModelFormat& format);
FASTDEPLOY_DECL std::string Str(const Backend& b); FASTDEPLOY_DECL std::string Str(const Backend& b);
FASTDEPLOY_DECL std::string Str(const ModelFormat& f); FASTDEPLOY_DECL std::string Str(const ModelFormat& f);
/**
* @brief Get all the available inference backend in FastDeploy
*/
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends(); FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
/**
* @brief Check if the inference backend available
*/
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend); FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
bool CheckModelFormat(const std::string& model_file, bool CheckModelFormat(const std::string& model_file,
const ModelFormat& model_format); const ModelFormat& model_format);
ModelFormat GuessModelFormat(const std::string& model_file); ModelFormat GuessModelFormat(const std::string& model_file);
/*! @brief Option object used when create a new Runtime object
*/
struct FASTDEPLOY_DECL RuntimeOption { struct FASTDEPLOY_DECL RuntimeOption {
// set path of model file and params file /** \brief Set path of model file and parameter file
// for onnx, only need to define model_file, but also need to *
// define model_format * \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model
// model_format support 'paddle' / 'onnx' now. * \param[in] params_path Path of parameter file, this only used when the model format is Paddle, e.g Resnet50/model.pdiparams
* \param[in] format Format of the loaded model
*/
void SetModelPath(const std::string& model_path, void SetModelPath(const std::string& model_path,
const std::string& params_path = "", const std::string& params_path = "",
const ModelFormat& format = ModelFormat::PADDLE); const ModelFormat& format = ModelFormat::PADDLE);
// set model inference in GPU /// Use cpu to inference, the runtime will inference on CPU by default
void UseCpu(); void UseCpu();
// set model inference in CPU /// Use Nvidia GPU to inference
void UseGpu(int gpu_id = 0); void UseGpu(int gpu_id = 0);
// set number of thread while inference in CPU /*
 * @brief Set the number of CPU threads used while inferring on CPU; by default it will be decided by the individual backends
*/
void SetCpuThreadNum(int thread_num); void SetCpuThreadNum(int thread_num);
// use paddle inference backend /// Set Paddle Inference as inference backend, support CPU/GPU
void UsePaddleBackend(); void UsePaddleBackend();
// use onnxruntime backend /// Set ONNX Runtime as inference backend, support CPU/GPU
void UseOrtBackend(); void UseOrtBackend();
// use tensorrt backend /// Set TensorRT as inference backend, only support GPU
void UseTrtBackend(); void UseTrtBackend();
// use openvino backend /// Set OpenVINO as inference backend, only support CPU
void UseOpenVINOBackend(); void UseOpenVINOBackend();
// use paddle lite backend /// Set Paddle Lite as inference backend, only support arm cpu
void UseLiteBackend(); void UseLiteBackend();
// enable mkldnn while use paddle inference in CPU /// Enable mkldnn while using Paddle Inference as inference backend
void EnablePaddleMKLDNN(); void EnablePaddleMKLDNN();
// disable mkldnn while use paddle inference in CPU
/// Disable mkldnn while using Paddle Inference as inference backend
void DisablePaddleMKLDNN(); void DisablePaddleMKLDNN();
// Enable delete in pass
/*
 * Delete a pass by name while using Paddle Inference as the inference backend; this can be called multiple times to delete multiple passes
*/
void DeletePaddleBackendPass(const std::string& delete_pass_name); void DeletePaddleBackendPass(const std::string& delete_pass_name);
// enable debug information of paddle backend /**
* @brief Enable print debug information while using Paddle Inference as inference backend, the backend disable the debug information by default
*/
void EnablePaddleLogInfo(); void EnablePaddleLogInfo();
// disable debug information of paddle backend
/**
* @brief Disable print debug information while using Paddle Inference as inference backend
*/
void DisablePaddleLogInfo(); void DisablePaddleLogInfo();
// set size of cached shape while enable mkldnn with paddle inference backend /**
* @brief Set shape cache size while using Paddle Inference with mkldnn, by default it will cache all the difference shape
*/
void SetPaddleMKLDNNCacheSize(int size); void SetPaddleMKLDNNCacheSize(int size);
// set the power mode of paddle lite backend. /**
* @brief Set power mode while using Paddle Lite as inference backend, mode(0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND, 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW, refer [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details)
*/
void SetLitePowerMode(int mode); void SetLitePowerMode(int mode);
// set tensorrt shape while the inputs of model contain dynamic shape
// min_shape: the minimum shape
// opt_shape: the most common shape while inference, default be empty
// max_shape: the maximum shape, default be empty
// if opt_shape, max_shape are empty, they will keep same with the min_shape /** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend
// which means the shape will be fixed as min_shape while inference *
* \param[in] input_name The name of input for the model which is dynamic shape
* \param[in] min_shape The minimal shape for the input tensor
* \param[in] opt_shape The optimized shape for the input tensor, just set the most common shape, if set as default value, it will keep same with min_shape
* \param[in] max_shape The maximum shape for the input tensor, if set as default value, it will keep same with min_shape
*/
void SetTrtInputShape( void SetTrtInputShape(
const std::string& input_name, const std::vector<int32_t>& min_shape, const std::string& input_name, const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape = std::vector<int32_t>(), const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
const std::vector<int32_t>& max_shape = std::vector<int32_t>()); const std::vector<int32_t>& max_shape = std::vector<int32_t>());
// enable half precision while use tensorrt backend /// Set max_workspace_size for TensorRT, default 1<<30
void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
/**
 * @brief Enable FP16 inference while using the TensorRT backend. Notice: not all GPU devices support FP16; on devices that do not, FastDeploy will fall back to FP32 automatically
*/
void EnableTrtFP16(); void EnableTrtFP16();
// disable half precision, change to full precision(float32)
/// Disable FP16 inference while using TensorRT backend
void DisableTrtFP16(); void DisableTrtFP16();
/**
 * @brief Set the cache file path while using the TensorRT backend. Loading a Paddle/ONNX model and initializing TensorRT takes a long time; with this interface the TensorRT engine is saved to `cache_file_path` and loaded directly on subsequent runs
*/
void SetTrtCacheFile(const std::string& cache_file_path); void SetTrtCacheFile(const std::string& cache_file_path);
void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
Backend backend = Backend::UNKNOWN; Backend backend = Backend::UNKNOWN;
// for cpu inference and preprocess // for cpu inference and preprocess
// default will let the backend choose their own default value // default will let the backend choose their own default value
@@ -158,35 +219,49 @@ struct FASTDEPLOY_DECL RuntimeOption {
std::map<std::string, std::string> custom_op_info_; std::map<std::string, std::string> custom_op_info_;
}; };
/*! @brief Runtime object used to inference the loaded model on different devices
*/
struct FASTDEPLOY_DECL Runtime { struct FASTDEPLOY_DECL Runtime {
public: public:
// explicit Runtime(const RuntimeOption& _option = RuntimeOption()); /// Initialize a Runtime object with RuntimeOption
bool Init(const RuntimeOption& _option); bool Init(const RuntimeOption& _option);
/** \brief Inference the model by the input data, and write to the output
*
* \param[in] input_tensors Notice the FDTensor::name should keep same with the model's input
* \param[in] output_tensors Inference results
 * \return true if the inference succeeded, otherwise false
*/
bool Infer(std::vector<FDTensor>& input_tensors, bool Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors); std::vector<FDTensor>* output_tensors);
void CreateOrtBackend(); /** \brief Get number of inputs
*/
void CreatePaddleBackend();
void CreateTrtBackend();
void CreateOpenVINOBackend();
void CreateLiteBackend();
int NumInputs() { return backend_->NumInputs(); } int NumInputs() { return backend_->NumInputs(); }
/** \brief Get number of outputs
*/
int NumOutputs() { return backend_->NumOutputs(); } int NumOutputs() { return backend_->NumOutputs(); }
/** \brief Get input information by index
*/
TensorInfo GetInputInfo(int index); TensorInfo GetInputInfo(int index);
/** \brief Get output information by index
*/
TensorInfo GetOutputInfo(int index); TensorInfo GetOutputInfo(int index);
/** \brief Get all the input information
*/
std::vector<TensorInfo> GetInputInfos(); std::vector<TensorInfo> GetInputInfos();
/** \brief Get all the output information
*/
std::vector<TensorInfo> GetOutputInfos(); std::vector<TensorInfo> GetOutputInfos();
RuntimeOption option; RuntimeOption option;
private: private:
void CreateOrtBackend();
void CreatePaddleBackend();
void CreateTrtBackend();
void CreateOpenVINOBackend();
void CreateLiteBackend();
std::unique_ptr<BaseBackend> backend_; std::unique_ptr<BaseBackend> backend_;
}; };
} // namespace fastdeploy } // namespace fastdeploy