mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 08:21:53 +08:00
ddb06ff83f
Co-authored-by: gongweibao <gognweibao@baidu.com>
195 lines
6.9 KiB
C++
195 lines
6.9 KiB
C++
/*
|
|
* SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION &
|
|
* AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
/**
|
|
* @file epilogue_helpers.h
|
|
*
|
|
* This file includes types for the epilogues. The empty structs exist so we can
|
|
* signal to template code the type of epilogue we want to run, and let the
|
|
* underlying code specify the details such as element types, accumulator type
|
|
* and elements per vector access.
|
|
*
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "cutlass/epilogue/thread/linear_combination.h"
|
|
#include "cutlass/epilogue/thread/linear_combination_generic.h"
|
|
#include "cutlass/epilogue/thread/linear_combination_relu.h"
|
|
#include "cutlass/epilogue/thread/linear_combination_silu.h"
|
|
#include "cutlass_extensions/epilogue/thread/fused_activations.h"
|
|
|
|
// #include "cutlass/epilogue/fusion/operations.hpp"
|
|
|
|
namespace cutlass_extensions {
|
|
|
|
struct EpilogueOpBiasSilu {};
|
|
|
|
struct EpilogueOpBiasReLU {};
|
|
|
|
struct EpilogueOpBiasFtGelu {};
|
|
|
|
struct EpilogueOpBias {};
|
|
|
|
struct EpilogueOpDefaultSilu {};
|
|
|
|
struct EpilogueOpDefaultReLU {};
|
|
|
|
struct EpilogueOpDefaultFtGelu {};
|
|
|
|
struct EpilogueOpDefault {};
|
|
|
|
template <typename ElementType,
|
|
int ElementsPerVectorAccess,
|
|
typename ElementAccumulator,
|
|
typename Op>
|
|
struct Epilogue {
|
|
static_assert(sizeof(ElementType) == 0, "Unrecognized Epilogue Tag");
|
|
};
|
|
|
|
constexpr auto BiasScaleMode =
|
|
cutlass::epilogue::thread::ScaleType::NoBetaScaling;
|
|
|
|
template <typename ElementType,
|
|
int ElementsPerVectorAccess,
|
|
typename ElementAccumulator>
|
|
struct Epilogue<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
EpilogueOpBiasSilu> {
|
|
using Op =
|
|
cutlass::epilogue::thread::LinearCombinationSilu<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
ElementAccumulator,
|
|
BiasScaleMode>;
|
|
};
|
|
|
|
template <typename ElementType,
|
|
int ElementsPerVectorAccess,
|
|
typename ElementAccumulator>
|
|
struct Epilogue<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
EpilogueOpBiasReLU> {
|
|
using Op =
|
|
cutlass::epilogue::thread::LinearCombinationRelu<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
ElementAccumulator,
|
|
BiasScaleMode>;
|
|
};
|
|
|
|
template <typename ElementType,
|
|
int ElementsPerVectorAccess,
|
|
typename ElementAccumulator>
|
|
struct Epilogue<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
EpilogueOpBiasFtGelu> {
|
|
using Op = cutlass::epilogue::thread::LinearCombinationGeneric<
|
|
cutlass::epilogue::thread::GELU_taylor,
|
|
ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
ElementAccumulator,
|
|
BiasScaleMode,
|
|
cutlass::FloatRoundStyle::round_to_nearest,
|
|
true>;
|
|
};
|
|
|
|
template <typename ElementType,
|
|
int ElementsPerVectorAccess,
|
|
typename ElementAccumulator>
|
|
struct Epilogue<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
EpilogueOpBias> {
|
|
using Op =
|
|
cutlass::epilogue::thread::LinearCombination<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
ElementAccumulator,
|
|
BiasScaleMode>;
|
|
};
|
|
|
|
constexpr auto DefaultScaleMode = cutlass::epilogue::thread::ScaleType::Default;
|
|
|
|
template <typename ElementType,
|
|
int ElementsPerVectorAccess,
|
|
typename ElementAccumulator>
|
|
struct Epilogue<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
EpilogueOpDefaultSilu> {
|
|
using Op =
|
|
cutlass::epilogue::thread::LinearCombinationSilu<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
ElementAccumulator,
|
|
DefaultScaleMode>;
|
|
};
|
|
|
|
template <typename ElementType,
|
|
int ElementsPerVectorAccess,
|
|
typename ElementAccumulator>
|
|
struct Epilogue<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
EpilogueOpDefaultReLU> {
|
|
using Op =
|
|
cutlass::epilogue::thread::LinearCombinationRelu<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
ElementAccumulator,
|
|
DefaultScaleMode>;
|
|
};
|
|
|
|
template <typename ElementType,
|
|
int ElementsPerVectorAccess,
|
|
typename ElementAccumulator>
|
|
struct Epilogue<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
EpilogueOpDefaultFtGelu> {
|
|
using Op = cutlass::epilogue::thread::LinearCombinationGeneric<
|
|
cutlass::epilogue::thread::GELU_taylor,
|
|
ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
ElementAccumulator,
|
|
DefaultScaleMode,
|
|
cutlass::FloatRoundStyle::round_to_nearest,
|
|
true>;
|
|
};
|
|
|
|
template <typename ElementType,
|
|
int ElementsPerVectorAccess,
|
|
typename ElementAccumulator>
|
|
struct Epilogue<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
EpilogueOpDefault> {
|
|
using Op =
|
|
cutlass::epilogue::thread::LinearCombination<ElementType,
|
|
ElementsPerVectorAccess,
|
|
ElementAccumulator,
|
|
ElementAccumulator,
|
|
DefaultScaleMode>;
|
|
};
|
|
|
|
} // namespace cutlass_extensions
|