mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
support moe for sm103 (#7238)
This commit is contained in:
@@ -635,7 +635,7 @@ struct MoeFCGemm {
|
||||
static constexpr bool compile_needed =
|
||||
platform::is_same<KernelArch, arch::Sm75>::value;
|
||||
KernelRunner<compile_needed>::run_kernel(params, shared_storage);
|
||||
-#elif defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800) && (__CUDA_ARCH__ < 1010)
|
||||
+#elif defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800) && (__CUDA_ARCH__ < 1100)
|
||||
static constexpr bool compile_needed =
|
||||
platform::is_same<KernelArch, arch::Sm80>::value;
|
||||
KernelRunner<compile_needed>::run_kernel(params, shared_storage);
|
||||
@@ -1060,7 +1060,7 @@ struct Wint2xMoeFCGemm : public MoeFCGemm<Mma_,
|
||||
CUTLASS_DEVICE
|
||||
void operator()(Params const& params,
|
||||
SharedStorage& shared_storage) { // NOLINT
|
||||
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800) && (__CUDA_ARCH__ < 1010)
|
||||
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800) && (__CUDA_ARCH__ < 1100)
|
||||
KernelRunner<WintQuantMethod::kWeightOnlyInt2, true>::run_kernel(
|
||||
params, shared_storage);
|
||||
#else
|
||||
|
||||
@@ -709,7 +709,7 @@ void MoeGemmRunner<T, WeightQuantTraits>::dispatch_to_arch<EpilogueTag>(
|
||||
dispatch_moe_gemm_to_cutlass_macro(cutlass::arch::Sm70);
|
||||
} else if (sm_ >= 75 && sm_ < 80) {
|
||||
dispatch_moe_gemm_to_cutlass_macro(cutlass::arch::Sm75);
|
||||
-} else if (sm_ >= 80 && sm_ < 101) {
|
||||
+} else if (sm_ >= 80 && sm_ < 104) {
|
||||
dispatch_moe_gemm_to_cutlass_macro(cutlass::arch::Sm80);
|
||||
} else {
|
||||
throw std::runtime_error(
|
||||
|
||||
Reference in New Issue
Block a user