diff --git a/custom_ops/gpu_ops/cutlass_kernels/moe_gemm/fused_moe_cutlass_kernel.h b/custom_ops/gpu_ops/cutlass_kernels/moe_gemm/fused_moe_cutlass_kernel.h index 9c5e7bfc47..7e93f16902 100644 --- a/custom_ops/gpu_ops/cutlass_kernels/moe_gemm/fused_moe_cutlass_kernel.h +++ b/custom_ops/gpu_ops/cutlass_kernels/moe_gemm/fused_moe_cutlass_kernel.h @@ -635,7 +635,7 @@ struct MoeFCGemm { static constexpr bool compile_needed = platform::is_same::value; KernelRunner::run_kernel(params, shared_storage); -#elif defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800) && (__CUDA_ARCH__ < 1010) +#elif defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800) && (__CUDA_ARCH__ < 1100) static constexpr bool compile_needed = platform::is_same::value; KernelRunner::run_kernel(params, shared_storage); @@ -1060,7 +1060,7 @@ struct Wint2xMoeFCGemm : public MoeFCGemm= 800) && (__CUDA_ARCH__ < 1010) +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800) && (__CUDA_ARCH__ < 1100) KernelRunner::run_kernel( params, shared_storage); #else diff --git a/custom_ops/gpu_ops/cutlass_kernels/moe_gemm/fused_moe_gemm_kernels_template.h b/custom_ops/gpu_ops/cutlass_kernels/moe_gemm/fused_moe_gemm_kernels_template.h index db5af4f493..68b5b05447 100644 --- a/custom_ops/gpu_ops/cutlass_kernels/moe_gemm/fused_moe_gemm_kernels_template.h +++ b/custom_ops/gpu_ops/cutlass_kernels/moe_gemm/fused_moe_gemm_kernels_template.h @@ -709,7 +709,7 @@ void MoeGemmRunner::dispatch_to_arch( dispatch_moe_gemm_to_cutlass_macro(cutlass::arch::Sm70); } else if (sm_ >= 75 && sm_ < 80) { dispatch_moe_gemm_to_cutlass_macro(cutlass::arch::Sm75); - } else if (sm_ >= 80 && sm_ < 101) { + } else if (sm_ >= 80 && sm_ < 104) { dispatch_moe_gemm_to_cutlass_macro(cutlass::arch::Sm80); } else { throw std::runtime_error(