mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix]fix RL bug about blockwisefp8 (#6466)
* fix RL bug about blockwisefp8
* fix same bug in MoE
* fix RL FP8 bug
This commit is contained in:
@@ -27,6 +27,7 @@ from fastdeploy.model_executor.layers.linear import (
 )
 from fastdeploy.model_executor.layers.moe import FusedMoE
 from fastdeploy.model_executor.layers.quantization.fp8_utils import (
+    deep_gemm,
     quant_weight_ue8m0,
     transform_scale_ue8m0,
 )
@@ -43,9 +44,9 @@ from .quant_base import QuantConfigBase, QuantMethodBase


 if current_platform.is_cuda():
     try:
-        fp8_gemm_nt = fastdeploy.model_executor.layers.quantization.fp8_utils.deep_gemm.fp8_gemm_nt
+        fp8_gemm_nt = deep_gemm.fp8_gemm_nt
     except:
-        fp8_gemm_nt = fastdeploy.model_executor.layers.quantization.fp8_utils.deep_gemm.gemm_fp8_fp8_bf16_nt
+        fp8_gemm_nt = deep_gemm.gemm_fp8_fp8_bf16_nt
 else:
     fp8_gemm_nt = None
Reference in New Issue
Block a user