[BugFix] fix RL bug about blockwise FP8 (#6466)

* fix RL bug about blockwise FP8

* fix the same bug in the MoE path

* fix RL FP8 bug
Author: AIbin
Date: 2026-02-12 09:15:29 +08:00 (committed by GitHub)
Parent: 9d72332aca
Commit: 0eb87467f8
3 changed files with 11 additions and 17 deletions
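
The fix is a one-line import plus two rewritten lookups, but the mechanism is worth spelling out. The old code resolved `deep_gemm` by plain attribute access on the `fastdeploy` package, which only succeeds if every submodule along the chain was already imported by some earlier code path; the `from ... import deep_gemm` added below forces the import and binds the name directly. A standard-library sketch of the same mechanism (that an RL workflow hits the un-imported case is an inference from the commit message, not something the diff states):

    # Plain attribute chaining does NOT import submodules on demand.
    import xml

    try:
        dom = xml.dom  # AttributeError: xml/__init__.py never imports xml.dom
    except AttributeError as exc:
        print(f"attribute chain failed: {exc}")

    # A from-import always loads the submodule before binding the name.
    from xml import dom

    print(dom.getDOMImplementation())  # now usable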
@@ -27,6 +27,7 @@ from fastdeploy.model_executor.layers.linear import (
 )
 from fastdeploy.model_executor.layers.moe import FusedMoE
 from fastdeploy.model_executor.layers.quantization.fp8_utils import (
+    deep_gemm,
     quant_weight_ue8m0,
     transform_scale_ue8m0,
 )
@@ -43,9 +44,9 @@ from .quant_base import QuantConfigBase, QuantMethodBase
 if current_platform.is_cuda():
     try:
-        fp8_gemm_nt = fastdeploy.model_executor.layers.quantization.fp8_utils.deep_gemm.fp8_gemm_nt
+        fp8_gemm_nt = deep_gemm.fp8_gemm_nt
     except:
-        fp8_gemm_nt = fastdeploy.model_executor.layers.quantization.fp8_utils.deep_gemm.gemm_fp8_fp8_bf16_nt
+        fp8_gemm_nt = deep_gemm.gemm_fp8_fp8_bf16_nt
 else:
     fp8_gemm_nt = None
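
For reference, the selection logic reads as follows once both hunks are applied. This is a sketch, not the full file: the `current_platform` import path is assumed from context, the comments about which DeepGEMM builds expose which symbol are inferred from the fallback order, and the bare `except:` from the diff is narrowed to `AttributeError` here to show the tighter variant.

    from fastdeploy.model_executor.layers.quantization.fp8_utils import deep_gemm
    from fastdeploy.platforms import current_platform  # assumed import path

    if current_platform.is_cuda():
        try:
            # Preferred symbol name (newer DeepGEMM builds, inferred).
            fp8_gemm_nt = deep_gemm.fp8_gemm_nt
        except AttributeError:  # the patch itself keeps a bare `except:`
            # Legacy symbol name kept as a fallback (inferred).
            fp8_gemm_nt = deep_gemm.gemm_fp8_fp8_bf16_nt
    else:
        fp8_gemm_nt = None

Narrowing the exception keeps the fallback from masking unrelated failures inside DeepGEMM; the patch leaves the bare `except:` in place.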