[BugFix] Fix RL bug with blockwise FP8 (#6466)

* fix RL bug with blockwise FP8

* fix the same bug in the MoE path

* fix RL FP8 bug
Author: AIbin
Date: 2026-02-12 09:15:29 +08:00
Committed by: GitHub
Parent: 9d72332aca
Commit: 0eb87467f8
3 changed files with 11 additions and 17 deletions
@@ -27,6 +27,7 @@ from fastdeploy.model_executor.layers.linear import (
 )
 from fastdeploy.model_executor.layers.moe import FusedMoE
 from fastdeploy.model_executor.layers.quantization.fp8_utils import (
+    deep_gemm,
     quant_weight_ue8m0,
     transform_scale_ue8m0,
 )
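This hunk pulls `deep_gemm` into the module's namespace through the existing `fp8_utils` import list, so the `fp8_gemm_nt` probe below can reference it directly instead of walking the full dotted chain. As a minimal sketch of why such chains are fragile, consider a hand-built package (`pkg` and `pkg.sub` are hypothetical names, not FastDeploy modules): attribute access on the parent only works once the submodule has actually been imported and bound, which a direct import guarantees.

import sys
import types

# Build a parent module whose submodule has not been imported yet.
pkg = types.ModuleType("pkg")
sys.modules["pkg"] = pkg

try:
    pkg.sub.fp8_gemm_nt  # chain lookup: parent has no "sub" attribute yet
except AttributeError as err:
    print("chain lookup failed:", err)

# Once the submodule is imported and bound on its parent, both styles work,
# but importing the name once avoids re-walking the chain at every call site.
sub = types.ModuleType("pkg.sub")
sub.fp8_gemm_nt = lambda *args: "ok"
sys.modules["pkg.sub"] = sub
pkg.sub = sub
print(pkg.sub.fp8_gemm_nt())  # -> ok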
@@ -43,9 +44,9 @@ from .quant_base import QuantConfigBase, QuantMethodBase
 
 if current_platform.is_cuda():
     try:
-        fp8_gemm_nt = fastdeploy.model_executor.layers.quantization.fp8_utils.deep_gemm.fp8_gemm_nt
+        fp8_gemm_nt = deep_gemm.fp8_gemm_nt
     except:
-        fp8_gemm_nt = fastdeploy.model_executor.layers.quantization.fp8_utils.deep_gemm.gemm_fp8_fp8_bf16_nt
+        fp8_gemm_nt = deep_gemm.gemm_fp8_fp8_bf16_nt
 else:
     fp8_gemm_nt = None
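The try/except above is a version shim: the first branch targets the entry point name `fp8_gemm_nt`, and the fallback covers DeepGEMM builds that still call it `gemm_fp8_fp8_bf16_nt`. The same probe can be written without a bare `except:` using `getattr` (a sketch of the pattern, not the committed code):

def resolve_fp8_gemm(deep_gemm_mod):
    """Return whichever blockwise FP8 GEMM entry point this build exposes."""
    for name in ("fp8_gemm_nt", "gemm_fp8_fp8_bf16_nt"):
        fn = getattr(deep_gemm_mod, name, None)
        if fn is not None:
            return fn
    return None  # no known entry point; caller must fall back or raise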
@@ -35,16 +35,16 @@ def load_deep_gemm():
             # SM100 should use PFCC DeepGemm
             paddle.compat.enable_torch_proxy(scope={"deep_gemm"})
             try:
-                from paddlefleet.ops import deep_gemm
+                import paddlefleet.ops.deep_gemm as deep_gemm
 
                 logger.info("Detected sm100, use PaddleFleet DeepGEMM")
             except:
-                import deep_gemm
+                import deep_gemm as deep_gemm
 
                 logger.info("Detected sm100, use PFCC DeepGEMM")
         else:
             logger.info("use FastDeploy DeepGEMM")
-            from fastdeploy.model_executor.ops.gpu import deep_gemm
+            import fastdeploy.model_executor.ops.gpu.deep_gemm as deep_gemm
     else:
         deep_gemm = None
     return deep_gemm
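This last hunk rewrites every binding inside `load_deep_gemm()` from `from pkg import deep_gemm` to `import pkg.deep_gemm as deep_gemm`. The difference matters when the parent package carries a placeholder attribute: `from pkg import name` is satisfied by any attribute of that name, even `None`, while `import pkg.deep_gemm as deep_gemm` must resolve a real submodule through the import system, and therefore through hooks such as the `enable_torch_proxy` scope enabled just above. A self-contained demonstration (the in-memory `pkg` is hypothetical, not FastDeploy code):

import sys
import types

# A hand-built package whose deep_gemm "submodule" is only a placeholder.
pkg = types.ModuleType("pkg")
pkg.__path__ = []        # mark it as a package with no importable submodules
pkg.deep_gemm = None     # stale placeholder attribute
sys.modules["pkg"] = pkg

from pkg import deep_gemm      # silently binds the placeholder
print(deep_gemm)               # -> None

try:
    import pkg.deep_gemm as deep_gemm  # must import a real submodule
except ImportError as err:
    print("import system rejected the placeholder:", err)

The middle fallback, `import deep_gemm as deep_gemm`, is behaviorally identical to the old `import deep_gemm`; presumably it was written in the `as` form only so the three branches read uniformly.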