Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2026-04-23 00:17:25 +08:00
[BugFix]fix RL bug about blockwisefp8 (#6466)
* fix RL bug about blockwisefp8
* fix moe same bug
* fix RL FP8 bug
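The change is in two parts: the quantization layer now imports deep_gemm by name from fp8_utils instead of reaching it through the full fastdeploy.model_executor.layers.quantization.fp8_utils.deep_gemm attribute chain, and the loader in fp8_utils switches each branch to the `import pkg.mod as deep_gemm` form. Below is a minimal, self-contained sketch (the module names are stand-ins, not FastDeploy's real layout) of the binding hazard that lazily populated module attributes create: a reference taken before the loader runs stays stale forever.

import types

# Stand-in for fp8_utils: its deep_gemm attribute starts as None and is
# filled in later by a lazy loader, mirroring the real module's shape.
fp8_utils = types.ModuleType("fp8_utils")
fp8_utils.deep_gemm = None

def load_deep_gemm():
    # Pretend backend; the real loader imports an extension module instead.
    backend = types.ModuleType("deep_gemm")
    backend.fp8_gemm_nt = lambda *args: "gemm result"
    fp8_utils.deep_gemm = backend

snapshot = fp8_utils.deep_gemm            # captured before the loader ran
load_deep_gemm()
print(snapshot)                           # None: the stale early binding
print(fp8_utils.deep_gemm.fp8_gemm_nt())  # "gemm result": fresh lookup works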
@@ -27,6 +27,7 @@ from fastdeploy.model_executor.layers.linear import (
 )
 from fastdeploy.model_executor.layers.moe import FusedMoE
 from fastdeploy.model_executor.layers.quantization.fp8_utils import (
+    deep_gemm,
     quant_weight_ue8m0,
     transform_scale_ue8m0,
 )
@@ -43,9 +44,9 @@ from .quant_base import QuantConfigBase, QuantMethodBase

 if current_platform.is_cuda():
     try:
-        fp8_gemm_nt = fastdeploy.model_executor.layers.quantization.fp8_utils.deep_gemm.fp8_gemm_nt
+        fp8_gemm_nt = deep_gemm.fp8_gemm_nt
     except:
-        fp8_gemm_nt = fastdeploy.model_executor.layers.quantization.fp8_utils.deep_gemm.gemm_fp8_fp8_bf16_nt
+        fp8_gemm_nt = deep_gemm.gemm_fp8_fp8_bf16_nt
 else:
     fp8_gemm_nt = None

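The try/except above probes for the kernel under the newer DeepGEMM name (fp8_gemm_nt) and falls back to the older gemm_fp8_fp8_bf16_nt. Note that a bare `except:` also swallows unrelated failures (including KeyboardInterrupt); the same probe can be written more narrowly with getattr. A sketch, assuming only the two symbol names visible in the diff:

def resolve_fp8_gemm(mod):
    """Return mod's FP8 GEMM entry point under whichever name it exports."""
    for name in ("fp8_gemm_nt", "gemm_fp8_fp8_bf16_nt"):
        fn = getattr(mod, name, None)
        if fn is not None:
            return fn
    raise AttributeError(f"{mod.__name__} exposes no known FP8 GEMM kernel")

# Usage at module scope would mirror the diff:
# fp8_gemm_nt = resolve_fp8_gemm(deep_gemm) if current_platform.is_cuda() else None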
@@ -35,16 +35,16 @@ def load_deep_gemm():
             # SM100 should use PFCC DeepGemm
             paddle.compat.enable_torch_proxy(scope={"deep_gemm"})
             try:
-                from paddlefleet.ops import deep_gemm
+                import paddlefleet.ops.deep_gemm as deep_gemm

                 logger.info("Detected sm100, use PaddleFleet DeepGEMM")
             except:
-                import deep_gemm
+                import deep_gemm as deep_gemm

                 logger.info("Detected sm100, use PFCC DeepGEMM")
         else:
             logger.info("use FastDeploy DeepGEMM")
-            from fastdeploy.model_executor.ops.gpu import deep_gemm
+            import fastdeploy.model_executor.ops.gpu.deep_gemm as deep_gemm
     else:
         deep_gemm = None
     return deep_gemm
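Each branch now binds the backend with `import pkg.mod as deep_gemm` rather than `from pkg import deep_gemm`. The difference is subtle but real: `from pkg import name` resolves name as an attribute of pkg first, so with the torch proxy enabled it can pick up whatever object the parent package currently exposes, while `import pkg.mod as name` always binds the submodule itself (for the top-level `import deep_gemm as deep_gemm` the two forms coincide, presumably kept for uniformity). A generic sketch of the same probe-and-fall-back loader using importlib; the candidate names come from the diff, but the flat preference order and caching are simplifications of the real SM100-dependent logic:

import importlib

_deep_gemm = None  # cached backend module, if one was found

def load_deep_gemm():
    """Try each DeepGEMM backend in preference order; cache the first hit."""
    global _deep_gemm
    if _deep_gemm is None:
        for name in (
            "paddlefleet.ops.deep_gemm",                    # PaddleFleet build
            "deep_gemm",                                    # PFCC build
            "fastdeploy.model_executor.ops.gpu.deep_gemm",  # FastDeploy build
        ):
            try:
                _deep_gemm = importlib.import_module(name)
                break
            except ImportError:
                continue
    return _deep_gemm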