[Feature] Support Ernie FP8 on sm100 (the fixed version) (#6304)

This commit is contained in:
JYChen
2026-02-03 17:47:38 +08:00
committed by GitHub
parent 73952a3b67
commit c745a22420
9 changed files with 423 additions and 134 deletions
+10 -1
View File
@@ -254,7 +254,10 @@ def per_block_cast_to_fp8(x: Tensor, block_size: list = [128, 128]) -> Tuple[Ten
Only used in deep_gemm block wise quant weight.
copy from FastDeploy/custom_ops/gpu_ops/fp8_deep_gemm/tests/test_core.py.
"""
from fastdeploy.model_executor.ops.gpu.deep_gemm import ceil_div
try:
from deep_gemm import ceil_div
except ModuleNotFoundError:
from fastdeploy.model_executor.ops.gpu.deep_gemm import ceil_div
assert x.dim() == 2
m, n = x.shape
@@ -551,6 +554,12 @@ def vocab_range_from_global_vocab_size(global_vocab_size: int, rank: int, world_
return vocab_range_from_per_partition_vocab_size(per_partition_vocab_size, rank, offset=offset)
def get_sm_version():
    """Return the CUDA compute capability of the current device.

    Queries the active device's properties via paddle and encodes the
    compute capability as a single integer, major * 10 + minor
    (e.g. 9.0 -> 90, 10.0 -> 100 for sm100).
    """
    device_props = paddle.device.cuda.get_device_properties()
    return device_props.major * 10 + device_props.minor
def modules_to_convert(prefix: str, fd_config: FDConfig):
import fnmatch