[TI-consistent] Support quantization using pow2 scale (#7308)

* Support quantization using pow2 scale

* fix

* fix
Author: liuruyan
Date: 2026-04-13 15:01:53 +08:00
Committed by: GitHub
Parent: 6213ad5340
Commit: b34708604c
4 changed files with 24 additions and 18 deletions
@@ -353,7 +353,7 @@ class BlockWiseFP8LinearMethod(QuantMethodBase):
         else:
             x, x_scale_tensor = paddle.incubate.nn.functional.fp8_quant_blockwise(
                 x,
-                using_pow2_scale=self.quant_config.deepgemm_scale_ue8m0,
+                using_pow2_scale=self.quant_config.deepgemm_scale_ue8m0 or fastdeploy.envs.FD_FP8_QUANT_WITH_POW2SCALE,
                 output_scale_transpose=True,
                 using_ue8m0_scale=self.quant_config.deepgemm_scale_ue8m0,
             )
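
For reference, the change is a simple boolean OR: pow2 scaling is requested when either the quant config's deepgemm_scale_ue8m0 is set or the FD_FP8_QUANT_WITH_POW2SCALE environment flag is on. The snippet below is only a minimal sketch of what a power-of-two block scale means, not the fp8_quant_blockwise kernel itself; the helper name, the e4m3 max constant, and the rounding direction are illustrative assumptions.

import math

FP8_E4M3_MAX = 448.0  # assumed max magnitude of float8 e4m3 values

def pow2_block_scale(block_amax: float) -> float:
    # Sketch: round the per-block scale up to the nearest power of two so
    # dequantization multiplies by an exact exponent, which keeps results
    # bit-stable across kernels (the consistency this commit targets).
    raw = block_amax / FP8_E4M3_MAX
    if raw <= 0.0:
        return 1.0
    return 2.0 ** math.ceil(math.log2(raw))

With FD_FP8_QUANT_WITH_POW2SCALE=1 exported, the fp8_quant_blockwise call above receives using_pow2_scale=True even when deepgemm_scale_ue8m0 is disabled in the quant config.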