Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2026-04-23 08:21:53 +08:00.
[RL] Support SM100 FP8 quantization in RL (#6601)
* RL SM100 Fix
* update
This commit is contained in:
@@ -281,8 +281,12 @@ class BlockWiseFP8LinearMethod(QuantMethodBase):
|
||||
if self.model_format != "torch":
|
||||
process_weight_transpose(layer, "weight")
|
||||
process_weight_transpose(layer, "weight_scale_inv")
|
||||
else:
|
||||
return
|
||||
if self.quant_config.deepgemm_scale_ue8m0:
|
||||
new_weight_scale_inv = paddle.empty(
|
||||
layer.weight_scale_inv.shape[::-1], dtype=layer.weight_scale_inv.dtype
|
||||
)
|
||||
new_weight_scale_inv = new_weight_scale_inv.transpose([1, 0])
|
||||
layer.weight_scale_inv.data = new_weight_scale_inv
|
||||
|
||||
def process_loaded_weights(self, layer, weights) -> None:
|
||||
weight_tensor = weights.transpose([1, 0])
|
||||
|
||||
Reference in New Issue
Block a user