mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-24 01:29:57 +08:00
[Feature] support w4afp8 v1_loader and v0_loader(tp>1) (#5757)
* support
* fix
* support w4afp8 v1_loader and v0_loader
* fix
* fix test
* fix test
* fix test
* fix moe.py
* add test_ernie_4_5_w4afp8
* add test
* delete tensor
* fix test
* fix
* add
* fix test
This commit is contained in:
@@ -41,6 +41,7 @@ class W4AFP8Config(QuantConfigBase):
|
||||
self.is_permuted = is_permuted
|
||||
self.hadamard_block_size = hadamard_block_size
|
||||
self.is_quantized = is_quantized
|
||||
self.is_checkpoint_bf16 = not is_quantized
|
||||
|
||||
def name(self) -> str:
|
||||
return "w4afp8"
|
||||
|
||||
Reference in New Issue
Block a user