mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[XPU] refactor moe ffn (#5501)
- remove BKCL_DISPATCH_ALL_GATHER
- support sparse mode
- support moe quant_method
This commit is contained in:
@@ -52,6 +52,6 @@ class W4A8Config(QuantConfigBase):
|
||||
XPUW4A8MoEMethod,
|
||||
)
|
||||
|
||||
return XPUW4A8MoEMethod(self)
|
||||
return XPUW4A8MoEMethod(self, layer)
|
||||
else:
|
||||
raise ValueError(f"Unsupported layer type {type(layer)} for w4a8")
|
||||
|
||||
@@ -101,7 +101,7 @@ class WeightOnlyConfig(QuantConfigBase):
|
||||
XPUWeightOnlyMoEMethod,
|
||||
)
|
||||
|
||||
return XPUWeightOnlyMoEMethod(self)
|
||||
return XPUWeightOnlyMoEMethod(self, layer)
|
||||
else:
|
||||
from fastdeploy.model_executor.layers.backends import (
|
||||
XPUWeightOnlyLinearMethod,
|
||||
|
||||
Reference in New Issue
Block a user