[XPU] refactor moe ffn (#5501)

- remove BKCL_DISPATCH_ALL_GATHER
- support sparse mode
- support moe quant_method
This commit is contained in:
zhupengyang
2025-12-18 14:14:05 +08:00
committed by GitHub
parent d0a7834a17
commit 8735cb5045
12 changed files with 397 additions and 127 deletions
@@ -52,6 +52,6 @@ class W4A8Config(QuantConfigBase):
XPUW4A8MoEMethod,
)
-return XPUW4A8MoEMethod(self)
+return XPUW4A8MoEMethod(self, layer)
else:
raise ValueError(f"Unsupported layer type {type(layer)} for w4a8")
@@ -101,7 +101,7 @@ class WeightOnlyConfig(QuantConfigBase):
XPUWeightOnlyMoEMethod,
)
-return XPUWeightOnlyMoEMethod(self)
+return XPUWeightOnlyMoEMethod(self, layer)
else:
from fastdeploy.model_executor.layers.backends import (
XPUWeightOnlyLinearMethod,