[XPU] refactor moe ffn (#5501)

- remove BKCL_DISPATCH_ALL_GATHER
- support sparse mode
- support moe quant_method
2026-04-23 00:17:25 +08:00 · 2025-12-18 14:14:05 +08:00
parent d0a7834a17
commit 8735cb5045
12 changed files with 397 additions and 127 deletions
@@ -52,6 +52,6 @@ class W4A8Config(QuantConfigBase):
                XPUW4A8MoEMethod,
            )

-            return XPUW4A8MoEMethod(self)
+            return XPUW4A8MoEMethod(self, layer)
        else:
            raise ValueError(f"Unsupported layer type {type(layer)} for w4a8")
@@ -101,7 +101,7 @@ class WeightOnlyConfig(QuantConfigBase):
                    XPUWeightOnlyMoEMethod,
                )

-                return XPUWeightOnlyMoEMethod(self)
+                return XPUWeightOnlyMoEMethod(self, layer)
            else:
                from fastdeploy.model_executor.layers.backends import (
                    XPUWeightOnlyLinearMethod,