[XPU] refine fused moe (#4219)

2026-04-23 08:21:53 +08:00 · 2025-10-16 19:04:07 +08:00
parent 3bbe99eae7
commit 26ff2f8683
7 changed files with 354 additions and 585 deletions
@@ -79,18 +79,11 @@ class WeightOnlyConfig(QuantConfigBase):
    def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
        if current_platform.is_xpu():
            if isinstance(layer, FusedMoE):
-                if layer.ep_size > 1:
-                    from fastdeploy.model_executor.layers.backends import (
-                        XPUWeightOnlyMoeEpMethod,
-                    )
+                from fastdeploy.model_executor.layers.backends import (
+                    XPUWeightOnlyMoEMethod,
+                )

-                    return XPUWeightOnlyMoeEpMethod(self)
-                else:
-                    from fastdeploy.model_executor.layers.backends import (
-                        XPUWeightOnlyMoEMethod,
-                    )
-
-                    return XPUWeightOnlyMoEMethod(self)
+                return XPUWeightOnlyMoEMethod(self)
            else:
                from fastdeploy.model_executor.layers.backends import (
                    XPUWeightOnlyLinearMethod,