[XPU] refine fused moe (#4219)

This commit is contained in:
zhupengyang
2025-10-16 19:04:07 +08:00
committed by GitHub
parent 3bbe99eae7
commit 26ff2f8683
7 changed files with 354 additions and 585 deletions
@@ -79,18 +79,11 @@ class WeightOnlyConfig(QuantConfigBase):
def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
if current_platform.is_xpu():
if isinstance(layer, FusedMoE):
if layer.ep_size > 1:
from fastdeploy.model_executor.layers.backends import (
XPUWeightOnlyMoeEpMethod,
)
from fastdeploy.model_executor.layers.backends import (
XPUWeightOnlyMoEMethod,
)
return XPUWeightOnlyMoeEpMethod(self)
else:
from fastdeploy.model_executor.layers.backends import (
XPUWeightOnlyMoEMethod,
)
return XPUWeightOnlyMoEMethod(self)
return XPUWeightOnlyMoEMethod(self)
else:
from fastdeploy.model_executor.layers.backends import (
XPUWeightOnlyLinearMethod,