[Iluvatar] Refactor attention and MoE code (#6887)

This commit is contained in:
yzwu
2026-03-18 10:31:00 +08:00
committed by GitHub
parent 0359794e08
commit 8b890c0d72
16 changed files with 877 additions and 140 deletions
@@ -149,6 +149,22 @@ class WeightOnlyConfig(QuantConfigBase):
else:
return GPUWeightOnlyLinearMethod(self)
elif current_platform.is_iluvatar():
if isinstance(layer, FusedMoE):
if layer.use_method == "cutlass":
from fastdeploy.model_executor.layers.backends import (
IluvatarCutlassWeightOnlyMoEMethod,
)
return IluvatarCutlassWeightOnlyMoEMethod(self)
else:
raise ValueError(f"Unsupported MOE backend {layer.use_method}")
else:
from fastdeploy.model_executor.layers.backends import (
IluvatarWeightOnlyLinearMethod,
)
return IluvatarWeightOnlyLinearMethod(self)
else:
if isinstance(layer, FusedMoE):
if layer.use_method == "cutlass":