[Iluvatar] refactor attn and moe code (#6887)

This commit is contained in:
yzwu
2026-03-18 10:31:00 +08:00
committed by GitHub
parent 0359794e08
commit 8b890c0d72
16 changed files with 877 additions and 140 deletions
+5 -1
View File
@@ -47,10 +47,14 @@ def get_moe_method(layer=None):
return moe method based on device platform
"""
if current_platform.is_cuda() or current_platform.is_iluvatar():
if current_platform.is_cuda():
from .fused_moe_cutlass_backend import CutlassMoEMethod
return CutlassMoEMethod(None)
elif current_platform.is_iluvatar():
from fastdeploy.model_executor.layers.backends import IluvatarCutlassMoEMethod
return IluvatarCutlassMoEMethod(None)
elif current_platform.is_xpu():
from fastdeploy.model_executor.layers.backends import XPUMoEMethod