[Iluvatar] refactor attn and moe code (#6887)

2026-04-23 00:17:25 +08:00 · 2026-03-18 10:31:00 +08:00
parent 0359794e08
commit 8b890c0d72
16 changed files with 877 additions and 140 deletions
@@ -47,10 +47,14 @@ def get_moe_method(layer=None):
    return moe method based on device platform
    """

-    if current_platform.is_cuda() or current_platform.is_iluvatar():
+    if current_platform.is_cuda():
        from .fused_moe_cutlass_backend import CutlassMoEMethod

        return CutlassMoEMethod(None)
+    elif current_platform.is_iluvatar():
+        from fastdeploy.model_executor.layers.backends import IluvatarCutlassMoEMethod
+
+        return IluvatarCutlassMoEMethod(None)
    elif current_platform.is_xpu():
        from fastdeploy.model_executor.layers.backends import XPUMoEMethod