[Metax] support cutlass moe & optimize flash attention (#4208)

2026-04-23 00:17:25 +08:00 · 2025-09-29 11:22:43 +08:00
parent 2b2b645296
commit 7c919070f7
20 changed files with 2786 additions and 103 deletions
@@ -50,12 +50,7 @@ def get_moe_method():
        from fastdeploy.model_executor.layers.backends import GCUFusedMoeMethod

        return GCUFusedMoeMethod(None)
-    elif current_platform.is_maca():
-        from fastdeploy.model_executor.layers.backends import (
-            MetaxTritonWeightOnlyMoEMethod,
-        )

-        return MetaxTritonWeightOnlyMoEMethod(None)
    elif current_platform.is_intel_hpu():
        from fastdeploy.model_executor.layers.backends import HpuMoEMethod