[Iluvatar] refactor attn and moe code (#6887)

2026-04-23 08:21:53 +08:00 · 2026-03-18 10:31:00 +08:00
parent 0359794e08
commit 8b890c0d72
16 changed files with 877 additions and 140 deletions
@@ -149,6 +149,22 @@ class WeightOnlyConfig(QuantConfigBase):
            else:

                return GPUWeightOnlyLinearMethod(self)
+        elif current_platform.is_iluvatar():
+            if isinstance(layer, FusedMoE):
+                if layer.use_method == "cutlass":
+                    from fastdeploy.model_executor.layers.backends import (
+                        IluvatarCutlassWeightOnlyMoEMethod,
+                    )
+
+                    return IluvatarCutlassWeightOnlyMoEMethod(self)
+                else:
+                    raise ValueError(f"Unsupported MOE backend {layer.use_method}")
+            else:
+                from fastdeploy.model_executor.layers.backends import (
+                    IluvatarWeightOnlyLinearMethod,
+                )
+
+                return IluvatarWeightOnlyLinearMethod(self)
        else:
            if isinstance(layer, FusedMoE):
                if layer.use_method == "cutlass":