[XPU] move xpu_attn_backend.py to FastDeploy/fastdeploy/model_executor/layers/backends/xpu (#5878)

2026-04-22 16:07:51 +08:00 · 2026-01-09 16:34:57 +08:00
parent d4a386dfc4
commit 20de04e249
5 changed files with 5 additions and 5 deletions
@@ -23,14 +23,12 @@ from .iluvatar_attn_backend import IluvatarAttnBackend
 from .mla_attention_backend import MLAAttentionBackend
 from .moba_attention_backend import PlasAttentionBackend
 from .native_paddle_backend import PaddleNativeAttnBackend
-from .xpu_attn_backend import XPUAttentionBackend

 __all__ = [
    "AttentionBackend",
    "PaddleNativeAttnBackend",
    "get_attention_backend",
    "AppendAttentionBackend",
-    "XPUAttentionBackend",
    "MLAAttentionBackend",
    "FlashAttentionBackend",
    "IluvatarAttnBackend",
@@ -16,6 +16,7 @@
 xpu backend methods
 """

+from .attention import XPUAttentionBackend
 from .moe.fused_moe import XPUMoEMethod, XPUWeightOnlyMoEMethod
 from .quantization.weight_only import XPUWeightOnlyLinearMethod

@@ -23,4 +24,5 @@ __all__ = [
    "XPUWeightOnlyLinearMethod",
    "XPUMoEMethod",
    "XPUWeightOnlyMoEMethod",
+    "XPUAttentionBackend",
 ]
@@ -51,8 +51,8 @@ class XPUPlatform(Platform):
        get_attention_backend_cls
        """
        # TODO: 等支持配置 attention engine 之后再改回去
-        return "fastdeploy.model_executor.layers.attention.XPUAttentionBackend"
+        return "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend"
        if selected_backend == _Backend.NATIVE_ATTN:
-            return "fastdeploy.model_executor.layers.attention.XPUAttentionBackend"
+            return "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend"
        else:
            logger.warning("Other backends are not supported for now for XPU.")
@@ -220,7 +220,7 @@ class TestXPUPlatform(unittest.TestCase):

    def test_get_attention_backend_cls(self):
        """Verify NATIVE_ATTN returns correct XPU backend class"""
-        expected_cls = "fastdeploy.model_executor.layers.attention.XPUAttentionBackend"
+        expected_cls = "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend"
        self.assertEqual(XPUPlatform.get_attention_backend_cls(_Backend.NATIVE_ATTN), expected_cls)