From 20de04e249d94f846c1f81d220aa2eab5b27e4ce Mon Sep 17 00:00:00 2001 From: zccjjj <62829461+zccjjj@users.noreply.github.com> Date: Fri, 9 Jan 2026 16:34:57 +0800 Subject: [PATCH] [XPU] move xpu_attn_backend.py to FastDeploy/fastdeploy/model_executor/layers/backends/xpu (#5878) --- fastdeploy/model_executor/layers/attention/__init__.py | 2 -- fastdeploy/model_executor/layers/backends/xpu/__init__.py | 2 ++ .../xpu_attn_backend.py => backends/xpu/attention.py} | 0 fastdeploy/platforms/xpu.py | 4 ++-- tests/platforms/test_platforms.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) rename fastdeploy/model_executor/layers/{attention/xpu_attn_backend.py => backends/xpu/attention.py} (100%) diff --git a/fastdeploy/model_executor/layers/attention/__init__.py b/fastdeploy/model_executor/layers/attention/__init__.py index 1ae0ef361d..c6f326a872 100644 --- a/fastdeploy/model_executor/layers/attention/__init__.py +++ b/fastdeploy/model_executor/layers/attention/__init__.py @@ -23,14 +23,12 @@ from .iluvatar_attn_backend import IluvatarAttnBackend from .mla_attention_backend import MLAAttentionBackend from .moba_attention_backend import PlasAttentionBackend from .native_paddle_backend import PaddleNativeAttnBackend -from .xpu_attn_backend import XPUAttentionBackend __all__ = [ "AttentionBackend", "PaddleNativeAttnBackend", "get_attention_backend", "AppendAttentionBackend", - "XPUAttentionBackend", "MLAAttentionBackend", "FlashAttentionBackend", "IluvatarAttnBackend", diff --git a/fastdeploy/model_executor/layers/backends/xpu/__init__.py b/fastdeploy/model_executor/layers/backends/xpu/__init__.py index e3cf1e1cc7..33beae2553 100644 --- a/fastdeploy/model_executor/layers/backends/xpu/__init__.py +++ b/fastdeploy/model_executor/layers/backends/xpu/__init__.py @@ -16,6 +16,7 @@ xpu backend methods """ +from .attention import XPUAttentionBackend from .moe.fused_moe import XPUMoEMethod, XPUWeightOnlyMoEMethod from .quantization.weight_only import XPUWeightOnlyLinearMethod @@ -23,4 +24,5 @@ __all__ = [ "XPUWeightOnlyLinearMethod", "XPUMoEMethod", "XPUWeightOnlyMoEMethod", + "XPUAttentionBackend", ] diff --git a/fastdeploy/model_executor/layers/attention/xpu_attn_backend.py b/fastdeploy/model_executor/layers/backends/xpu/attention.py similarity index 100% rename from fastdeploy/model_executor/layers/attention/xpu_attn_backend.py rename to fastdeploy/model_executor/layers/backends/xpu/attention.py diff --git a/fastdeploy/platforms/xpu.py b/fastdeploy/platforms/xpu.py index 8bc8236359..9f1738a045 100644 --- a/fastdeploy/platforms/xpu.py +++ b/fastdeploy/platforms/xpu.py @@ -51,8 +51,8 @@ class XPUPlatform(Platform): get_attention_backend_cls """ # TODO: 等支持配置 attention engine 之后再改回去 - return "fastdeploy.model_executor.layers.attention.XPUAttentionBackend" + return "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend" if selected_backend == _Backend.NATIVE_ATTN: - return "fastdeploy.model_executor.layers.attention.XPUAttentionBackend" + return "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend" else: logger.warning("Other backends are not supported for now for XPU.") diff --git a/tests/platforms/test_platforms.py b/tests/platforms/test_platforms.py index 5f2d142967..09541a7a3a 100644 --- a/tests/platforms/test_platforms.py +++ b/tests/platforms/test_platforms.py @@ -220,7 +220,7 @@ class TestXPUPlatform(unittest.TestCase): def test_get_attention_backend_cls(self): """Verify NATIVE_ATTN returns correct XPU backend class""" - expected_cls = "fastdeploy.model_executor.layers.attention.XPUAttentionBackend" + expected_cls = "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend" self.assertEqual(XPUPlatform.get_attention_backend_cls(_Backend.NATIVE_ATTN), expected_cls)