[XPU] move xpu_attn_backend.py to FastDeploy/fastdeploy/model_executor/layers/backends/xpu (#5878)

This commit is contained in:
zccjjj
2026-01-09 16:34:57 +08:00
committed by GitHub
parent d4a386dfc4
commit 20de04e249
5 changed files with 5 additions and 5 deletions
@@ -23,14 +23,12 @@ from .iluvatar_attn_backend import IluvatarAttnBackend
 from .mla_attention_backend import MLAAttentionBackend
 from .moba_attention_backend import PlasAttentionBackend
 from .native_paddle_backend import PaddleNativeAttnBackend
-from .xpu_attn_backend import XPUAttentionBackend
 __all__ = [
     "AttentionBackend",
     "PaddleNativeAttnBackend",
     "get_attention_backend",
     "AppendAttentionBackend",
-    "XPUAttentionBackend",
     "MLAAttentionBackend",
     "FlashAttentionBackend",
     "IluvatarAttnBackend",
@@ -16,6 +16,7 @@
 xpu backend methods
 """
+from .attention import XPUAttentionBackend
 from .moe.fused_moe import XPUMoEMethod, XPUWeightOnlyMoEMethod
 from .quantization.weight_only import XPUWeightOnlyLinearMethod
@@ -23,4 +24,5 @@ __all__ = [
     "XPUWeightOnlyLinearMethod",
     "XPUMoEMethod",
     "XPUWeightOnlyMoEMethod",
+    "XPUAttentionBackend",
 ]
+2 -2
View File
@@ -51,8 +51,8 @@ class XPUPlatform(Platform):
         get_attention_backend_cls
         """
         # TODO: 等支持配置 attention engine 之后再改回去
-        return "fastdeploy.model_executor.layers.attention.XPUAttentionBackend"
+        return "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend"
         if selected_backend == _Backend.NATIVE_ATTN:
-            return "fastdeploy.model_executor.layers.attention.XPUAttentionBackend"
+            return "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend"
         else:
             logger.warning("Other backends are not supported for now for XPU.")
+1 -1
View File
@@ -220,7 +220,7 @@ class TestXPUPlatform(unittest.TestCase):
     def test_get_attention_backend_cls(self):
         """Verify NATIVE_ATTN returns correct XPU backend class"""
-        expected_cls = "fastdeploy.model_executor.layers.attention.XPUAttentionBackend"
+        expected_cls = "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend"
         self.assertEqual(XPUPlatform.get_attention_backend_cls(_Backend.NATIVE_ATTN), expected_cls)