mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[XPU] move xpu_attn_backend.py to FastDeploy/fastdeploy/model_executor/layers/backends/xpu (#5878)
This commit is contained in:
@@ -23,14 +23,12 @@ from .iluvatar_attn_backend import IluvatarAttnBackend
|
|||||||
from .mla_attention_backend import MLAAttentionBackend
|
from .mla_attention_backend import MLAAttentionBackend
|
||||||
from .moba_attention_backend import PlasAttentionBackend
|
from .moba_attention_backend import PlasAttentionBackend
|
||||||
from .native_paddle_backend import PaddleNativeAttnBackend
|
from .native_paddle_backend import PaddleNativeAttnBackend
|
||||||
from .xpu_attn_backend import XPUAttentionBackend
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"AttentionBackend",
|
"AttentionBackend",
|
||||||
"PaddleNativeAttnBackend",
|
"PaddleNativeAttnBackend",
|
||||||
"get_attention_backend",
|
"get_attention_backend",
|
||||||
"AppendAttentionBackend",
|
"AppendAttentionBackend",
|
||||||
"XPUAttentionBackend",
|
|
||||||
"MLAAttentionBackend",
|
"MLAAttentionBackend",
|
||||||
"FlashAttentionBackend",
|
"FlashAttentionBackend",
|
||||||
"IluvatarAttnBackend",
|
"IluvatarAttnBackend",
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
xpu backend methods
|
xpu backend methods
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from .attention import XPUAttentionBackend
|
||||||
from .moe.fused_moe import XPUMoEMethod, XPUWeightOnlyMoEMethod
|
from .moe.fused_moe import XPUMoEMethod, XPUWeightOnlyMoEMethod
|
||||||
from .quantization.weight_only import XPUWeightOnlyLinearMethod
|
from .quantization.weight_only import XPUWeightOnlyLinearMethod
|
||||||
|
|
||||||
@@ -23,4 +24,5 @@ __all__ = [
|
|||||||
"XPUWeightOnlyLinearMethod",
|
"XPUWeightOnlyLinearMethod",
|
||||||
"XPUMoEMethod",
|
"XPUMoEMethod",
|
||||||
"XPUWeightOnlyMoEMethod",
|
"XPUWeightOnlyMoEMethod",
|
||||||
|
"XPUAttentionBackend",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -51,8 +51,8 @@ class XPUPlatform(Platform):
|
|||||||
get_attention_backend_cls
|
get_attention_backend_cls
|
||||||
"""
|
"""
|
||||||
# TODO: 等支持配置 attention engine 之后再改回去
|
# TODO: 等支持配置 attention engine 之后再改回去
|
||||||
return "fastdeploy.model_executor.layers.attention.XPUAttentionBackend"
|
return "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend"
|
||||||
if selected_backend == _Backend.NATIVE_ATTN:
|
if selected_backend == _Backend.NATIVE_ATTN:
|
||||||
return "fastdeploy.model_executor.layers.attention.XPUAttentionBackend"
|
return "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend"
|
||||||
else:
|
else:
|
||||||
logger.warning("Other backends are not supported for now for XPU.")
|
logger.warning("Other backends are not supported for now for XPU.")
|
||||||
|
|||||||
@@ -220,7 +220,7 @@ class TestXPUPlatform(unittest.TestCase):
|
|||||||
|
|
||||||
def test_get_attention_backend_cls(self):
|
def test_get_attention_backend_cls(self):
|
||||||
"""Verify NATIVE_ATTN returns correct XPU backend class"""
|
"""Verify NATIVE_ATTN returns correct XPU backend class"""
|
||||||
expected_cls = "fastdeploy.model_executor.layers.attention.XPUAttentionBackend"
|
expected_cls = "fastdeploy.model_executor.layers.backends.xpu.XPUAttentionBackend"
|
||||||
self.assertEqual(XPUPlatform.get_attention_backend_cls(_Backend.NATIVE_ATTN), expected_cls)
|
self.assertEqual(XPUPlatform.get_attention_backend_cls(_Backend.NATIVE_ATTN), expected_cls)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user