mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[XPU] move xpu_attn_backend.py to FastDeploy/fastdeploy/model_executor/layers/backends/xpu (#5878)
This commit is contained in:
@@ -23,14 +23,12 @@ from .iluvatar_attn_backend import IluvatarAttnBackend
|
||||
from .mla_attention_backend import MLAAttentionBackend
|
||||
from .moba_attention_backend import PlasAttentionBackend
|
||||
from .native_paddle_backend import PaddleNativeAttnBackend
|
||||
from .xpu_attn_backend import XPUAttentionBackend
|
||||
|
||||
__all__ = [
|
||||
"AttentionBackend",
|
||||
"PaddleNativeAttnBackend",
|
||||
"get_attention_backend",
|
||||
"AppendAttentionBackend",
|
||||
"XPUAttentionBackend",
|
||||
"MLAAttentionBackend",
|
||||
"FlashAttentionBackend",
|
||||
"IluvatarAttnBackend",
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
xpu backend methods
|
||||
"""
|
||||
|
||||
from .attention import XPUAttentionBackend
|
||||
from .moe.fused_moe import XPUMoEMethod, XPUWeightOnlyMoEMethod
|
||||
from .quantization.weight_only import XPUWeightOnlyLinearMethod
|
||||
|
||||
@@ -23,4 +24,5 @@ __all__ = [
|
||||
"XPUWeightOnlyLinearMethod",
|
||||
"XPUMoEMethod",
|
||||
"XPUWeightOnlyMoEMethod",
|
||||
"XPUAttentionBackend",
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user