mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Feature] support flash_mask_attention backend (#5134)
* [Feature] support flash_mask_attention backend * fix unittest * clean code
This commit is contained in:
@@ -18,6 +18,7 @@ from .attention_selecter import get_attention_backend
|
||||
from .base_attention_backend import AttentionBackend
|
||||
from .block_multihead_attn_backend import BlockAttentionBackend
|
||||
from .flash_attn_backend import FlashAttentionBackend
|
||||
from .flash_mask_attn_backend import FlashMaskAttentionBackend
|
||||
from .iluvatar_attn_backend import IluvatarAttnBackend
|
||||
from .mla_attention_backend import MLAAttentionBackend
|
||||
from .moba_attention_backend import PlasAttentionBackend
|
||||
@@ -36,4 +37,5 @@ __all__ = [
|
||||
"BlockAttentionBackend",
|
||||
"Attention",
|
||||
"PlasAttentionBackend",
|
||||
"FlashMaskAttentionBackend",
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user