[Feature] support flash_mask_attention backend (#5134)

* [Feature] support flash_mask_attention backend

* fix unittest

* clean code
This commit is contained in:
lizhenyun01
2025-11-28 10:12:16 +08:00
committed by GitHub
parent b935101008
commit aba4fc657f
13 changed files with 542 additions and 69 deletions
@@ -18,6 +18,7 @@ from .attention_selecter import get_attention_backend
from .base_attention_backend import AttentionBackend
from .block_multihead_attn_backend import BlockAttentionBackend
from .flash_attn_backend import FlashAttentionBackend
from .flash_mask_attn_backend import FlashMaskAttentionBackend
from .iluvatar_attn_backend import IluvatarAttnBackend
from .mla_attention_backend import MLAAttentionBackend
from .moba_attention_backend import PlasAttentionBackend
@@ -36,4 +37,5 @@ __all__ = [
"BlockAttentionBackend",
"Attention",
"PlasAttentionBackend",
"FlashMaskAttentionBackend",
]