[Models][OP][Optimization] Support DeepSeek-v3.2 model, integrate DSA & Indexer architecture with FlashMLA/DeepGEMM (#6689)

* Support DeepSeek-v3.2 model, integrate DSA & Indexer architecture with FlashMLA/DeepGEMM
This commit is contained in:
AIbin
2026-03-10 15:05:14 +08:00
committed by GitHub
parent 25c479312d
commit c3aceb6bdc
22 changed files with 8022 additions and 143 deletions
@@ -17,6 +17,7 @@ from .attention import Attention
from .attention_selecter import get_attention_backend
from .base_attention_backend import AttentionBackend
from .block_multihead_attn_backend import BlockAttentionBackend
from .dsa_attention_backend import DSAAttentionBackend
from .flash_attn_backend import FlashAttentionBackend
from .flash_mask_attn_backend import FlashMaskAttentionBackend
from .iluvatar_attn_backend import IluvatarAttnBackend
@@ -30,6 +31,7 @@ __all__ = [
"get_attention_backend",
"AppendAttentionBackend",
"MLAAttentionBackend",
"DSAAttentionBackend",
"FlashAttentionBackend",
"IluvatarAttnBackend",
"BlockAttentionBackend",