mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-24 01:29:57 +08:00
[Models][OP][Optimization] Support DeepSeek-v3.2 model, integrate DSA & Indexer architecture with FlashMLA/DeepGEMM (#6689)
* Support DeepSeek-v3.2 model, integrate DSA & Indexer architecture with FlashMLA/DeepGEMM
This commit is contained in:
@@ -17,6 +17,7 @@ from .attention import Attention
 from .attention_selecter import get_attention_backend
 from .base_attention_backend import AttentionBackend
 from .block_multihead_attn_backend import BlockAttentionBackend
+from .dsa_attention_backend import DSAAttentionBackend
 from .flash_attn_backend import FlashAttentionBackend
 from .flash_mask_attn_backend import FlashMaskAttentionBackend
 from .iluvatar_attn_backend import IluvatarAttnBackend
@@ -30,6 +31,7 @@ __all__ = [
     "get_attention_backend",
     "AppendAttentionBackend",
     "MLAAttentionBackend",
+    "DSAAttentionBackend",
     "FlashAttentionBackend",
     "IluvatarAttnBackend",
     "BlockAttentionBackend",
Reference in New Issue
Block a user