mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Models][OP][Optimization] Support DeepSeek-v3.2 model, integrate DSA & Indexer architecture with FlashMLA/DeepGEMM (#6689)
* Support DeepSeek-v3.2 model, integrate DSA & Indexer architecture with FlashMLA/DeepGEMM
This commit is contained in:
@@ -394,6 +394,8 @@ elif paddle.is_compiled_with_cuda():
|
||||
)
|
||||
sources += ["gpu_ops/append_attention.cu"]
|
||||
sources += find_end_files("gpu_ops/append_attn", ".cu")
|
||||
# sparse indexer
|
||||
sources += find_end_files("gpu_ops/sparse_indexer", ".cu")
|
||||
# mla
|
||||
sources += ["gpu_ops/multi_head_latent_attention.cu"]
|
||||
# gemm_dequant
|
||||
|
||||
Reference in New Issue
Block a user