mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Metax] optimize cutlass moe and flash attention backend (#5128)
This commit is contained in:
@@ -629,6 +629,7 @@ elif paddle.device.is_compiled_with_custom_device("metax_gpu"):
|
||||
"metax_ops/moe_ffn.cu",
|
||||
"metax_ops/moe_reduce.cu",
|
||||
"metax_ops/fused_moe.cu",
|
||||
"metax_ops/apply_rope.cu",
|
||||
]
|
||||
|
||||
sources += find_end_files("gpu_ops/speculate_decoding", ".cu")
|
||||
|
||||
Reference in New Issue
Block a user