[Metax] optimize cutlass moe and flash attention backend (#5128)

This commit is contained in:
Neil Zhu
2025-11-20 16:12:35 +08:00
committed by GitHub
parent f1e36ff2f7
commit 0edda75a56
5 changed files with 469 additions and 161 deletions
+1
View File
@@ -629,6 +629,7 @@ elif paddle.device.is_compiled_with_custom_device("metax_gpu"):
"metax_ops/moe_ffn.cu",
"metax_ops/moe_reduce.cu",
"metax_ops/fused_moe.cu",
"metax_ops/apply_rope.cu",
]
sources += find_end_files("gpu_ops/speculate_decoding", ".cu")