[Metax] optimize cutlass moe and flash attention backend (#5128)

2026-04-23 00:17:25 +08:00 · 2025-11-20 16:12:35 +08:00
parent f1e36ff2f7
commit 0edda75a56
5 changed files with 469 additions and 161 deletions
@@ -629,6 +629,7 @@ elif paddle.device.is_compiled_with_custom_device("metax_gpu"):
        "metax_ops/moe_ffn.cu",
        "metax_ops/moe_reduce.cu",
        "metax_ops/fused_moe.cu",
+        "metax_ops/apply_rope.cu",
    ]

    sources += find_end_files("gpu_ops/speculate_decoding", ".cu")