Support dsv3 with FlashMLA (#6593)

2026-04-23 00:17:25 +08:00 · 2026-03-03 11:09:43 +08:00
parent 0f718baaf2
commit 3cc09418f1
5 changed files with 266 additions and 52 deletions
@@ -313,7 +313,6 @@ void GetBlockShapeAndSplitKVBlock(
  // decoder
  if (max_dec_len_this_time > 0) {
    if (mla_backend) {
-      PADDLE_ENFORCE(group_size <= 64, "now only group_size <= 64");
      const int set_chunk_size = get_mla_dec_chunk_size(bsz);

      CUDA_CHECK(cudaMemsetAsync(