Support DSV3 using FlashMLA (#6593)

This commit is contained in:
周周周
2026-03-03 11:09:43 +08:00
committed by GitHub
parent 0f718baaf2
commit 3cc09418f1
5 changed files with 266 additions and 52 deletions
@@ -313,7 +313,6 @@ void GetBlockShapeAndSplitKVBlock(
// decoder
if (max_dec_len_this_time > 0) {
if (mla_backend) {
PADDLE_ENFORCE(group_size <= 64, "now only group_size <= 64");
const int set_chunk_size = get_mla_dec_chunk_size(bsz);
CUDA_CHECK(cudaMemsetAsync(