mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
Support dsv3 using flashmla (#6593)
This commit is contained in:
@@ -313,7 +313,6 @@ void GetBlockShapeAndSplitKVBlock(
|
||||
// decoder
|
||||
if (max_dec_len_this_time > 0) {
|
||||
if (mla_backend) {
|
||||
PADDLE_ENFORCE(group_size <= 64, "now only group_size <= 64");
|
||||
const int set_chunk_size = get_mla_dec_chunk_size(bsz);
|
||||
|
||||
CUDA_CHECK(cudaMemsetAsync(
|
||||
|
||||
Reference in New Issue
Block a user