[Feature] Support MTP overlap schedule (#7001)

This commit is contained in:
sunxin
2026-04-01 14:24:26 +08:00
committed by GitHub
parent c6f0c5c3a6
commit c29e86fc9d
23 changed files with 215 additions and 138 deletions
+1
View File
@@ -62,6 +62,7 @@ __global__ void RebuildAppendPaddingKernel(T *output_data,
i += gridDim.x * blockDim.x * VecSize) {
const int out_token_id = i / dim_embed;
const int bi = batch_id_per_token_output[out_token_id];
if (bi < 0) continue;
if (seq_len_this_time[bi] == 0) continue;
if (seq_len_decoder[bi] == 0 && seq_len_encoder[bi] == 0) continue;