[Speculative Decoding] Support suffix decoding (#6403)

* support suffix decoding
This commit is contained in:
GoldPancake
2026-02-26 11:42:05 +08:00
committed by GitHub
parent 6d3fede240
commit 2178f2829b
18 changed files with 587 additions and 30 deletions
@@ -66,13 +66,11 @@ __global__ void update_attn_mask_offsets_kernel(
attn_mask_offsets_decoder[bid] += seq_len_this_time;
// Speculative decoding in text_generation
if (seq_len_this_time > 1) {
for (int i = 0; i < decode_states_len; i++) {
if (i < seq_len_this_time) {
decode_states_now[i] = 0;
} else {
decode_states_now[i] = -1;
}
for (int i = 0; i < decode_states_len; i++) {
if (i < seq_len_this_time && decode_states_now[i] != 1) {
decode_states_now[i] = 0;
} else {
decode_states_now[i] = -1;
}
}
}