mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Speculative Decoding] Support suffix decoding (#6403)
* support suffix decoding
This commit is contained in:
@@ -66,13 +66,11 @@ __global__ void update_attn_mask_offsets_kernel(
 attn_mask_offsets_decoder[bid] += seq_len_this_time;
 
 // Speculative decoding in text_generation
-if (seq_len_this_time > 1) {
-  for (int i = 0; i < decode_states_len; i++) {
-    if (i < seq_len_this_time) {
-      decode_states_now[i] = 0;
-    } else {
-      decode_states_now[i] = -1;
-    }
+for (int i = 0; i < decode_states_len; i++) {
+  if (i < seq_len_this_time && decode_states_now[i] != 1) {
+    decode_states_now[i] = 0;
+  } else {
+    decode_states_now[i] = -1;
   }
 }
 }
Reference in New Issue
Block a user