[XPU] Speculate Decoding + PD, benchmark fix (#6036)

* fix mtp pd

* fix kernel

* fix code style

* fix kernel

* fix test / clear debug code

* fix test / clear debug code

* fix codestyle

* fix codestyle

* fix codestyle
This commit is contained in:
cmcamdy
2026-01-15 19:19:03 +08:00
committed by GitHub
parent 6619298b50
commit 59d8ae0a25
13 changed files with 995 additions and 31 deletions
@@ -574,6 +574,31 @@ DLL_EXPORT int speculate_free_and_reschedule(Context* ctx,
const int max_decoder_block_num,
const int max_draft_tokens);
// Prototype of the exported speculate_schedule_cache entry point.
// Only the declaration is visible in this view; the notes below describe
// what the signature itself establishes, and hedge everything else.
//
// Parameters (grouped by how the signature qualifies them):
//   ctx                      - Context pointer, passed first.
//   draft_tokens,
//   prompt_lens, stop_nums   - pointers to const int64_t; the callee cannot
//                              write through them.
//   block_tables, stop_flags,
//   seq_lens_this_time, seq_lens_encoder, seq_lens_decoder,
//   step_seq_lens_decoder, step_draft_tokens, step_seq_lens_this_time,
//   accept_num, accept_tokens, is_block_step, not_need_stop
//                            - non-const pointers, so the callee is allowed
//                              to modify the pointed-to data.
//   real_bsz, max_bsz, max_next_step_tokens, draft_tokens_len,
//   accept_tokens_len, block_size, block_num_per_seq,
//   prefill_one_step_stop    - scalars passed by value.
//
// Returns an int.
// NOTE(review): presumably a status/error code — confirm against the
// definition.
// NOTE(review): buffer lengths and layouts are not visible here;
// presumably the per-sequence arrays are sized by max_bsz or real_bsz and the
// token arrays by draft_tokens_len / accept_tokens_len — TODO confirm.
// NOTE(review): whether the pointers must refer to device or host memory
// cannot be determined from this declaration — confirm with the caller.
DLL_EXPORT int speculate_schedule_cache(Context* ctx,
const int64_t* draft_tokens,
int* block_tables,
bool* stop_flags,
const int64_t* prompt_lens,
int* seq_lens_this_time,
int* seq_lens_encoder,
int* seq_lens_decoder,
int* step_seq_lens_decoder,
int64_t* step_draft_tokens,
int* step_seq_lens_this_time,
int* accept_num,
int64_t* accept_tokens,
bool* is_block_step,
bool* not_need_stop,
const int64_t* stop_nums,
const int real_bsz,
const int max_bsz,
const int max_next_step_tokens,
const int draft_tokens_len,
const int accept_tokens_len,
const int block_size,
const int block_num_per_seq,
const bool prefill_one_step_stop);
DLL_EXPORT int speculate_update_v3(Context* ctx,
int* seq_lens_encoder,
int* seq_lens_decoder,