mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 08:21:53 +08:00
[XPU] Speculate Decoding + PD, benchmark fix (#6036)
* fix mtp pd
* fix kernel
* fix code style
* fix kernel
* fix test / clear debug code
* fix test / clear debug code
* fix codestyle
* fix codestyle
* fix codestyle
This commit is contained in:
@@ -574,6 +574,31 @@ DLL_EXPORT int speculate_free_and_reschedule(Context* ctx,
|
||||
const int max_decoder_block_num,
|
||||
const int max_draft_tokens);
|
||||
|
||||
/**
 * Prototype of speculate_schedule_cache, marked with the DLL_EXPORT macro.
 *
 * Only the declaration is visible here, so these notes are limited to what
 * the signature itself states.
 *
 * Const-qualified pointer parameters (not written through by the callee):
 *   draft_tokens, prompt_lens, stop_nums.
 *
 * Non-const pointer parameters (the callee is permitted to write through
 * them; whether it actually does is not visible from the prototype):
 *   ctx, block_tables, stop_flags, seq_lens_this_time, seq_lens_encoder,
 *   seq_lens_decoder, step_seq_lens_decoder, step_draft_tokens,
 *   step_seq_lens_this_time, accept_num, accept_tokens, is_block_step,
 *   not_need_stop.
 *
 * Scalars passed by value:
 *   real_bsz, max_bsz, max_next_step_tokens, draft_tokens_len,
 *   accept_tokens_len, block_size, block_num_per_seq, prefill_one_step_stop.
 *
 * Returns an int. NOTE(review): presumably a status/error code like the
 * neighbouring entry points -- confirm against the implementation.
 *
 * NOTE(review): buffer extents are not expressed in the prototype; the
 * *_len / *_bsz / block_* arguments presumably describe them -- verify
 * against the caller and the kernel implementation before relying on this.
 */
DLL_EXPORT int speculate_schedule_cache(Context* ctx,
|
||||
const int64_t* draft_tokens,
|
||||
int* block_tables,
|
||||
bool* stop_flags,
|
||||
const int64_t* prompt_lens,
|
||||
int* seq_lens_this_time,
|
||||
int* seq_lens_encoder,
|
||||
int* seq_lens_decoder,
|
||||
int* step_seq_lens_decoder,
|
||||
int64_t* step_draft_tokens,
|
||||
int* step_seq_lens_this_time,
|
||||
int* accept_num,
|
||||
int64_t* accept_tokens,
|
||||
bool* is_block_step,
|
||||
bool* not_need_stop,
|
||||
const int64_t* stop_nums,
|
||||
const int real_bsz,
|
||||
const int max_bsz,
|
||||
const int max_next_step_tokens,
|
||||
const int draft_tokens_len,
|
||||
const int accept_tokens_len,
|
||||
const int block_size,
|
||||
const int block_num_per_seq,
|
||||
const bool prefill_one_step_stop);
|
||||
|
||||
DLL_EXPORT int speculate_update_v3(Context* ctx,
|
||||
int* seq_lens_encoder,
|
||||
int* seq_lens_decoder,
|
||||
|
||||
Reference in New Issue
Block a user