[XPU] Speculate Decoding + PD, benchmark fix (#6036)

* fix mtp pd
* fix kernel
* fix code style
* fix kernel
* fix test / clear debug code
* fix test / clear debug code
* fix codestyle
* fix codestyle
* fix codestyle
2026-04-23 08:21:53 +08:00 · 2026-01-15 19:19:03 +08:00
parent 6619298b50
commit 59d8ae0a25
13 changed files with 995 additions and 31 deletions
@@ -574,6 +574,31 @@ DLL_EXPORT int speculate_free_and_reschedule(Context* ctx,
                                             const int max_decoder_block_num,
                                             const int max_draft_tokens);

+DLL_EXPORT int speculate_schedule_cache(Context* ctx,  // Declaration only (body not in this hunk). NOTE(review): meaning of the int return is not shown here — confirm.
+                                        const int64_t* draft_tokens,  // read-only; NOTE(review): presumably draft_tokens_len entries per sequence — confirm
+                                        int* block_tables,  // non-const (may be written); NOTE(review): presumably block_num_per_seq entries per sequence — confirm
+                                        bool* stop_flags,  // non-const (may be written)
+                                        const int64_t* prompt_lens,  // read-only
+                                        int* seq_lens_this_time,  // non-const (may be written)
+                                        int* seq_lens_encoder,  // non-const (may be written)
+                                        int* seq_lens_decoder,  // non-const (may be written)
+                                        int* step_seq_lens_decoder,  // non-const; NOTE(review): looks like a per-step counterpart of seq_lens_decoder — confirm
+                                        int64_t* step_draft_tokens,  // non-const; NOTE(review): looks like a per-step counterpart of draft_tokens — confirm
+                                        int* step_seq_lens_this_time,  // non-const; NOTE(review): looks like a per-step counterpart of seq_lens_this_time — confirm
+                                        int* accept_num,  // non-const (may be written)
+                                        int64_t* accept_tokens,  // non-const; NOTE(review): presumably accept_tokens_len entries per sequence — confirm
+                                        bool* is_block_step,  // non-const (may be written)
+                                        bool* not_need_stop,  // non-const (may be written); NOTE(review): element count not shown — confirm
+                                        const int64_t* stop_nums,  // read-only; NOTE(review): element count not shown — confirm
+                                        const int real_bsz,  // by value; NOTE(review): presumably the live batch size — confirm
+                                        const int max_bsz,  // by value; NOTE(review): presumably the allocated batch capacity — confirm
+                                        const int max_next_step_tokens,  // by value
+                                        const int draft_tokens_len,  // by value; NOTE(review): presumably the row stride of draft_tokens — confirm
+                                        const int accept_tokens_len,  // by value; NOTE(review): presumably the row stride of accept_tokens — confirm
+                                        const int block_size,  // by value
+                                        const int block_num_per_seq,  // by value; NOTE(review): presumably the row stride of block_tables — confirm
+                                        const bool prefill_one_step_stop);  // by value; NOTE(review): exact effect is defined in the implementation — confirm
+
 DLL_EXPORT int speculate_update_v3(Context* ctx,
                                   int* seq_lens_encoder,
                                   int* seq_lens_decoder,