mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[MTP]support mtp chunk_prefill_v1 (#4366)
* support mtp chunk_prefill_v1
* fix mtp chunkprefill output, fix unit test
* fix unit test
* fix save_output
This commit is contained in:
@@ -64,7 +64,6 @@ else:
|
||||
save_output,
|
||||
save_output_topk,
|
||||
set_stop_value_multi_ends,
|
||||
speculate_clear_accept_nums,
|
||||
speculate_get_output_padding_offset,
|
||||
speculate_get_padding_offset,
|
||||
speculate_get_seq_lens_output,
|
||||
@@ -369,12 +368,13 @@ def post_process_specualate(
|
||||
model_output.accept_tokens,
|
||||
model_output.accept_num,
|
||||
model_output.not_need_stop,
|
||||
model_output.seq_lens_decoder,
|
||||
model_output.prompt_lens,
|
||||
model_output.mp_rank,
|
||||
save_each_rank,
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER,
|
||||
)
|
||||
|
||||
speculate_clear_accept_nums(model_output.accept_num, model_output.seq_lens_decoder)
|
||||
|
||||
# Update pre_ids through accept tokens
|
||||
|
||||
speculate_set_value_by_flags_and_idx(
|
||||
|
||||
Reference in New Issue
Block a user