mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Metax] adapt to the latest develop (#6282)
This commit is contained in:
@@ -56,11 +56,23 @@ elif current_platform.is_maca():
|
||||
limit_thinking_content_length_v1,
|
||||
limit_thinking_content_length_v2,
|
||||
save_output,
|
||||
save_output_topk,
|
||||
set_stop_value_multi_ends,
|
||||
speculate_get_output_padding_offset,
|
||||
speculate_get_padding_offset,
|
||||
speculate_get_seq_lens_output,
|
||||
speculate_limit_thinking_content_length_v1,
|
||||
speculate_limit_thinking_content_length_v2,
|
||||
speculate_save_output,
|
||||
speculate_save_output_topk,
|
||||
speculate_set_stop_value_multi_seqs,
|
||||
speculate_set_value_by_flags_and_idx,
|
||||
speculate_step_paddle,
|
||||
speculate_step_reschedule,
|
||||
speculate_step_system_cache,
|
||||
speculate_update,
|
||||
step_paddle,
|
||||
step_reschedule,
|
||||
step_system_cache,
|
||||
update_inputs,
|
||||
update_inputs_v1,
|
||||
@@ -515,7 +527,7 @@ def post_process_specualate(
|
||||
share_inputs["preempted_idx"],
|
||||
model_output.mp_rank,
|
||||
save_each_rank,
|
||||
- envs.ENABLE_V1_KVCACHE_SCHEDULER,
|
||||
+ bool(envs.ENABLE_V1_KVCACHE_SCHEDULER),
|
||||
)
|
||||
else:
|
||||
speculate_save_output_topk(
|
||||
|
||||
Reference in New Issue
Block a user