mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[OP][Feature] 统一 limit_thinking_content_length CUDA 算子,支持回复长度限制与注入序列 (#6493)
* Initial plan
* Migrate PRs #6311, #6129, #6305 to develop and merge unit tests
  Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>
* fix
* update
* fix
* fix ci
* fix ci
* Initial plan
* test: add test_chat_with_response_max_tokens to test_EB_VL_Lite_serving.py
  Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>
* test: add disable-thinking case to test_chat_with_response_max_tokens
  Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>
* test: add both reasoning_max_tokens and response_max_tokens case
  Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>
* fix ci
* fix ci
* fix ci
---------
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>
This commit is contained in:
@@ -555,6 +555,15 @@ class LLMEngine:
|
||||
line_break_id = int(line_break_ids)
|
||||
if line_break_id >= 0:
|
||||
llm_logger.info(f"Get line_break_id {line_break_id} from tokenizer.")
|
||||
try:
|
||||
think_truncate_prompt_ids = self.data_processor.tokenizer.convert_tokens_to_ids(
|
||||
self.data_processor.tokenizer.tokenize(self.data_processor.tokenizer.think_truncate_prompt)
|
||||
)
|
||||
except Exception:
|
||||
think_truncate_prompt_ids = self.data_processor.tokenizer.convert_tokens_to_ids(
|
||||
self.data_processor.tokenizer.tokenize(envs.FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR)
|
||||
)
|
||||
llm_logger.info(f"Get think_truncate_prompt_ids {think_truncate_prompt_ids} from tokenizer.")
|
||||
|
||||
ports = ",".join(map(str, self.cfg.parallel_config.engine_worker_queue_port))
|
||||
ips = None
|
||||
@@ -586,6 +595,7 @@ class LLMEngine:
|
||||
f" --think_end_id {think_end_id}"
|
||||
f" --image_patch_id {image_patch_id}"
|
||||
f" --line_break_id {line_break_id}"
|
||||
f" --think_truncate_prompt_ids '{json.dumps(think_truncate_prompt_ids)}'"
|
||||
f" --speculative_config '{self.cfg.speculative_config.to_json_string()}'"
|
||||
f" --graph_optimization_config '{self.cfg.graph_opt_config.to_json_string()}'"
|
||||
f" --guided_decoding_backend {self.cfg.structured_outputs_config.guided_decoding_backend}"
|
||||
|
||||
Reference in New Issue
Block a user