[Cherry-Pick] Unify the separator in tool_parser and reasoning_parser registration names to "-" (#4668) (#4737)
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled

* [Feature] add a new reasoning parser (#4571)

* add new reasoning_parser initial commit

* add parser file content

* add register

* ernie_test_reasoning_parser

* support <tool_call> token and add tool_parser

* add and fix unit tests

* modify reasoning_parser

* modify reasoning parser and tool parser

* modify unit tests

* modify reasoning_parser and tool_parser

* modify unit tests

* fix tool_parser

* modify the logic of reasoning_parser and tool_parser

* add and modify unit tests

* standardize code style

* simplify reasoning_parser and tool_parser

* modify unit test

* [BugFix] Fix finish reason in _create_chat_completion_choice (#4582)

* fix n_param _create_chat_completion_choice

* fix unit test

* fix final_res

* modify unit tests

* [BugFix] Fix offline LLM chat where "enable_thinking" is always "False" (#4686)

* fix enable_thinking

* recover ernie4_5_vl_processor

* [Feature] Unify the separator in tool_parser and reasoning_parser registration names to "-" (#4668)

* parser register name unify

* change ernie_x1 to ernie-x1

* change ernie4_5_vl to ernie-45-vl

* fix unit test
This commit is contained in:
kxz2002
2025-10-31 23:27:21 +08:00
committed by GitHub
parent d11e27a188
commit 24b85b752b
21 changed files with 36 additions and 34 deletions
+1 -1
View File
@@ -197,7 +197,7 @@ class Request:
guided_grammar=d.get("guided_grammar", None),
structural_tag=d.get("structural_tag", None),
guided_json_object=d.get("guided_json_object", None),
enable_thinking=d.get("enable_thinking", False),
enable_thinking=d.get("enable_thinking", None),
reasoning_max_tokens=d.get("reasoning_max_tokens", None),
trace_carrier=d.get("trace_carrier", {}),
chat_template=d.get("chat_template", None),
@@ -621,7 +621,7 @@ class OpenAIServingChat:
if output is not None and output.get("metrics") and output["metrics"].get("request_start_time"):
work_process_metrics.e2e_request_latency.observe(
time.time() - output.get("metrics").get("request_start_time")
time.time() - data.get("metrics").get("request_start_time")
)
message = ChatMessage(
role="assistant",
@@ -655,7 +655,7 @@ class OpenAIServingChat:
finish_reason = "tool_calls"
else:
finish_reason = "length"
if output.get("error_msg") is not None and "Recover" in output["error_msg"]:
if data.get("error_msg") is not None and "Recover" in data["error_msg"]:
finish_reason = "recover_stop"
return ChatCompletionResponseChoice(
@@ -95,6 +95,7 @@ class ToolParserManager:
Raise a KeyError exception if the name is not registered.
"""
name = name.replace("_", "-")
if name in cls.tool_parsers:
return cls.tool_parsers[name]
@@ -44,7 +44,7 @@ from fastdeploy.entrypoints.openai.tool_parsers.abstract_tool_parser import (
from fastdeploy.utils import data_processor_logger
@ToolParserManager.register_module("ernie_45-vl-thinking")
@ToolParserManager.register_module("ernie-45-vl-thinking")
class Ernie45VLThinkingToolParser(ToolParser):
"""
Tool parser for Ernie model version 4.5.1.
@@ -44,7 +44,7 @@ from fastdeploy.entrypoints.openai.tool_parsers.abstract_tool_parser import (
from fastdeploy.utils import data_processor_logger
@ToolParserManager.register_module("ernie_x1")
@ToolParserManager.register_module("ernie-x1")
class ErnieX1ToolParser(ToolParser):
"""
Tool parser for Ernie model version 4.5.1.
+1 -1
View File
@@ -122,7 +122,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
"FD_ENABLE_SWAP_SPACE_CLEARING": lambda: int(os.getenv("FD_ENABLE_SWAP_SPACE_CLEARING", "0")),
# enable return text, used when FD_ENABLE_INTERNAL_ADAPTER=1
"FD_ENABLE_RETURN_TEXT": lambda: bool(int(os.getenv("FD_ENABLE_RETURN_TEXT", "0"))),
# Used to truncate the string inserted during thinking when reasoning in a model. (</think> for ernie4_5_vl, \n</think>\n\n for ernie_x1)
# Used to truncate the string inserted during thinking when reasoning in a model. (</think> for ernie-45-vl, \n</think>\n\n for ernie-x1)
"FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR": lambda: os.getenv("FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR", "</think>"),
# Timeout for cache_transfer_manager process exit
"FD_CACHE_PROC_EXIT_TIMEOUT": lambda: int(os.getenv("FD_CACHE_PROC_EXIT_TIMEOUT", "600")),
+1 -1
View File
@@ -130,7 +130,7 @@ class Ernie4_5Processor(BaseDataProcessor):
if chat_template_kwargs:
if isinstance(chat_template_kwargs, dict):
for k, v in chat_template_kwargs.items():
if k not in task:
if k not in task or task[k] is None:
task[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
+1 -1
View File
@@ -245,7 +245,7 @@ class DataProcessor(BaseDataProcessor):
if chat_template_kwargs:
if isinstance(chat_template_kwargs, dict):
for k, v in chat_template_kwargs.items():
if k not in task:
if k not in task or task[k] is None:
task[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
@@ -101,7 +101,7 @@ def limit_thinking_content_length(
line_break_id: int = None,
):
if limit_strategy == "</think>":
# for ernie4_5_vl
# for ernie-45-vl
limit_thinking_content_length_v1(
sampled_token_ids,
max_think_lens,
@@ -110,7 +110,7 @@ def limit_thinking_content_length(
think_end_id,
)
elif limit_strategy == "\n</think>\n\n":
# for ernie_x1
# for ernie-x1
assert line_break_id > 0
limit_thinking_content_length_v2(
sampled_token_ids,
@@ -136,7 +136,7 @@ def speculate_limit_thinking_content_length(
line_break_id: int = None,
):
if limit_strategy == "</think>":
# for ernie4_5_vl
# for ernie-45-vl
speculate_limit_thinking_content_length_v1(
accept_tokens,
max_think_lens,
@@ -147,7 +147,7 @@ def speculate_limit_thinking_content_length(
think_end_id,
)
elif limit_strategy == "\n</think>\n\n":
# for ernie_x1
# for ernie-x1
assert line_break_id > 0
speculate_limit_thinking_content_length_v2(
accept_tokens,
@@ -125,6 +125,7 @@ class ReasoningParserManager:
Raise a KeyError exception if the name is not registered.
"""
name = name.replace("_", "-")
if name in cls.reasoning_parsers:
return cls.reasoning_parsers[name]
@@ -5,10 +5,10 @@ from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest, DeltaM
from fastdeploy.reasoning import ReasoningParser, ReasoningParserManager
@ReasoningParserManager.register_module("ernie_x1")
@ReasoningParserManager.register_module("ernie-x1")
class ErnieX1ReasoningParser(ReasoningParser):
"""
Reasoning parser for ernie_x1 model with stricter boundary checking.
Reasoning parser for ernie-x1 model with stricter boundary checking.
Unified rules:
- Do not strip newline before </think>
+2 -2
View File
@@ -203,7 +203,7 @@ def xpu_post_process(
step_idx = share_inputs["step_idx"]
limit_think_status = share_inputs["limit_think_status"]
if limit_strategy == "</think>":
# for ernie4_5_vl
# for ernie-45-vl
limit_thinking_content_length_v1(
sampled_token_ids,
max_think_lens,
@@ -212,7 +212,7 @@ def xpu_post_process(
think_end_id,
)
elif limit_strategy == "\n</think>\n\n":
# for ernie_x1
# for ernie-x1
assert line_break_id > 0
limit_thinking_content_length_v2(
sampled_token_ids,