diff --git a/benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml b/benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml
index 30a50170bd..a5bb750ba9 100644
--- a/benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml
+++ b/benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml
@@ -1,5 +1,5 @@
-reasoning-parser: ernie_x1
-tool_call_parser: ernie_x1
+reasoning-parser: ernie-x1
+tool_call_parser: ernie-x1
 tensor_parallel_size: 4
 max_model_len: 65536
 max_num_seqs: 128
diff --git a/benchmarks/yaml/x1-a3b-128k-wint8-h800-tp1.yaml b/benchmarks/yaml/x1-a3b-128k-wint8-h800-tp1.yaml
index 09236610af..4476a55a9f 100644
--- a/benchmarks/yaml/x1-a3b-128k-wint8-h800-tp1.yaml
+++ b/benchmarks/yaml/x1-a3b-128k-wint8-h800-tp1.yaml
@@ -1,7 +1,7 @@
 tensor_parallel_size: 1
 max_model_len: 131072
 max_num_seqs: 32
-reasoning_parser: ernie_x1
-tool_call_parser: ernie_x1
+reasoning_parser: ernie-x1
+tool_call_parser: ernie-x1
 load_choices: "default_v1"
 quantization: wint8
diff --git a/docs/best_practices/ERNIE-4.5-21B-A3B-Thinking.md b/docs/best_practices/ERNIE-4.5-21B-A3B-Thinking.md
index 05328ff08f..a67be76fef 100644
--- a/docs/best_practices/ERNIE-4.5-21B-A3B-Thinking.md
+++ b/docs/best_practices/ERNIE-4.5-21B-A3B-Thinking.md
@@ -33,8 +33,8 @@ python -m fastdeploy.entrypoints.openai.api_server \
        --tensor-parallel-size 1 \
        --max-model-len 131072 \
        --quantization wint8 \
-       --reasoning-parser ernie_x1 \
-       --tool-call-parser ernie_x1 \
+       --reasoning-parser ernie-x1 \
+       --tool-call-parser ernie-x1 \
        --max-num-seqs 32
 ```
 - `--quantization`: Indicates the quantization strategy used by the model. Different quantization strategies will result in different performance and accuracy of the model. It could be one of `wint8` / `wint4` / `block_wise_fp8`(Hopper is needed).
diff --git a/docs/usage/environment_variables.md b/docs/usage/environment_variables.md
index 378a80a7b6..c4c319f83a 100644
--- a/docs/usage/environment_variables.md
+++ b/docs/usage/environment_variables.md
@@ -80,7 +80,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # Whether to use Machete for wint4 dense GEMM.
     "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "1"),
 
-    # Used to truncate the string inserted during thinking when reasoning in a model. (</think> for ernie4_5_vl, \n</think>\n\n for ernie_x1)
+    # The string inserted to truncate the thinking content of a reasoning model. (</think> for ernie-45-vl, \n</think>\n\n for ernie-x1)
     "FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR": lambda: os.getenv("FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR", "</think>"),
 
     # Timeout for cache_transfer_manager process exit
diff --git a/docs/zh/best_practices/ERNIE-4.5-21B-A3B-Thinking.md b/docs/zh/best_practices/ERNIE-4.5-21B-A3B-Thinking.md
index 0dc0db5277..c2648ceb33 100644
--- a/docs/zh/best_practices/ERNIE-4.5-21B-A3B-Thinking.md
+++ b/docs/zh/best_practices/ERNIE-4.5-21B-A3B-Thinking.md
@@ -33,8 +33,8 @@ python -m fastdeploy.entrypoints.openai.api_server \
        --tensor-parallel-size 1 \
        --max-model-len 131072 \
        --quantization wint8 \
-       --reasoning-parser ernie_x1 \
-       --tool-call-parser ernie_x1 \
+       --reasoning-parser ernie-x1 \
+       --tool-call-parser ernie-x1 \
        --max-num-seqs 32
 ```
 其中：
diff --git a/docs/zh/usage/environment_variables.md b/docs/zh/usage/environment_variables.md
index f778735eeb..b0a162a8aa 100644
--- a/docs/zh/usage/environment_variables.md
+++ b/docs/zh/usage/environment_variables.md
@@ -80,7 +80,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # 是否使用 Machete 后端的 wint4 GEMM.
     "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "1"),
 
-    # Used to truncate the string inserted during thinking when reasoning in a model. (</think> for ernie4_5_vl, \n</think>\n\n for ernie_x1)
+    # The string inserted to truncate the thinking content of a reasoning model. (</think> for ernie-45-vl, \n</think>\n\n for ernie-x1)
     "FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR": lambda: os.getenv("FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR", "</think>"),
 
     # cache_transfer_manager 进程残留时退出等待超时时间
diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py
index 1b846d8fc0..3036f530fc 100644
--- a/fastdeploy/engine/request.py
+++ b/fastdeploy/engine/request.py
@@ -197,7 +197,7 @@ class Request:
             guided_grammar=d.get("guided_grammar", None),
             structural_tag=d.get("structural_tag", None),
             guided_json_object=d.get("guided_json_object", None),
-            enable_thinking=d.get("enable_thinking", False),
+            enable_thinking=d.get("enable_thinking", None),
             reasoning_max_tokens=d.get("reasoning_max_tokens", None),
             trace_carrier=d.get("trace_carrier", {}),
             chat_template=d.get("chat_template", None),
diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py
index b18fc51022..cf11ba8fff 100644
--- a/fastdeploy/entrypoints/openai/serving_chat.py
+++ b/fastdeploy/entrypoints/openai/serving_chat.py
@@ -621,7 +621,7 @@ class OpenAIServingChat:
 
         if output is not None and output.get("metrics") and output["metrics"].get("request_start_time"):
             work_process_metrics.e2e_request_latency.observe(
-                time.time() - output.get("metrics").get("request_start_time")
+                time.time() - data.get("metrics").get("request_start_time")
             )
         message = ChatMessage(
             role="assistant",
@@ -655,7 +655,7 @@ class OpenAIServingChat:
                 finish_reason = "tool_calls"
         else:
             finish_reason = "length"
-        if output.get("error_msg") is not None and "Recover" in output["error_msg"]:
+        if data.get("error_msg") is not None and "Recover" in data["error_msg"]:
             finish_reason = "recover_stop"
 
         return ChatCompletionResponseChoice(
diff --git a/fastdeploy/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/fastdeploy/entrypoints/openai/tool_parsers/abstract_tool_parser.py
index d6ac8f81aa..906483f445 100644
--- a/fastdeploy/entrypoints/openai/tool_parsers/abstract_tool_parser.py
+++ b/fastdeploy/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@@ -95,6 +95,7 @@ class ToolParserManager:
 
         Raise a KeyError exception if the name is not registered.
         """
+        name = name.replace("_", "-")
         if name in cls.tool_parsers:
             return cls.tool_parsers[name]
 
diff --git a/fastdeploy/entrypoints/openai/tool_parsers/ernie_45_vl_thinking_tool_parser.py b/fastdeploy/entrypoints/openai/tool_parsers/ernie_45_vl_thinking_tool_parser.py
index 131c17e6ab..1cb8c0ab71 100644
--- a/fastdeploy/entrypoints/openai/tool_parsers/ernie_45_vl_thinking_tool_parser.py
+++ b/fastdeploy/entrypoints/openai/tool_parsers/ernie_45_vl_thinking_tool_parser.py
@@ -44,7 +44,7 @@ from fastdeploy.entrypoints.openai.tool_parsers.abstract_tool_parser import (
 from fastdeploy.utils import data_processor_logger
 
 
-@ToolParserManager.register_module("ernie_45-vl-thinking")
+@ToolParserManager.register_module("ernie-45-vl-thinking")
 class Ernie45VLThinkingToolParser(ToolParser):
     """
     Tool parser for Ernie model version 4.5.1.
diff --git a/fastdeploy/entrypoints/openai/tool_parsers/ernie_x1_tool_parser.py b/fastdeploy/entrypoints/openai/tool_parsers/ernie_x1_tool_parser.py
index 14a784f174..8a14abee87 100644
--- a/fastdeploy/entrypoints/openai/tool_parsers/ernie_x1_tool_parser.py
+++ b/fastdeploy/entrypoints/openai/tool_parsers/ernie_x1_tool_parser.py
@@ -44,7 +44,7 @@ from fastdeploy.entrypoints.openai.tool_parsers.abstract_tool_parser import (
 from fastdeploy.utils import data_processor_logger
 
 
-@ToolParserManager.register_module("ernie_x1")
+@ToolParserManager.register_module("ernie-x1")
 class ErnieX1ToolParser(ToolParser):
     """
     Tool parser for Ernie model version 4.5.1.
diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py
index 05b042d7a9..d60750d6a9 100644
--- a/fastdeploy/envs.py
+++ b/fastdeploy/envs.py
@@ -122,7 +122,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "FD_ENABLE_SWAP_SPACE_CLEARING": lambda: int(os.getenv("FD_ENABLE_SWAP_SPACE_CLEARING", "0")),
     # enable return text, used when FD_ENABLE_INTERNAL_ADAPTER=1
     "FD_ENABLE_RETURN_TEXT": lambda: bool(int(os.getenv("FD_ENABLE_RETURN_TEXT", "0"))),
-    # Used to truncate the string inserted during thinking when reasoning in a model. (</think> for ernie4_5_vl, \n</think>\n\n for ernie_x1)
+    # The string inserted to truncate the thinking content of a reasoning model. (</think> for ernie-45-vl, \n</think>\n\n for ernie-x1)
     "FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR": lambda: os.getenv("FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR", "</think>"),
     # Timeout for cache_transfer_manager process exit
     "FD_CACHE_PROC_EXIT_TIMEOUT": lambda: int(os.getenv("FD_CACHE_PROC_EXIT_TIMEOUT", "600")),
diff --git a/fastdeploy/input/ernie4_5_processor.py b/fastdeploy/input/ernie4_5_processor.py
index e0daacdc6a..a151dbfdd6 100644
--- a/fastdeploy/input/ernie4_5_processor.py
+++ b/fastdeploy/input/ernie4_5_processor.py
@@ -130,7 +130,7 @@ class Ernie4_5Processor(BaseDataProcessor):
                 if chat_template_kwargs:
                     if isinstance(chat_template_kwargs, dict):
                         for k, v in chat_template_kwargs.items():
-                            if k not in task:
+                            if k not in task or task[k] is None:
                                 task[k] = v
                     else:
                         raise ValueError("Invalid input: chat_template_kwargs must be a dict")
diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index 75e068c400..45ce5dda25 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -245,7 +245,7 @@ class DataProcessor(BaseDataProcessor):
                 if chat_template_kwargs:
                     if isinstance(chat_template_kwargs, dict):
                         for k, v in chat_template_kwargs.items():
-                            if k not in task:
+                            if k not in task or task[k] is None:
                                 task[k] = v
                     else:
                         raise ValueError("Invalid input: chat_template_kwargs must be a dict")
diff --git a/fastdeploy/model_executor/pre_and_post_process.py b/fastdeploy/model_executor/pre_and_post_process.py
index bddb12b496..bcbd25dbf1 100644
--- a/fastdeploy/model_executor/pre_and_post_process.py
+++ b/fastdeploy/model_executor/pre_and_post_process.py
@@ -101,7 +101,7 @@ def limit_thinking_content_length(
     line_break_id: int = None,
 ):
     if limit_strategy == "</think>":
-        # for ernie4_5_vl
+        # for ernie-45-vl
         limit_thinking_content_length_v1(
             sampled_token_ids,
             max_think_lens,
@@ -110,7 +110,7 @@ def limit_thinking_content_length(
             think_end_id,
         )
     elif limit_strategy == "\n</think>\n\n":
-        # for ernie_x1
+        # for ernie-x1
         assert line_break_id > 0
         limit_thinking_content_length_v2(
             sampled_token_ids,
@@ -136,7 +136,7 @@ def speculate_limit_thinking_content_length(
     line_break_id: int = None,
 ):
     if limit_strategy == "</think>":
-        # for ernie4_5_vl
+        # for ernie-45-vl
         speculate_limit_thinking_content_length_v1(
             accept_tokens,
             max_think_lens,
@@ -147,7 +147,7 @@ def speculate_limit_thinking_content_length(
             think_end_id,
         )
     elif limit_strategy == "\n</think>\n\n":
-        # for ernie_x1
+        # for ernie-x1
         assert line_break_id > 0
         speculate_limit_thinking_content_length_v2(
             accept_tokens,
diff --git a/fastdeploy/reasoning/abs_reasoning_parsers.py b/fastdeploy/reasoning/abs_reasoning_parsers.py
index 50e01e5a9f..0f3e6e3183 100644
--- a/fastdeploy/reasoning/abs_reasoning_parsers.py
+++ b/fastdeploy/reasoning/abs_reasoning_parsers.py
@@ -125,6 +125,7 @@ class ReasoningParserManager:
 
         Raise a KeyError exception if the name is not registered.
         """
+        name = name.replace("_", "-")
         if name in cls.reasoning_parsers:
             return cls.reasoning_parsers[name]
 
diff --git a/fastdeploy/reasoning/ernie_x1_reasoning_parsers.py b/fastdeploy/reasoning/ernie_x1_reasoning_parsers.py
index 54b72a0eb5..77fc1d5ada 100644
--- a/fastdeploy/reasoning/ernie_x1_reasoning_parsers.py
+++ b/fastdeploy/reasoning/ernie_x1_reasoning_parsers.py
@@ -5,10 +5,10 @@ from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest, DeltaM
 from fastdeploy.reasoning import ReasoningParser, ReasoningParserManager
 
 
-@ReasoningParserManager.register_module("ernie_x1")
+@ReasoningParserManager.register_module("ernie-x1")
 class ErnieX1ReasoningParser(ReasoningParser):
     """
-    Reasoning parser for ernie_x1 model with stricter boundary checking.
+    Reasoning parser for ernie-x1 model with stricter boundary checking.
 
     Unified rules:
     - Do not strip newline before </think>
diff --git a/fastdeploy/worker/xpu_model_runner.py b/fastdeploy/worker/xpu_model_runner.py
index f9dad431e7..58d7ff8a45 100644
--- a/fastdeploy/worker/xpu_model_runner.py
+++ b/fastdeploy/worker/xpu_model_runner.py
@@ -203,7 +203,7 @@ def xpu_post_process(
         step_idx = share_inputs["step_idx"]
         limit_think_status = share_inputs["limit_think_status"]
         if limit_strategy == "</think>":
-            # for ernie4_5_vl
+            # for ernie-45-vl
             limit_thinking_content_length_v1(
                 sampled_token_ids,
                 max_think_lens,
@@ -212,7 +212,7 @@ def xpu_post_process(
                 think_end_id,
             )
         elif limit_strategy == "\n</think>\n\n":
-            # for ernie_x1
+            # for ernie-x1
             assert line_break_id > 0
             limit_thinking_content_length_v2(
                 sampled_token_ids,
diff --git a/tests/entrypoints/openai/test_max_streaming_tokens.py b/tests/entrypoints/openai/test_max_streaming_tokens.py
index 01b6346e03..3a772f9193 100644
--- a/tests/entrypoints/openai/test_max_streaming_tokens.py
+++ b/tests/entrypoints/openai/test_max_streaming_tokens.py
@@ -412,7 +412,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
                 "test_data": {
                     "request_id": "test_1",
                     "outputs": {
-                        "token_ids": [789],
+                        "token_ids": [123, 456, 789],
                         "text": "Edge case response",
                         "reasoning_content": None,
                         "tool_call": None,
@@ -424,7 +424,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
                     "previous_num_tokens": 1,
                 },
                 "mock_request": ChatCompletionRequest(
-                    model="test", messages=[], return_token_ids=True, max_tokens=5, n=2
+                    model="test", messages=[], return_token_ids=True, max_tokens=1, n=2
                 ),
                 "expected": {
                     "index": 1,
@@ -434,7 +434,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
                     "raw_prediction": None,
                     "num_cached_tokens": 0,
                     "num_image_tokens": 0,
-                    "finish_reason": "stop",
+                    "finish_reason": "length",
                 },
             },
         ]
diff --git a/tests/entrypoints/openai/test_serving_completion.py b/tests/entrypoints/openai/test_serving_completion.py
index d48ce4d5b7..c2a36d1855 100644
--- a/tests/entrypoints/openai/test_serving_completion.py
+++ b/tests/entrypoints/openai/test_serving_completion.py
@@ -73,9 +73,9 @@ class TestOpenAIServingCompletion(unittest.TestCase):
         self.assertTrue(serving_completion._check_master())
 
     def test_calc_finish_reason_tool_calls(self):
-        # 创建一个模拟的engine_client，并设置reasoning_parser为"ernie_x1"
+        # 创建一个模拟的engine_client，并设置reasoning_parser为"ernie-x1"
         engine_client = Mock()
-        engine_client.reasoning_parser = "ernie_x1"
+        engine_client.reasoning_parser = "ernie-x1"
         # 创建一个OpenAIServingCompletion实例
         serving_completion = OpenAIServingCompletion(engine_client, None, "pid", "ips", 360)
         # 创建一个模拟的output，并设置finish_reason为"tool_call"
@@ -86,9 +86,9 @@ class TestOpenAIServingCompletion(unittest.TestCase):
         assert result == "tool_calls"
 
     def test_calc_finish_reason_stop(self):
-        # 创建一个模拟的engine_client，并设置reasoning_parser为"ernie_x1"
+        # 创建一个模拟的engine_client，并设置reasoning_parser为"ernie-x1"
         engine_client = Mock()
-        engine_client.reasoning_parser = "ernie_x1"
+        engine_client.reasoning_parser = "ernie-x1"
         # 创建一个OpenAIServingCompletion实例
         serving_completion = OpenAIServingCompletion(engine_client, None, "pid", "ips", 360)
         # 创建一个模拟的output，并设置finish_reason为其他值
diff --git a/tests/reasoning/test_reasoning_parser.py b/tests/reasoning/test_reasoning_parser.py
index 9e06523b02..26a7457db9 100644
--- a/tests/reasoning/test_reasoning_parser.py
+++ b/tests/reasoning/test_reasoning_parser.py
@@ -91,7 +91,7 @@ class TestReasoningParserManager(unittest.TestCase):
         Test that a parser can be registered and retrieved successfully.
         Verifies normal registration and retrieval functionality.
         """
-        ReasoningParserManager.register_module(module=TestReasoningParser, name="test_parser", force=True)
+        ReasoningParserManager.register_module(module=TestReasoningParser, name="test-parser", force=True)
         parser_cls = ReasoningParserManager.get_reasoning_parser("test_parser")
         self.assertIs(parser_cls, TestReasoningParser)