[Feature] implement log channel separation and request log level system (#7190)

* feat: implement log channel separation and request log level system
* fix: log system improvements based on review
* add request_id to error logs, use RequestLogLevel enum, and unify logger implementation from utils to logger module
2026-04-23 00:17:25 +08:00 · 2026-04-16 15:13:05 +08:00
parent 29495b2cf1
commit 6e16438a57
52 changed files with 1956 additions and 639 deletions
@@ -22,6 +22,7 @@ from unittest.mock import MagicMock, Mock, patch
 import paddle

 from fastdeploy.engine.request import RequestMetrics, RequestOutput
+from fastdeploy.logger.request_logger import RequestLogLevel
 from fastdeploy.output.token_processor import TokenProcessor

 paddle.set_device("cpu")
@@ -295,9 +296,9 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase):
        processor.tokens_counter[task_id] = 0
        processor.tokens_counter[task2.request_id] = 0

-        # Mock llm_logger to capture the log message and envs.ENABLE_V1_KVCACHE_SCHEDULER
+        # Mock log_request to capture the log message and envs.ENABLE_V1_KVCACHE_SCHEDULER
        with (
-            patch("fastdeploy.output.token_processor.llm_logger") as mock_logger,
+            patch("fastdeploy.output.token_processor.log_request") as mock_log_request,
            patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 0),
        ):
            # Call the method
@@ -306,7 +307,11 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase):
            # In speculative decoding mode, when accept_num[i] == PREEMPTED_TOKEN_ID,
            # the code logs "sync preemption" and continues without triggering abort recycling
            # This is the expected behavior for speculative decoding mode
-            mock_logger.info.assert_any_call(f"sync preemption for request_id {task_id} done.")
+            mock_log_request.assert_any_call(
+                RequestLogLevel.STAGES,
+                message="sync preemption for request_id {request_id} done.",
+                request_id=task_id,
+            )
            # Verify that _recycle_resources was NOT called for the aborted task
            # (it may be called for other tasks like test_request_2 if they receive EOS tokens)
            for call in processor._recycle_resources.call_args_list:
@@ -21,6 +21,7 @@ from unittest.mock import MagicMock, patch
 import numpy as np

 from fastdeploy.engine.request import CompletionOutput, RequestMetrics, RequestOutput
+from fastdeploy.logger.request_logger import RequestLogLevel
 from fastdeploy.output.token_processor import TokenProcessor
 from fastdeploy.worker.output import LogprobsLists

@@ -171,16 +172,20 @@ class TestTokenProcessorLogprobs(unittest.TestCase):
        # Mock _recycle_resources to track if it's called
        self.processor._recycle_resources = MagicMock()

-        # Mock the llm_logger module and envs.ENABLE_V1_KVCACHE_SCHEDULER
+        # Mock the log_request function and envs.ENABLE_V1_KVCACHE_SCHEDULER
        with (
-            patch("fastdeploy.output.token_processor.llm_logger") as mock_logger,
+            patch("fastdeploy.output.token_processor.log_request") as mock_log_request,
            patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 1),
        ):
            # Call the method
            result = self.processor._process_batch_output_use_zmq([stream_data])

-            # Verify the recycling logic was triggered
-            mock_logger.info.assert_any_call(f"start to recycle abort request_id {task_id}")
+            # Verify the recycling logic was triggered via log_request
+            mock_log_request.assert_any_call(
+                RequestLogLevel.STAGES,
+                message="start to recycle abort request_id {request_id}",
+                request_id=task_id,
+            )
            self.processor.resource_manager.recycle_abort_task.assert_called_once_with(task_id)
            self.assertNotIn(task_id, self.processor.resource_manager.to_be_aborted_req_id_set)
            self.assertEqual(len(result), 0)  # Aborted task is skipped (continue)