[Feature] implement log channel separation and request log level system (#7190)

* feat: implement log channel separation and request log level system

* fix: log system improvements based on review

* add request_id to error logs, use the RequestLogLevel enum, and unify the logger implementation by moving it from utils into the logger module
This commit is contained in:
zhouchong
2026-04-16 15:13:05 +08:00
committed by GitHub
parent 29495b2cf1
commit 6e16438a57
52 changed files with 1956 additions and 639 deletions
+8 -3
View File
@@ -22,6 +22,7 @@ from unittest.mock import MagicMock, Mock, patch
import paddle
from fastdeploy.engine.request import RequestMetrics, RequestOutput
from fastdeploy.logger.request_logger import RequestLogLevel
from fastdeploy.output.token_processor import TokenProcessor
paddle.set_device("cpu")
@@ -295,9 +296,9 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase):
processor.tokens_counter[task_id] = 0
processor.tokens_counter[task2.request_id] = 0
# Mock llm_logger to capture the log message and envs.ENABLE_V1_KVCACHE_SCHEDULER
# Mock log_request to capture the log message, and patch envs.ENABLE_V1_KVCACHE_SCHEDULER to 0
with (
patch("fastdeploy.output.token_processor.llm_logger") as mock_logger,
patch("fastdeploy.output.token_processor.log_request") as mock_log_request,
patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 0),
):
# Call the method
@@ -306,7 +307,11 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase):
# In speculative decoding mode, when accept_num[i] == PREEMPTED_TOKEN_ID,
# the code logs "sync preemption" and continues without triggering abort recycling
# This is the expected behavior for speculative decoding mode
mock_logger.info.assert_any_call(f"sync preemption for request_id {task_id} done.")
mock_log_request.assert_any_call(
RequestLogLevel.STAGES,
message="sync preemption for request_id {request_id} done.",
request_id=task_id,
)
# Verify that _recycle_resources was NOT called for the aborted task
# (it may be called for other tasks like test_request_2 if they receive EOS tokens)
for call in processor._recycle_resources.call_args_list:
@@ -21,6 +21,7 @@ from unittest.mock import MagicMock, patch
import numpy as np
from fastdeploy.engine.request import CompletionOutput, RequestMetrics, RequestOutput
from fastdeploy.logger.request_logger import RequestLogLevel
from fastdeploy.output.token_processor import TokenProcessor
from fastdeploy.worker.output import LogprobsLists
@@ -171,16 +172,20 @@ class TestTokenProcessorLogprobs(unittest.TestCase):
# Mock _recycle_resources to track if it's called
self.processor._recycle_resources = MagicMock()
# Mock the llm_logger module and envs.ENABLE_V1_KVCACHE_SCHEDULER
# Mock the log_request function and patch envs.ENABLE_V1_KVCACHE_SCHEDULER to 1
with (
patch("fastdeploy.output.token_processor.llm_logger") as mock_logger,
patch("fastdeploy.output.token_processor.log_request") as mock_log_request,
patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 1),
):
# Call the method
result = self.processor._process_batch_output_use_zmq([stream_data])
# Verify the recycling logic was triggered
mock_logger.info.assert_any_call(f"start to recycle abort request_id {task_id}")
# Verify the recycling logic was triggered via log_request
mock_log_request.assert_any_call(
RequestLogLevel.STAGES,
message="start to recycle abort request_id {request_id}",
request_id=task_id,
)
self.processor.resource_manager.recycle_abort_task.assert_called_once_with(task_id)
self.assertNotIn(task_id, self.processor.resource_manager.to_be_aborted_req_id_set)
self.assertEqual(len(result), 0) # Aborted task is skipped (continue)