mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] implement log channel separation and request log level system (#7190)
* feat: implement log channel separation and request log level system
* fix: log system improvements based on review
* add request_id to error logs, use RequestLogLevel enum, and unify logger implementation from utils to logger module
This commit is contained in:
@@ -22,6 +22,7 @@ from unittest.mock import MagicMock, Mock, patch
|
||||
import paddle
|
||||
|
||||
from fastdeploy.engine.request import RequestMetrics, RequestOutput
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel
|
||||
from fastdeploy.output.token_processor import TokenProcessor
|
||||
|
||||
paddle.set_device("cpu")
|
||||
@@ -295,9 +296,9 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase):
|
||||
processor.tokens_counter[task_id] = 0
|
||||
processor.tokens_counter[task2.request_id] = 0
|
||||
|
||||
# Mock llm_logger to capture the log message and envs.ENABLE_V1_KVCACHE_SCHEDULER
|
||||
# Mock log_request to capture the log message and envs.ENABLE_V1_KVCACHE_SCHEDULER
|
||||
with (
|
||||
patch("fastdeploy.output.token_processor.llm_logger") as mock_logger,
|
||||
patch("fastdeploy.output.token_processor.log_request") as mock_log_request,
|
||||
patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 0),
|
||||
):
|
||||
# Call the method
|
||||
@@ -306,7 +307,11 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase):
|
||||
# In speculative decoding mode, when accept_num[i] == PREEMPTED_TOKEN_ID,
|
||||
# the code logs "sync preemption" and continues without triggering abort recycling
|
||||
# This is the expected behavior for speculative decoding mode
|
||||
mock_logger.info.assert_any_call(f"sync preemption for request_id {task_id} done.")
|
||||
mock_log_request.assert_any_call(
|
||||
RequestLogLevel.STAGES,
|
||||
message="sync preemption for request_id {request_id} done.",
|
||||
request_id=task_id,
|
||||
)
|
||||
# Verify that _recycle_resources was NOT called for the aborted task
|
||||
# (it may be called for other tasks like test_request_2 if they receive EOS tokens)
|
||||
for call in processor._recycle_resources.call_args_list:
|
||||
|
||||
@@ -21,6 +21,7 @@ from unittest.mock import MagicMock, patch
|
||||
import numpy as np
|
||||
|
||||
from fastdeploy.engine.request import CompletionOutput, RequestMetrics, RequestOutput
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel
|
||||
from fastdeploy.output.token_processor import TokenProcessor
|
||||
from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
@@ -171,16 +172,20 @@ class TestTokenProcessorLogprobs(unittest.TestCase):
|
||||
# Mock _recycle_resources to track if it's called
|
||||
self.processor._recycle_resources = MagicMock()
|
||||
|
||||
# Mock the llm_logger module and envs.ENABLE_V1_KVCACHE_SCHEDULER
|
||||
# Mock the log_request function and envs.ENABLE_V1_KVCACHE_SCHEDULER
|
||||
with (
|
||||
patch("fastdeploy.output.token_processor.llm_logger") as mock_logger,
|
||||
patch("fastdeploy.output.token_processor.log_request") as mock_log_request,
|
||||
patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 1),
|
||||
):
|
||||
# Call the method
|
||||
result = self.processor._process_batch_output_use_zmq([stream_data])
|
||||
|
||||
# Verify the recycling logic was triggered
|
||||
mock_logger.info.assert_any_call(f"start to recycle abort request_id {task_id}")
|
||||
# Verify the recycling logic was triggered via log_request
|
||||
mock_log_request.assert_any_call(
|
||||
RequestLogLevel.STAGES,
|
||||
message="start to recycle abort request_id {request_id}",
|
||||
request_id=task_id,
|
||||
)
|
||||
self.processor.resource_manager.recycle_abort_task.assert_called_once_with(task_id)
|
||||
self.assertNotIn(task_id, self.processor.resource_manager.to_be_aborted_req_id_set)
|
||||
self.assertEqual(len(result), 0) # Aborted task is skipped (continue)
|
||||
|
||||
Reference in New Issue
Block a user