mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
6e16438a57
* feat: implement log channel separation and request log level system * fix: log system improvements based on review * add request_id to error logs, use RequestLogLevel enum, and unify logger implementation from utils to logger module
256 lines
11 KiB
Python
256 lines
11 KiB
Python
"""
|
|
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
|
|
import asyncio
|
|
import unittest
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
from fastdeploy.engine.request import RequestStatus
|
|
from fastdeploy.entrypoints.engine_client import EngineClient
|
|
|
|
|
|
class TestEngineClientAbort(unittest.TestCase):
    """Test cases for EngineClient.abort method.

    Every test drives the ``abort`` coroutine on a private event loop while
    ``EngineClient._send_task`` is mocked, then inspects the payloads that
    were handed to that mock.
    """

    def setUp(self):
        """Create a private event loop and an EngineClient built from mocked collaborators."""
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)

        # Create a mock FDConfig
        self.mock_fd_config = MagicMock()
        self.mock_fd_config.parallel_config.tensor_parallel_size = 1
        self.mock_fd_config.model_config.enable_mm = False
        self.mock_fd_config.model_config.max_model_len = 2048
        self.mock_fd_config.model_config.enable_logprob = True
        self.mock_fd_config.cache_config.enable_prefix_caching = False
        self.mock_fd_config.scheduler_config.splitwise_role = "mixed"
        self.mock_fd_config.limit_mm_per_prompt = 5
        self.mock_fd_config.eplb_config.enable_eplb = False
        self.mock_fd_config.structured_outputs_config.reasoning_parser = None
        self.mock_fd_config.mm_processor_kwargs = {}
        self.mock_fd_config.tool_parser = None
        self.mock_fd_config.cache_config.max_processor_cache = 0

        # Create EngineClient instance. The collaborators are replaced only for
        # the duration of the constructor call, exactly like the nested
        # ``with patch(...)`` blocks this replaces.
        with patch.multiple(
            "fastdeploy.entrypoints.engine_client",
            InputPreprocessor=MagicMock(),
            IPCSignal=MagicMock(),
            StatefulSemaphore=MagicMock(),
            DealerConnectionManager=MagicMock(),
            FileLock=MagicMock(),
        ):
            self.engine_client = EngineClient(pid=12345, port=8000, fd_config=self.mock_fd_config, workers=1)

    def tearDown(self):
        """Unregister and close the per-test event loop."""
        # Unregister before closing so that no later code in this process
        # picks up an already-closed loop as the "current" one.
        asyncio.set_event_loop(None)
        self.loop.close()

    @staticmethod
    def _abort_payload(request_id):
        """Return the payload expected to be sent when ``request_id`` is aborted."""
        return {"request_id": request_id, "status": RequestStatus.ABORT.value}

    def _run_abort(self, request_id, n):
        """Run ``EngineClient.abort(request_id, n=n)`` to completion on the test loop."""
        self.loop.run_until_complete(self.engine_client.abort(request_id, n=n))

    def _assert_sent(self, mock_send_task, expected_request_ids):
        """Assert ``_send_task`` received one abort payload per expected id, in order."""
        self.assertEqual(mock_send_task.call_count, len(expected_request_ids))
        sent_args = [recorded.args for recorded in mock_send_task.call_args_list]
        wanted_args = [(self._abort_payload(request_id),) for request_id in expected_request_ids]
        self.assertEqual(sent_args, wanted_args)

    @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
    @patch.object(EngineClient, "_send_task")
    def test_abort_single_request(self, mock_send_task):
        """Test aborting a single request"""
        self._run_abort("test_request", n=1)

        # Verify _send_task was called with correct data
        mock_send_task.assert_called_once_with(self._abort_payload("test_request_0"))

    @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
    @patch.object(EngineClient, "_send_task")
    def test_abort_multiple_requests(self, mock_send_task):
        """Test aborting multiple requests"""
        self._run_abort("test_request", n=3)

        # One payload per index, in ascending order
        self._assert_sent(mock_send_task, ["test_request_0", "test_request_1", "test_request_2"])

    @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
    @patch.object(EngineClient, "_send_task")
    def test_abort_with_existing_suffix(self, mock_send_task):
        """Test aborting request that already has _number suffix"""
        self._run_abort("test_request_123_2", n=2)

        # The trailing "_2" is dropped and the remaining prefix is re-indexed
        self._assert_sent(mock_send_task, ["test_request_123_0", "test_request_123_1"])

    @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
    @patch.object(EngineClient, "_send_task")
    def test_abort_with_no_suffix(self, mock_send_task):
        """Test aborting request without _number suffix"""
        self._run_abort("test_request_without_suffix", n=2)

        # The full request_id is used as the prefix
        self._assert_sent(mock_send_task, ["test_request_without_suffix_0", "test_request_without_suffix_1"])

    @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
    @patch.object(EngineClient, "_send_task")
    def test_abort_with_zero_n(self, mock_send_task):
        """Test aborting with n=0 should not send any requests"""
        self._run_abort("test_request_123", n=0)

        # Verify _send_task was not called
        mock_send_task.assert_not_called()

    @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
    @patch.object(EngineClient, "_send_task")
    def test_abort_with_negative_n(self, mock_send_task):
        """Test aborting with negative n should not send any requests"""
        self._run_abort("test_request_123", n=-1)

        # Verify _send_task was not called
        mock_send_task.assert_not_called()

    @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", False)
    @patch.object(EngineClient, "_send_task")
    def test_abort_when_feature_disabled(self, mock_send_task):
        """Test abort when FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE is False"""
        self._run_abort("test_request_123", n=1)

        # Verify _send_task was not called
        mock_send_task.assert_not_called()

    @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
    @patch.object(EngineClient, "_send_task")
    def test_abort_request_id_regex_parsing(self, mock_send_task):
        """Test that request_id regex parsing works correctly for various formats"""
        # (request_id passed to abort, prefix expected in the sent payload)
        test_cases = [
            ("simple_request", "simple_request"),
            ("request_with_underscores", "request_with_underscores"),
            ("request_123", "request"),
            ("request_123_456", "request_123"),
            ("request_0", "request"),
            ("complex_name_123_456_789", "complex_name_123_456"),
        ]

        for input_request_id, expected_prefix in test_cases:
            with self.subTest(input_request_id=input_request_id):
                # Each case starts from a clean call history
                mock_send_task.reset_mock()

                self._run_abort(input_request_id, n=1)

                # Verify _send_task was called with correct prefix
                mock_send_task.assert_called_once_with(self._abort_payload(f"{expected_prefix}_0"))

    @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
    @patch("fastdeploy.entrypoints.engine_client.log_request")
    @patch.object(EngineClient, "_send_task")
    def test_abort_logging(self, mock_send_task, mock_log_request):
        """Test that abort method logs correctly"""
        self._run_abort("test_request", n=2)

        # Verify log_request was called twice
        self.assertEqual(mock_log_request.call_count, 2)
        start_call, done_call = mock_log_request.call_args_list

        # First message announces the abort, second one reports the aborted request IDs
        self.assertIn("abort request_id", start_call.kwargs.get("message", ""))
        self.assertIn("Aborted request(s)", done_call.kwargs.get("message", ""))

    @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
    @patch("fastdeploy.entrypoints.engine_client.api_server_logger")
    @patch.object(EngineClient, "_send_task")
    def test_abort_warning_logging_for_invalid_format(self, mock_send_task, mock_logger):
        """Test that abort method logs warning for invalid request_id format"""
        # "just_a_string" does not end with _<number>, which is the format this
        # test expects abort to warn about.
        self._run_abort("just_a_string", n=1)

        # Should have logged warning about format error
        mock_logger.warning.assert_called()
|
|
|
|
|
|
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
|