mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
abort requests (#6992)
This commit is contained in:
@@ -3510,3 +3510,215 @@ class TestCommonEngineAdditionalCoverage(unittest.TestCase):
|
||||
# At least one sleep call was made, confirming the inner function executed
|
||||
self.assertGreaterEqual(call_count[0], 1)
|
||||
self._detach_finalizer(eng)
|
||||
|
||||
# ── _control_abort_requests / _wait_abort_complete ───────────────
|
||||
|
||||
def _make_abort_engine(self, splitwise_role="mixed"):
    """Build an engine whose collaborators are mocked for the abort tests.

    The config is created with ``num_gpu_blocks_override=4``; any role other
    than ``"mixed"`` additionally passes a ``router`` address to the config
    builder. The logger, data processor, resource manager and scheduler on
    the returned engine are all ``MagicMock`` instances.
    """
    # Only non-mixed roles carry the extra router setting.
    overrides = {"router": "0.0.0.0:9000"} if splitwise_role != "mixed" else {}
    engine_cfg = self._make_cfg(splitwise_role=splitwise_role, num_gpu_blocks_override=4, **overrides)
    eng = self._make_engine(engine_cfg)
    eng.llm_logger = MagicMock()

    # Data processor exposing a single EOS token id.
    processor = MagicMock()
    processor.eos_token_ids = [2]
    eng.data_processor = processor

    # Resource manager with an empty request table and empty abort sets.
    manager = MagicMock()
    manager.requests = {}
    manager.waiting_abort_req_id_set = set()
    manager.to_be_aborted_req_id_set = set()
    eng.resource_manager = manager

    def _reqs_in_aborting():
        # Resolve through ``eng`` on every call so tests that rebind the
        # abort sets after construction are still observed.
        current = eng.resource_manager
        return current.waiting_abort_req_id_set | current.to_be_aborted_req_id_set

    eng.resource_manager.get_reqs_in_aborting = _reqs_in_aborting

    # Scheduler with an empty request table and a recordable put_results.
    sched = MagicMock()
    sched.requests = {}
    sched.put_results = MagicMock()
    eng.scheduler = sched

    return eng
|
||||
|
||||
def _make_fake_request(self, output_token_ids=None):
|
||||
"""Create a fake request object for abort tests."""
|
||||
req = MagicMock()
|
||||
req.output_token_ids = output_token_ids or [10, 20, 30]
|
||||
req.metrics = MagicMock()
|
||||
req.metrics.arrival_time = 1000.0
|
||||
req.metrics.inference_start_time = 1000.1
|
||||
req.metrics.engine_recv_first_token_time = 1000.2
|
||||
return req
|
||||
|
||||
def test_control_abort_requests_not_v1_raises(self):
    """With ENABLE_V1_KVCACHE_SCHEDULER patched to 0 the abort call must raise.

    The raised exception's message is expected to contain "only supported".
    """
    eng = self._make_abort_engine()
    payload = {"abort_all": True, "req_ids": []}
    control_req = ControlRequest("ctrl-1", "abort_requests", payload)

    flag_path = "fastdeploy.engine.common_engine.envs.ENABLE_V1_KVCACHE_SCHEDULER"
    # Flag is switched off for the duration of the call only.
    with patch(flag_path, 0), self.assertRaises(Exception) as ctx:
        eng._control_abort_requests(control_req)

    self.assertIn("only supported", str(ctx.exception))
    self._detach_finalizer(eng)
|
||||
|
||||
def test_control_abort_requests_abort_all(self):
    """abort_all=True reports requests from both resource_manager and scheduler.

    One request lives in ``resource_manager.requests`` and one in
    ``scheduler.requests``; both ids must appear in ``result["aborted"]``,
    ``not_found`` must be empty and ``put_results`` must be called once.
    """
    eng = self._make_abort_engine()
    running_req = self._make_fake_request([10, 20])
    eng.resource_manager.requests = {"req-1_0": running_req}
    queued_req = MagicMock(raw=self._make_fake_request([30]))
    eng.scheduler.requests = {"req-2_0": queued_req}

    payload = {"abort_all": True, "req_ids": []}
    control_req = ControlRequest("ctrl-1", "abort_requests", payload)

    def drop_from_waiting(req_id):
        # Pretend the abort finished at once by removing the id from the waiting set.
        eng.resource_manager.waiting_abort_req_id_set.discard(req_id)

    eng.resource_manager.add_abort_req_ids = MagicMock(side_effect=drop_from_waiting)

    flag_path = "fastdeploy.engine.common_engine.envs.ENABLE_V1_KVCACHE_SCHEDULER"
    with patch(flag_path, 1):
        result = eng._control_abort_requests(control_req)

    aborted = result["aborted"]
    self.assertEqual(len(aborted), 2)
    self.assertEqual(result["not_found"], [])
    seen_ids = set(entry["request_id"] for entry in aborted)
    self.assertEqual(seen_ids, {"req-1_0", "req-2_0"})
    # Engine was built with the default "mixed" role, so results are published.
    eng.scheduler.put_results.assert_called_once()
    self._detach_finalizer(eng)
|
||||
|
||||
def test_control_abort_requests_by_req_ids_with_suffix_match(self):
    """Requested ids resolve either exactly or through a ``_0`` suffix.

    ``req-A`` is stored as ``req-A_0``, ``req-B`` is stored verbatim and
    ``req-C`` is absent, so it must be the only entry in ``not_found``.
    """
    eng = self._make_abort_engine()
    tracked = {}
    tracked["req-A_0"] = self._make_fake_request([1, 2, 3])
    tracked["req-B"] = self._make_fake_request([4, 5])
    eng.resource_manager.requests = tracked

    payload = {"abort_all": False, "req_ids": ["req-A", "req-B", "req-C"]}
    control_req = ControlRequest("ctrl-1", "abort_requests", payload)

    def drop_from_waiting(req_id):
        # Treat the abort as already completed for this id.
        eng.resource_manager.waiting_abort_req_id_set.discard(req_id)

    eng.resource_manager.add_abort_req_ids = MagicMock(side_effect=drop_from_waiting)

    flag_path = "fastdeploy.engine.common_engine.envs.ENABLE_V1_KVCACHE_SCHEDULER"
    with patch(flag_path, 1):
        result = eng._control_abort_requests(control_req)

    aborted_ids = set(entry["request_id"] for entry in result["aborted"])
    self.assertIn("req-A_0", aborted_ids)  # resolved through the _0 suffix
    self.assertIn("req-B", aborted_ids)  # resolved by exact id
    self.assertEqual(result["not_found"], ["req-C"])
    self._detach_finalizer(eng)
|
||||
|
||||
def test_control_abort_requests_no_match(self):
    """An unknown id yields an empty ``aborted`` list and is echoed in ``not_found``."""
    eng = self._make_abort_engine()
    payload = {"abort_all": False, "req_ids": ["nonexistent"]}
    control_req = ControlRequest("ctrl-1", "abort_requests", payload)

    flag_path = "fastdeploy.engine.common_engine.envs.ENABLE_V1_KVCACHE_SCHEDULER"
    with patch(flag_path, 1):
        result = eng._control_abort_requests(control_req)

    # The engine's request tables are empty, so nothing can match.
    self.assertEqual(result["aborted"], [])
    self.assertEqual(result["not_found"], ["nonexistent"])
    self._detach_finalizer(eng)
|
||||
|
||||
def test_control_abort_requests_prefill_skips_wait_and_put(self):
    """With the prefill role one request is aborted and put_results is never called.

    Intended to cover the branch where the prefill role skips
    ``_wait_abort_complete`` and ``put_results``; only the latter is
    asserted directly here.
    """
    eng = self._make_abort_engine(splitwise_role="prefill")
    eng.resource_manager.requests = {"req-1_0": self._make_fake_request()}
    # Plain mock: no side effect is needed for this scenario.
    eng.resource_manager.add_abort_req_ids = MagicMock()

    payload = {"abort_all": True, "req_ids": []}
    control_req = ControlRequest("ctrl-1", "abort_requests", payload)

    flag_path = "fastdeploy.engine.common_engine.envs.ENABLE_V1_KVCACHE_SCHEDULER"
    with patch(flag_path, 1):
        result = eng._control_abort_requests(control_req)

    self.assertEqual(len(result["aborted"]), 1)
    eng.scheduler.put_results.assert_not_called()
    self._detach_finalizer(eng)
|
||||
|
||||
def test_control_abort_requests_output_token_count(self):
    """``output_token_count`` equals the number of tokens the request had produced.

    The fake request carries five output token ids, so the first aborted
    entry must report a count of 5.
    """
    eng = self._make_abort_engine()
    five_token_req = self._make_fake_request([10, 20, 30, 40, 50])
    eng.resource_manager.requests = {"req-1_0": five_token_req}

    payload = {"abort_all": True, "req_ids": []}
    control_req = ControlRequest("ctrl-1", "abort_requests", payload)

    def drop_from_waiting(req_id):
        # Treat the abort as already completed for this id.
        eng.resource_manager.waiting_abort_req_id_set.discard(req_id)

    eng.resource_manager.add_abort_req_ids = MagicMock(side_effect=drop_from_waiting)

    flag_path = "fastdeploy.engine.common_engine.envs.ENABLE_V1_KVCACHE_SCHEDULER"
    with patch(flag_path, 1):
        result = eng._control_abort_requests(control_req)

    first_entry = result["aborted"][0]
    self.assertEqual(first_entry["output_token_count"], 5)
    self._detach_finalizer(eng)
|
||||
|
||||
def test_wait_abort_complete_immediate(self):
    """``_wait_abort_complete`` is expected to return at once when nothing is pending."""
    eng = self._make_abort_engine()
    # Both abort sets start out empty, so no listed id is still being aborted.
    already_done = ["req-1_0"]
    eng._wait_abort_complete(already_done)
    self._detach_finalizer(eng)
|
||||
|
||||
def test_wait_abort_complete_progress(self):
    """``_wait_abort_complete`` finishes once the pending id leaves the waiting set.

    ``time.sleep`` is replaced by a stub that removes the id, standing in
    for a background worker completing the abort.
    """
    eng = self._make_abort_engine()
    eng.resource_manager.waiting_abort_req_id_set = {"req-1_0"}

    sleep_calls = 0

    def fake_sleep(_seconds):
        nonlocal sleep_calls
        sleep_calls += 1
        # The first sleep is where the simulated cleanup takes place.
        eng.resource_manager.waiting_abort_req_id_set.discard("req-1_0")

    with patch("fastdeploy.engine.common_engine.time.sleep", fake_sleep):
        eng._wait_abort_complete(["req-1_0"])

    self.assertGreaterEqual(sleep_calls, 1)
    self._detach_finalizer(eng)
|
||||
|
||||
def test_wait_abort_complete_force_cleanup_stuck_in_to_be_aborted(self):
    """A stalled id in ``to_be_aborted_req_id_set`` ends up passed to ``recycle_abort_task``.

    ``time.time`` is replaced by a scripted clock that jumps from 100.0 to
    102.0, which exceeds the ``stall_timeout=1`` handed to
    ``_wait_abort_complete``; ``time.sleep`` is a no-op.
    """
    eng = self._make_abort_engine()
    eng.resource_manager.to_be_aborted_req_id_set = {"req-1_0"}

    def recycle_stub(req_id):
        # Recycling removes the id, which lets the wait loop finish.
        eng.resource_manager.to_be_aborted_req_id_set.discard(req_id)

    eng.resource_manager.recycle_abort_task = MagicMock(side_effect=recycle_stub)

    # Scripted clock readings; once exhausted the last reading is repeated.
    ticks = [100.0, 100.0, 102.0, 102.0, 102.0]
    reads = 0

    def fake_time():
        nonlocal reads
        last = len(ticks) - 1
        position = reads if reads < last else last
        reads += 1
        return ticks[position]

    def no_sleep(_seconds):
        return None

    with patch("fastdeploy.engine.common_engine.time.time", fake_time), patch(
        "fastdeploy.engine.common_engine.time.sleep", no_sleep
    ):
        eng._wait_abort_complete(["req-1_0"], stall_timeout=1)

    eng.resource_manager.recycle_abort_task.assert_called_with("req-1_0")
    self._detach_finalizer(eng)
|
||||
|
||||
Reference in New Issue
Block a user