[Feature] Support stopping the inference for the corresponding request in the online service after a disconnection request. (#5320)

* request disconnect * request disconnect * fix bug * fix bug--amend --------- Co-authored-by: root <root@yq01-sys-rpm26xc1knu.yq01.baidu.com>
2026-04-23 08:21:53 +08:00 · 2026-01-16 11:46:13 +08:00
parent 8f035101ad
commit b2a2e11551
25 changed files with 1339 additions and 63 deletions
@@ -153,6 +153,60 @@ class TestTokenProcessorLogprobs(unittest.TestCase):

        self.assertEqual(len(result), 0)

+    def test_process_batch_output_use_zmq_aborted_task_negative_token(self):
+        """Test aborted task receiving negative token triggers recycling logic"""
+        # Set up task as aborted
+        task_id = "test_aborted_request"
+        self.task_mock.request_id = task_id
+        self.processor.resource_manager.abort_req_ids_set = {task_id}
+
+        # Create stream data with negative token
+        stream_data = MagicMock()
+        stream_data.tokens = np.array([1, 2, -1])  # Last token is negative
+        stream_data.batch_id = 0
+
+        # Mock _recycle_resources to track if it's called
+        self.processor._recycle_resources = MagicMock()
+
+        # Mock the llm_logger module and envs.ENABLE_V1_KVCACHE_SCHEDULER
+        with (
+            patch("fastdeploy.output.token_processor.llm_logger") as mock_logger,
+            patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 0),
+        ):
+            # Call the method
+            result = self.processor._process_batch_output_use_zmq([stream_data])
+
+            # Verify the recycling logic was triggered
+            mock_logger.info.assert_any_call(f"Aborted task {task_id} received negative token. Recycling.")
+            self.processor._recycle_resources.assert_called_once_with(task_id, 0, self.task_mock)
+            self.assertNotIn(task_id, self.processor.resource_manager.abort_req_ids_set)
+            self.assertEqual(len(result), 1)  # Should return abort result
+            self.assertEqual(result[0].finished, True)
+            self.assertEqual(result[0].error_code, 499)
+            self.assertIn("aborted", result[0].error_msg.lower())
+
+    def test_process_batch_output_use_zmq_non_aborted_task_negative_token(self):
+        """Test non-aborted task receiving negative token does not trigger recycling"""
+        # Set up task as not aborted
+        task_id = "test_normal_request"
+        self.task_mock.request_id = task_id
+        self.processor.resource_manager.abort_req_ids_set = set()  # Empty set
+
+        # Create stream data with negative token
+        stream_data = MagicMock()
+        stream_data.tokens = np.array([1, 2, -1])  # Last token is negative
+        stream_data.batch_id = 0
+
+        # Mock _recycle_resources to track if it's called
+        self.processor._recycle_resources = MagicMock()
+
+        # Call the method
+        self.processor._process_batch_output_use_zmq([stream_data])
+
+        # Verify recycling logic was NOT triggered
+        self.processor._recycle_resources.assert_not_called()
+        self.processor.llm_logger.info.assert_not_called()
+

 if __name__ == "__main__":
    unittest.main()