[Optimize] Optimize ttft for ep (#6098)

* optimize ttft

* fix

* fix

* fix ci

* fix ci

* fix

* fix bug

* fix

* add comments

* fix ci

* fix
This commit is contained in:
chenjian
2026-02-04 15:03:29 +08:00
committed by GitHub
parent 6e96bd0bd2
commit 90db0bdd0d
10 changed files with 118 additions and 142 deletions
-26
View File
@@ -411,32 +411,6 @@ class TestDPLocalScheduler(unittest.TestCase):
self.assertEqual(scheduler.ids, ["fresh_req"])
self.assertEqual(scheduler.ids_read_cursor, 1)
def test_get_requests_insufficient_resources(self):
    """Check that get_requests yields nothing and logs when blocks are scarce.

    The scheduler is asked for one request while only 5 blocks are
    available and 10 output blocks are reserved. The call must return an
    empty list and emit a debug log whose message mentions "insufficient".
    """
    # Clear the recorded calls on the shared logger mock before exercising the scheduler.
    mock_logger.reset_mock()

    scarce_limits = {
        "available_blocks": 5,
        "block_size": 16,
        "reserved_output_blocks": 10,
        "max_num_batched_tokens": 1024,
        "batch": 1,
    }

    # Replace the condition variable with a mock to avoid threading issues.
    with patch.object(self.scheduler, "requests_not_empty"):
        scheduled = self.scheduler.get_requests(**scarce_limits)

    self.assertEqual(scheduled, [])

    # A debug record must have been written for the insufficient-resources case.
    self.assertTrue(mock_logger.debug.called)

    # Inspect the first positional argument of the most recent debug() call.
    positional_args = mock_logger.debug.call_args[0]
    logged_message = positional_args[0]
    self.assertIn("insufficient", logged_message.lower())
def test_get_requests_insufficient_batch(self):
    """Check that get_requests returns an empty list when batch is 0."""
    zero_batch_limits = {
        "available_blocks": 20,
        "block_size": 16,
        "reserved_output_blocks": 10,
        "max_num_batched_tokens": 1024,
        "batch": 0,
    }

    # Swap out the "requests_not_empty" attribute for the duration of the call.
    with patch.object(self.scheduler, "requests_not_empty"):
        scheduled = self.scheduler.get_requests(**zero_batch_limits)

    self.assertEqual(scheduled, [])
@patch("time.time")
@patch.object(dp_scheduler_module, "envs")
def test_get_requests_no_requests_available(self, mock_envs, mock_time):