[Optimization] Improve perf for fd response token with internal adapter (#4992)

* [Optimize] Improve perf for fd response token with internal adapter

* fix

* fix bug

* fix ci

* fix ci

* fix ci

* fix ci
This commit is contained in:
chenjian
2025-11-21 19:02:03 +08:00
committed by GitHub
parent 5bcf79d780
commit 3ea1b44a58
15 changed files with 202 additions and 67 deletions
+2
View File
@@ -71,6 +71,7 @@ class MockScheduledResponse:
def __init__(self, request_output):
    """Build a mock scheduled response from a request output object.

    Args:
        request_output: Object exposing ``request_id`` and ``finished``
            attributes; both values are copied onto this instance.
    """
    self.request_id = request_output.request_id
    self.finished = request_output.finished
    # The mock is its own ``raw`` payload.
    # NOTE(review): presumably consumers read ``response.raw`` to reach the
    # underlying output object -- confirm against the scheduler code.
    self.raw = self
# Mock LocalScheduler base class
@@ -93,6 +94,7 @@ class MockLocalScheduler:
self.ids_read_cursor = 0
self.requests_not_empty = threading.Condition()
self.responses_not_empty = threading.Condition()
self.batch_responses_per_step = list()
def calc_required_blocks(self, token_len, block_size):
    """Return ``token_len`` divided by ``block_size``, rounded up.

    For positive integers this is the number of ``block_size``-sized
    blocks needed to hold ``token_len`` items.

    Args:
        token_len: Number of items to place into blocks.
        block_size: Capacity of a single block.

    Returns:
        ``(token_len + block_size - 1) // block_size``.

    Raises:
        ZeroDivisionError: If ``block_size`` is 0.
    """
    # Adding ``block_size - 1`` before the floor division rounds any
    # partially filled block up to a whole one.
    return (token_len + block_size - 1) // block_size