[BugFix] prevent requests from entering running state without a slot (#7141)

* [fix] prevent requests from entering running state without a slot

* [fix] count the to-be-aborted request set toward the max_num_seqs slot limit

* [fix] count preempted tasks in the waiting list toward the max_num_seqs slot limit
This commit is contained in:
Yonghua Li
2026-04-03 14:07:57 +08:00
committed by GitHub
parent 0ce85190db
commit 3b8dac3b97
2 changed files with 8 additions and 3 deletions
+1 -2
View File
@@ -1152,8 +1152,7 @@ class EngineService:
time.sleep(0.005)
except RuntimeError as e:
if "cannot schedule new futures after shutdown" in str(e):
break
raise e
except Exception as e:
err_msg = "Error happened while insert task to engine: {}, {}.".format(e, str(traceback.format_exc()))
self.llm_logger.error(err_msg)
@@ -941,7 +941,13 @@ class ResourceManagerV1(ResourceManager):
if not preempted_reqs:
skip_requests: list[Request] = []
while self.waiting and token_budget > 0:
if len(self.running) == self.max_num_seqs:
if (
len(self.running)
+ len(self.to_be_rescheduled_request_id_set)
+ len(self.to_be_aborted_req_id_set)
+ sum([req.status == RequestStatus.PREEMPTED for req in self.waiting])
>= self.max_num_seqs
):
break
request = self.waiting[0]