mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix] prevent requests from entering running state without a slot (#7141)
* [fix] prevent requests from entering running state without a slot
* [fix] count abort set
* [fix] count preempted task in waiting list
This commit is contained in:
@@ -1152,8 +1152,7 @@ class EngineService:
|
||||
time.sleep(0.005)
|
||||
|
||||
except RuntimeError as e:
|
||||
if "cannot schedule new futures after shutdown" in str(e):
|
||||
break
|
||||
raise e
|
||||
except Exception as e:
|
||||
err_msg = "Error happened while insert task to engine: {}, {}.".format(e, str(traceback.format_exc()))
|
||||
self.llm_logger.error(err_msg)
|
||||
|
||||
@@ -941,7 +941,13 @@ class ResourceManagerV1(ResourceManager):
|
||||
if not preempted_reqs:
|
||||
skip_requests: list[Request] = []
|
||||
while self.waiting and token_budget > 0:
|
||||
if len(self.running) == self.max_num_seqs:
|
||||
if (
|
||||
len(self.running)
|
||||
+ len(self.to_be_rescheduled_request_id_set)
|
||||
+ len(self.to_be_aborted_req_id_set)
|
||||
+ sum([req.status == RequestStatus.PREEMPTED for req in self.waiting])
|
||||
>= self.max_num_seqs
|
||||
):
|
||||
break
|
||||
|
||||
request = self.waiting[0]
|
||||
|
||||
Reference in New Issue
Block a user