mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Support stopping inference for the corresponding request in the online service after the request is disconnected. (#5320)
* request disconnect
* request disconnect
* fix bug
* fix bug--amend

---------

Co-authored-by: root <root@yq01-sys-rpm26xc1knu.yq01.baidu.com>
This commit is contained in:
@@ -58,6 +58,7 @@ class ResourceManager:
|
||||
self.req_dict = dict()
|
||||
# current batch status of the engine
|
||||
self.real_bsz = 0
|
||||
self.abort_req_ids_set = set()
|
||||
llm_logger.info(f"{self.info()}")
|
||||
main_process_metrics.max_batch_size.set(max_num_seqs)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user