mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Support stopping the inference for the corresponding request in the online service after a disconnection request. (#5320)
* request disconnect * request disconnect * fix bug * fix bug--amend --------- Co-authored-by: root <root@yq01-sys-rpm26xc1knu.yq01.baidu.com>
This commit is contained in:
@@ -230,7 +230,13 @@ class OpenAIServingCompletion:
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
|
||||
except asyncio.CancelledError as e:
|
||||
await self.engine_client.abort(f"{request_id}_0", num_choices)
|
||||
error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR, code=ErrorCode.CLIENT_ABORTED)
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"OpenAIServingCompletion create_completion error: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
@@ -285,7 +291,9 @@ class OpenAIServingCompletion:
|
||||
except asyncio.TimeoutError:
|
||||
current_waiting_time += 10
|
||||
if current_waiting_time == 300:
|
||||
status, msg = self.engine_client.check_health(time_interval_threashold=envs.FD_WORKER_ALIVE_TIMEOUT)
|
||||
status, msg = self.engine_client.check_health(
|
||||
time_interval_threashold=envs.FD_WORKER_ALIVE_TIMEOUT
|
||||
)
|
||||
if not status:
|
||||
raise ValueError(f"Engine is not healthy: {msg}")
|
||||
else:
|
||||
@@ -455,7 +463,9 @@ class OpenAIServingCompletion:
|
||||
except asyncio.TimeoutError:
|
||||
current_waiting_time += 10
|
||||
if current_waiting_time == 300:
|
||||
status, msg = self.engine_client.check_health(time_interval_threashold=envs.FD_WORKER_ALIVE_TIMEOUT)
|
||||
status, msg = self.engine_client.check_health(
|
||||
time_interval_threashold=envs.FD_WORKER_ALIVE_TIMEOUT
|
||||
)
|
||||
if not status:
|
||||
raise ValueError(f"Engine is not healthy: {msg}")
|
||||
else:
|
||||
@@ -634,6 +644,10 @@ class OpenAIServingCompletion:
|
||||
yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
|
||||
|
||||
except asyncio.CancelledError as e:
|
||||
await self.engine_client.abort(f"{request_id}_0", num_choices)
|
||||
error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in completion_stream_generator: {e}, {str(traceback.format_exc())}")
|
||||
yield f"data: {ErrorResponse(error=ErrorInfo(message=str(e), code='400', type=ErrorType.INTERNAL_ERROR)).model_dump_json(exclude_unset=True)}\n\n"
|
||||
|
||||
Reference in New Issue
Block a user