[Feature] Support stopping the inference for the corresponding request in the online service after a disconnection request. (#5320)

* request disconnect

* request disconnect

* fix bug

* fix bug--amend

---------

Co-authored-by: root <root@yq01-sys-rpm26xc1knu.yq01.baidu.com>
This commit is contained in:
qwes5s5
2026-01-16 11:46:13 +08:00
committed by GitHub
parent 8f035101ad
commit b2a2e11551
25 changed files with 1339 additions and 63 deletions
+54
View File
@@ -15,6 +15,7 @@
"""
import asyncio
import functools
import heapq
import random
import time
@@ -22,6 +23,7 @@ from multiprocessing.reduction import ForkingPickler
import aiozmq
import zmq
from fastapi import Request
from fastdeploy.engine.args_utils import EngineArgs
from fastdeploy.metrics.metrics import main_process_metrics
@@ -253,3 +255,55 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
parser = EngineArgs.add_cli_args(parser)
return parser
async def listen_for_disconnect(request: Request) -> None:
"""Returns if a disconnect message is received"""
while True:
message = await request.receive()
if message["type"] == "http.disconnect":
break
def with_cancellation(handler_func):
"""Decorator that allows a route handler to be cancelled by client
disconnections.
This does _not_ use request.is_disconnected, which does not work with
middleware. Instead this follows the pattern from
starlette.StreamingResponse, which simultaneously awaits on two tasks- one
to wait for an http disconnect message, and the other to do the work that we
want done. When the first task finishes, the other is cancelled.
A core assumption of this method is that the body of the request has already
been read. This is a safe assumption to make for fastapi handlers that have
already parsed the body of the request into a pydantic model for us.
This decorator is unsafe to use elsewhere, as it will consume and throw away
all incoming messages for the request while it looks for a disconnect
message.
In the case where a `StreamingResponse` is returned by the handler, this
wrapper will stop listening for disconnects and instead the response object
will start listening for disconnects.The response object will only correctly
listen when the ASGI protocol version used by Uvicorn is less than 2.4(Excluding 2.4).
"""
# Functools.wraps is required for this wrapper to appear to fastapi as a
# normal route handler, with the correct request type hinting.
@functools.wraps(handler_func)
async def wrapper(*args, **kwargs):
# The request is either the second positional arg or `raw_request`
request = args[1] if len(args) > 1 else kwargs["req"]
handler_task = asyncio.create_task(handler_func(*args, **kwargs))
cancellation_task = asyncio.create_task(listen_for_disconnect(request))
done, pending = await asyncio.wait([handler_task, cancellation_task], return_when=asyncio.FIRST_COMPLETED)
for task in pending:
task.cancel()
if handler_task in done:
return handler_task.result()
return None
return wrapper