[Feature] Support stopping inference for the corresponding request in the online service after the client disconnects. (#5320)

* request disconnect

* request disconnect

* fix bug

* fix bug (amend)

---------

Co-authored-by: root <root@yq01-sys-rpm26xc1knu.yq01.baidu.com>
This commit is contained in:
qwes5s5
2026-01-16 11:46:13 +08:00
committed by GitHub
parent 8f035101ad
commit b2a2e11551
25 changed files with 1339 additions and 63 deletions
+7 -1
View File
@@ -59,7 +59,11 @@ from fastdeploy.entrypoints.openai.serving_embedding import OpenAIServingEmbeddi
from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels
from fastdeploy.entrypoints.openai.serving_reward import OpenAIServingReward
from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser
from fastdeploy.entrypoints.openai.utils import (
UVICORN_CONFIG,
make_arg_parser,
with_cancellation,
)
from fastdeploy.entrypoints.openai.v1.serving_chat import (
OpenAIServingChat as OpenAIServingChatV1,
)
@@ -410,6 +414,7 @@ def wrap_streaming_generator(original_generator: AsyncGenerator):
@app.post("/v1/chat/completions")
@with_cancellation
async def create_chat_completion(request: ChatCompletionRequest, req: Request):
"""
Create a chat completion for the provided prompt and parameters.
@@ -446,6 +451,7 @@ async def create_chat_completion(request: ChatCompletionRequest, req: Request):
@app.post("/v1/completions")
@with_cancellation
async def create_completion(request: CompletionRequest, req: Request):
"""
Create a completion for the provided prompt and parameters.