mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Support stopping the inference for the corresponding request in the online service after a disconnection request. (#5320)
* request disconnect
* request disconnect
* fix bug
* fix bug --amend
---------
Co-authored-by: root <root@yq01-sys-rpm26xc1knu.yq01.baidu.com>
This commit is contained in:
@@ -59,7 +59,11 @@ from fastdeploy.entrypoints.openai.serving_embedding import OpenAIServingEmbeddi
 from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels
 from fastdeploy.entrypoints.openai.serving_reward import OpenAIServingReward
 from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
-from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser
+from fastdeploy.entrypoints.openai.utils import (
+    UVICORN_CONFIG,
+    make_arg_parser,
+    with_cancellation,
+)
 from fastdeploy.entrypoints.openai.v1.serving_chat import (
     OpenAIServingChat as OpenAIServingChatV1,
 )
@@ -410,6 +414,7 @@ def wrap_streaming_generator(original_generator: AsyncGenerator):


 @app.post("/v1/chat/completions")
+@with_cancellation
 async def create_chat_completion(request: ChatCompletionRequest, req: Request):
     """
     Create a chat completion for the provided prompt and parameters.
@@ -446,6 +451,7 @@ async def create_chat_completion(request: ChatCompletionRequest, req: Request):


 @app.post("/v1/completions")
+@with_cancellation
 async def create_completion(request: CompletionRequest, req: Request):
     """
     Create a completion for the provided prompt and parameters.

Reference in New Issue
Block a user