mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Optimization] Update ZMQ server (#6735)
* add batch zmq send response
* update
* Revert "update"
This reverts commit 0234a25b47.
* update
* remove lock
* fix unit test
* add unit test
* add unit test
* pre commit
* add unit test
* fix unit test
* add unit test
* fix worker>1
* update zmq_worker_pid
* fix unit test
* fix unit test
* fix unit test
* add unit test
* fix unit test
* fix first token time
* fix logprobs
* add unit test
* op
* remove debug log
---------
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -25,6 +25,7 @@ from typing import Any, ClassVar, Generic, Optional, TypeVar, Union
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
from typing_extensions import override
|
||||
|
||||
import fastdeploy.envs as envs
|
||||
from fastdeploy.engine.request import RequestOutput
|
||||
from fastdeploy.entrypoints.openai.protocol import (
|
||||
ErrorInfo,
|
||||
@@ -276,10 +277,9 @@ class ZmqOpenAIServing(OpenAIServing):
|
||||
dealer, request_output_queue = await self.engine_client.connection_manager.get_connection(
|
||||
request_id, num_choices
|
||||
)
|
||||
for pr in ctx.preprocess_requests:
|
||||
dealer.write([b"", pr["request_id"].encode("utf-8")])
|
||||
# if self.engine_client.check_model_weight_status():
|
||||
# raise ValueError("Engine is clearing model weight")
|
||||
if not envs.ZMQ_SEND_BATCH_DATA:
|
||||
for pr in ctx.preprocess_requests:
|
||||
dealer.write([b"", pr["request_id"].encode("utf-8")])
|
||||
while num_choices > 0:
|
||||
request_output_dicts = await asyncio.wait_for(request_output_queue.get(), timeout=60)
|
||||
for request_output_dict in request_output_dicts:
|
||||
|
||||
Reference in New Issue
Block a user