mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
update
This commit is contained in:
@@ -1553,7 +1553,7 @@ class EngineService:
|
||||
else:
|
||||
new_contents.append(content)
|
||||
if new_contents:
|
||||
batch_data.append([request_id, new_contents])
|
||||
batch_data.extend(new_contents)
|
||||
|
||||
# Send all request results together in one batch
|
||||
if batch_data:
|
||||
|
||||
@@ -281,12 +281,11 @@ class ZmqOpenAIServing(OpenAIServing):
|
||||
for pr in ctx.preprocess_requests:
|
||||
dealer.write([b"", pr["request_id"].encode("utf-8")])
|
||||
while num_choices > 0:
|
||||
request_output_dicts = await asyncio.wait_for(request_output_queue.get(), timeout=60)
|
||||
for request_output_dict in request_output_dicts:
|
||||
api_server_logger.debug(f"Received RequestOutput: {request_output_dict}")
|
||||
if request_output_dict["finished"] is True:
|
||||
num_choices -= 1
|
||||
yield request_output_dict
|
||||
request_output_dict = await asyncio.wait_for(request_output_queue.get(), timeout=60)
|
||||
api_server_logger.debug(f"Received RequestOutput: {request_output_dict}")
|
||||
if request_output_dict["finished"] is True:
|
||||
num_choices -= 1
|
||||
yield request_output_dict
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(f"Error processing response: {str(e)}")
|
||||
|
||||
@@ -202,7 +202,7 @@ class DealerConnectionManager:
|
||||
async def _dispatch_batch_responses(self):
|
||||
"""
|
||||
Receive batch responses and dispatch to corresponding request queues.
|
||||
batch_data format: [[req_id, [outputs]], [req_id, [outputs]], ...]
|
||||
batch_data format: [output, output, ...] where each output contains request_id
|
||||
"""
|
||||
consecutive_errors = 0
|
||||
max_consecutive_errors = 5
|
||||
@@ -219,20 +219,15 @@ class DealerConnectionManager:
|
||||
address = f"ipc:///dev/shm/response_{self.pid}.push"
|
||||
main_process_metrics.record_zmq_stats(_zmq_metrics_stats, address)
|
||||
|
||||
# Parse request_ids (outside lock)
|
||||
parsed_items = []
|
||||
for req_id, outputs in batch_data:
|
||||
if req_id.startswith(("cmpl", "embd", "reward", "chatcmpl")):
|
||||
req_id = req_id.rsplit("_", 1)[0]
|
||||
parsed_items.append((req_id, outputs))
|
||||
|
||||
# Dispatch: dict lookup + put_nowait are both non-blocking,
|
||||
# safe to do in a single pass under lock
|
||||
# Dispatch directly: extract request_id from output and dispatch in one pass
|
||||
async with self.lock:
|
||||
for req_id, outputs in parsed_items:
|
||||
for output in batch_data:
|
||||
req_id = output.request_id
|
||||
if req_id.startswith(("cmpl", "embd", "reward", "chatcmpl")):
|
||||
req_id = req_id.rsplit("_", 1)[0]
|
||||
queue = self.request_map.get(req_id)
|
||||
if queue is not None:
|
||||
queue.put_nowait(outputs)
|
||||
queue.put_nowait(output)
|
||||
|
||||
consecutive_errors = 0
|
||||
|
||||
|
||||
@@ -316,7 +316,7 @@ class ZmqServerBase(ABC):
|
||||
def _send_batch_response(self, batch_data):
|
||||
"""
|
||||
Batch send responses for multiple requests.
|
||||
batch_data: List[[req_id, [output, ...]], ...]
|
||||
batch_data: List[output, ...] where each output contains request_id
|
||||
"""
|
||||
self._ensure_socket()
|
||||
if self.socket is None:
|
||||
@@ -325,9 +325,7 @@ class ZmqServerBase(ABC):
|
||||
try:
|
||||
# Convert outputs to dict if needed (CPU work, no lock needed)
|
||||
if not envs.ENABLE_V1_DATA_PROCESSOR:
|
||||
for req_id, outputs in batch_data:
|
||||
for i, output in enumerate(outputs):
|
||||
outputs[i] = output.to_dict()
|
||||
batch_data = [output.to_dict() for output in batch_data]
|
||||
|
||||
result = ForkingPickler.dumps(batch_data)
|
||||
result_len = len(result)
|
||||
|
||||
Reference in New Issue
Block a user