[Bugfix][RL] fix control request timeout in async update weights pipeline (#7430)

This commit is contained in:
jackyYang6
2026-04-17 16:45:33 +08:00
committed by GitHub
parent 43685a98a7
commit a729e0f729
+4 -4
View File
@@ -665,6 +665,10 @@ class EngineClient:
async def run_control_method(self, request: ControlRequest):
api_server_logger.info(f"Received control request: {request}")
request_id = request.request_id
dealer, response_queue = await self.connection_manager.get_connection(request_id)
if not envs.ZMQ_SEND_BATCH_DATA:
dealer.write([b"", request_id.encode("utf-8")])
req_dict = request.to_dict()
if envs.ZMQ_SEND_BATCH_DATA:
req_dict["zmq_worker_pid"] = self.worker_pid
@@ -672,10 +676,6 @@ class EngineClient:
self.zmq_client.send_json(req_dict)
else:
self.zmq_client.send_pyobj(req_dict)
request_id = request.request_id
dealer, response_queue = await self.connection_manager.get_connection(request_id)
if not envs.ZMQ_SEND_BATCH_DATA:
dealer.write([b"", request_id.encode("utf-8")])
try:
# todo: support user specified timeout. default 600s is enough for most control cases
response = await asyncio.wait_for(response_queue.get(), timeout=600)