[BugFix][Metrics] Fix Prometheus Multiprocess Metrics Issues and Add ZMQ Communication Metrics (#5185)

* [Feature] add metrics for ZMQ and fix multiprocess metrics

* fix test_metrics.py

---------

Co-authored-by: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com>
This commit is contained in:
fl0w2o48
2025-11-27 15:05:09 +08:00
committed by GitHub
parent ce9a49f6bf
commit e63d715fc3
17 changed files with 878 additions and 280 deletions
+10
View File
@@ -17,12 +17,15 @@
import asyncio
import heapq
import random
import time
import aiozmq
import msgpack
import zmq
from fastdeploy.engine.args_utils import EngineArgs
from fastdeploy.metrics.metrics import main_process_metrics
from fastdeploy.metrics.stats import ZMQMetricsStats
from fastdeploy.utils import FlexibleArgumentParser, api_server_logger
UVICORN_CONFIG = {
@@ -122,6 +125,13 @@ class DealerConnectionManager:
try:
raw_data = await dealer.read()
response = msgpack.unpackb(raw_data[-1])
_zmq_metrics_stats = ZMQMetricsStats()
_zmq_metrics_stats.msg_recv_total += 1
if "zmq_send_time" in response:
_zmq_metrics_stats.zmq_latency = time.perf_counter() - response["zmq_send_time"]
address = dealer.transport.getsockopt(zmq.LAST_ENDPOINT)
main_process_metrics.record_zmq_stats(_zmq_metrics_stats, address)
request_id = response[-1]["request_id"]
if request_id[:4] in ["cmpl", "embd"]:
request_id = request_id.rsplit("_", 1)[0]