Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2026-04-23 00:17:25 +08:00)
[BugFix][Metrics] Fix Prometheus Multiprocess Metrics Issues and Add ZMQ Communication Metrics (#5185)
* [Feature] add metrics for ZMQ and fix multiprocess metrics
* fix test_metrics.py

---------

Co-authored-by: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com>
This commit is contained in:
@@ -30,7 +30,6 @@ from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.responses import JSONResponse, Response, StreamingResponse
|
||||
from gunicorn.app.base import BaseApplication
|
||||
from opentelemetry import trace
|
||||
from prometheus_client import CONTENT_TYPE_LATEST
|
||||
|
||||
from fastdeploy.engine.args_utils import EngineArgs
|
||||
from fastdeploy.engine.engine import LLMEngine
|
||||
@@ -58,11 +57,8 @@ from fastdeploy.entrypoints.openai.serving_reward import OpenAIServingReward
|
||||
from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
|
||||
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser
|
||||
from fastdeploy.envs import environment_variables
|
||||
from fastdeploy.metrics.metrics import (
|
||||
EXCLUDE_LABELS,
|
||||
get_filtered_metrics,
|
||||
main_process_metrics,
|
||||
)
|
||||
from fastdeploy.metrics.metrics import get_filtered_metrics
|
||||
from fastdeploy.metrics.metrics_middleware import PrometheusMiddleware
|
||||
from fastdeploy.metrics.trace_util import (
|
||||
fd_start_span,
|
||||
inject_to_metadata,
|
||||
@@ -271,6 +267,9 @@ env_tokens = env_api_key_func() if env_api_key_func else []
|
||||
if tokens := [key for key in (args.api_key or env_tokens) if key]:
|
||||
app.add_middleware(AuthenticationMiddleware, tokens)
|
||||
|
||||
# add middleware for http metrics
|
||||
app.add_middleware(PrometheusMiddleware)
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def connection_manager():
|
||||
@@ -586,11 +585,8 @@ async def metrics():
|
||||
"""
|
||||
metrics
|
||||
"""
|
||||
metrics_text = get_filtered_metrics(
|
||||
EXCLUDE_LABELS,
|
||||
extra_register_func=lambda reg: main_process_metrics.register_all(reg, workers=args.workers),
|
||||
)
|
||||
return Response(metrics_text, media_type=CONTENT_TYPE_LATEST)
|
||||
metrics_text = get_filtered_metrics()
|
||||
return Response(metrics_text, media_type="text/plain")
|
||||
|
||||
|
||||
@metrics_app.get("/config-info")
|
||||
@@ -629,21 +625,11 @@ def launch_metrics_server():
|
||||
if not is_port_available(args.host, args.metrics_port):
|
||||
raise Exception(f"The parameter `metrics_port`:{args.metrics_port} is already in use.")
|
||||
|
||||
# Move setting prometheus directory to fastdeploy/__init__.py
|
||||
# prom_dir = cleanup_prometheus_files(True)
|
||||
# os.environ["PROMETHEUS_MULTIPROC_DIR"] = prom_dir
|
||||
metrics_server_thread = threading.Thread(target=run_metrics_server, daemon=True)
|
||||
metrics_server_thread.start()
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
# NOTE: This is commented out since PROMETHEUS_MULTIPROC_DIR is already set up in fastdeploy/__init__.py
|
||||
# def setup_metrics_environment():
|
||||
# """Prepare Prometheus multiprocess directory before starting API workers."""
|
||||
# prom_dir = cleanup_prometheus_files(True)
|
||||
# os.environ["PROMETHEUS_MULTIPROC_DIR"] = prom_dir
|
||||
|
||||
|
||||
controller_app = FastAPI()
|
||||
|
||||
|
||||
@@ -756,7 +742,6 @@ def main():
|
||||
launch_metrics_server()
|
||||
console_logger.info(f"Launching metrics service at http://{args.host}:{args.metrics_port}/metrics")
|
||||
else:
|
||||
# setup_metrics_environment()
|
||||
console_logger.info(f"Launching metrics service at http://{args.host}:{args.port}/metrics")
|
||||
console_logger.info(f"Launching chat completion service at http://{args.host}:{args.port}/v1/chat/completions")
|
||||
console_logger.info(f"Launching completion service at http://{args.host}:{args.port}/v1/completions")
|
||||
|
||||
@@ -39,7 +39,7 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
UsageInfo,
|
||||
)
|
||||
from fastdeploy.entrypoints.openai.response_processors import ChatResponseProcessor
|
||||
from fastdeploy.metrics.work_metrics import work_process_metrics
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.trace.constants import LoggingEventName
|
||||
from fastdeploy.trace.trace_logger import print as trace_print
|
||||
from fastdeploy.utils import (
|
||||
@@ -382,7 +382,7 @@ class OpenAIServingChat:
|
||||
)
|
||||
if res["finished"]:
|
||||
num_choices -= 1
|
||||
work_process_metrics.e2e_request_latency.observe(
|
||||
main_process_metrics.e2e_request_latency.observe(
|
||||
time.time() - res["metrics"]["request_start_time"]
|
||||
)
|
||||
if previous_num_tokens[idx] != max_tokens:
|
||||
@@ -631,7 +631,7 @@ class OpenAIServingChat:
|
||||
output = data["outputs"]
|
||||
|
||||
if output is not None and output.get("metrics") and output["metrics"].get("request_start_time"):
|
||||
work_process_metrics.e2e_request_latency.observe(
|
||||
main_process_metrics.e2e_request_latency.observe(
|
||||
time.time() - data.get("metrics").get("request_start_time")
|
||||
)
|
||||
message = ChatMessage(
|
||||
|
||||
@@ -17,12 +17,15 @@
|
||||
import asyncio
|
||||
import heapq
|
||||
import random
|
||||
import time
|
||||
|
||||
import aiozmq
|
||||
import msgpack
|
||||
import zmq
|
||||
|
||||
from fastdeploy.engine.args_utils import EngineArgs
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.metrics.stats import ZMQMetricsStats
|
||||
from fastdeploy.utils import FlexibleArgumentParser, api_server_logger
|
||||
|
||||
UVICORN_CONFIG = {
|
||||
@@ -122,6 +125,13 @@ class DealerConnectionManager:
|
||||
try:
|
||||
raw_data = await dealer.read()
|
||||
response = msgpack.unpackb(raw_data[-1])
|
||||
_zmq_metrics_stats = ZMQMetricsStats()
|
||||
_zmq_metrics_stats.msg_recv_total += 1
|
||||
if "zmq_send_time" in response:
|
||||
_zmq_metrics_stats.zmq_latency = time.perf_counter() - response["zmq_send_time"]
|
||||
address = dealer.transport.getsockopt(zmq.LAST_ENDPOINT)
|
||||
main_process_metrics.record_zmq_stats(_zmq_metrics_stats, address)
|
||||
|
||||
request_id = response[-1]["request_id"]
|
||||
if request_id[:4] in ["cmpl", "embd"]:
|
||||
request_id = request_id.rsplit("_", 1)[0]
|
||||
|
||||
Reference in New Issue
Block a user