diff --git a/docs/observability/README.md b/docs/observability/README.md
new file mode 100644
index 0000000000..10246f2904
--- /dev/null
+++ b/docs/observability/README.md
@@ -0,0 +1,173 @@
+## Observability Example Configuration (`examples/observability`)
+
+This directory provides a complete, Docker Compose–based observability example environment, including:
+
+* **Prometheus**: Metrics collection
+* **Grafana**: Metrics visualization
+* **OpenTelemetry Collector**: Distributed tracing data ingestion and processing
+
+Developers can use this example to **launch a local monitoring and tracing system with a single command**.
+
+---
+
+### Prerequisites
+
+Please make sure the following components are installed in advance:
+
+* Docker
+* Docker Compose (or a newer Docker CLI version that supports `docker compose`)
+
+---
+
+### Usage
+
+#### Start All Services
+
+Enter the directory:
+
+```bash
+cd examples/observability
+```
+
+Run the following command to start the complete monitoring and tracing stack:
+
+```bash
+docker compose -f docker-compose.yaml up -d
+```
+
+After startup, you can access:
+
+* **Prometheus**: [http://localhost:9090](http://localhost:9090)
+* **Grafana**: [http://localhost:3000](http://localhost:3000)
+* **OTLP receiver**: Applications should send traces to the OTel Collector's OTLP endpoints:
+
+ * gRPC: `4317`
+ * HTTP: `4318`
+* **Jaeger UI**: [http://localhost:16686](http://localhost:16686)
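+
+Once the containers are up, a quick way to confirm the stack is reachable is to hit the standard health endpoints (a sketch; adjust host and ports if you remapped them):
+
+```bash
+# Prometheus liveness endpoint
+curl -s http://localhost:9090/-/healthy
+# Grafana health API
+curl -s http://localhost:3000/api/health
+# Jaeger UI should answer with an HTTP 200 status line
+curl -sI http://localhost:16686 | head -n 1
+```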
+
+**Notes:**
+
+* Update the Prometheus scrape targets to match your actual application endpoints.
+* Map Grafana’s service port to a port that is accessible on your machine.
+* Map the Jaeger UI port to a port that is accessible on your machine.
+* When starting the full stack, there is no need to start individual sub-services separately.
+
+---
+
+#### Start Metrics Services Only
+
+Enter the directory:
+
+```bash
+cd examples/observability/metrics
+```
+
+Run the following command:
+
+```bash
+docker compose -f prometheus_compose.yaml up -d
+```
+
+After startup, you can access:
+
+* **Grafana**: [http://localhost:3000](http://localhost:3000)
+
+---
+
+#### Start Tracing Services Only
+
+Enter the directory:
+
+```bash
+cd examples/observability/tracing
+```
+
+Run the following command:
+
+```bash
+docker compose -f tracing_compose.yaml up -d
+```
+
+After startup, you can access:
+
+* **OTLP receiver**: Applications should send traces to the OTel Collector's OTLP endpoints:
+
+ * gRPC: `4317`
+ * HTTP: `4318`
+* **Jaeger UI**: [http://localhost:16686](http://localhost:16686)
+
+---
+
+### Directory Structure and File Descriptions
+
+#### Core Startup File
+
+| File Name | Purpose | Description |
+| --------------------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `docker-compose.yaml` | Main entry | Defines and starts the full observability stack (Prometheus, Grafana, OTel Collector, and Jaeger). This is the single entry point to launch the entire environment. |
+
+---
+
+#### Metrics and Monitoring Configuration
+
+| File / Directory | Purpose | Description |
+| --------------------------------------------------- | ------------------------ | ------------------------------------------------------------------------------------------------------------------------- |
+| `metrics` | Metrics root directory | Contains all Prometheus- and metrics-related configurations. |
+| `prometheus.yaml` | Prometheus main config | Defines scrape targets, global scrape parameters, and optional recording rules. All monitored endpoints are defined here. |
+| `prometheus_compose.yaml` | Prometheus Docker config | Defines the Prometheus container, volume mounts, and network settings. |
+| `grafana/datasources/datasource.yaml` | Datasource configuration | Configures how Grafana connects to Prometheus. |
+| `grafana/dashboards/config/dashboard.yaml` | Dashboard provisioning | Specifies the locations of dashboard JSON files to be loaded. |
+| `grafana/dashboards/json/fastdeploy-dashboard.json` | Dashboard definition | Contains visualization layouts and queries for `fastdeploy` monitoring metrics. |
+
+---
+
+#### Distributed Tracing Configuration
+
+| File / Directory | Purpose | Description |
+| ------------------------------------------------------------------------------- | ---------------------- | ---------------------------------------------------------------------- |
+| `tracing` | Tracing root directory | Contains all configurations related to distributed tracing. |
+| `opentelemetry.yaml` | OTel Collector config | Defines the Collector data pipelines:<br>• **receivers**: receive OTLP data (traces, metrics, logs)<br>• **processors**: data processing and batching<br>• **exporters**: export data to tracing backends (such as Jaeger) or files<br>• **extensions**: health check, pprof, and zpages<br>• **pipelines**: define complete processing flows for traces, metrics, and logs |
+| `tracing_compose.yaml` | Tracing Docker config | Defines the container configuration for the OTel Collector and Jaeger. |
+
+---
+
+### Customization
+
+#### 4.1 Modify Metrics Scrape Targets
+
+If your application’s metrics endpoint, port, or path changes, edit:
+
+```plain
+metrics/prometheus.yaml
+```
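+
+For example, to scrape two FastDeploy instances, the `scrape_configs` section might be extended as in this sketch (hostnames and ports are placeholders for your own deployment):
+
+```yaml
+scrape_configs:
+  - job_name: 'fastdeploy'
+    static_configs:
+      # list all your FastDeploy metrics endpoints here
+      - targets: ['10.0.0.11:30000', '10.0.0.12:30000']
+```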
+
+---
+
+#### 4.2 Adjust Tracing Sampling Rate or Processing Logic
+
+Edit:
+
+```plain
+tracing/opentelemetry.yaml
+```
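+
+As one sketch, trace sampling could be introduced with a `probabilistic_sampler` processor along these lines. Note that this processor ships with the Collector **contrib** distribution (`otel/opentelemetry-collector-contrib`), and it must also be referenced from the traces pipeline already defined in `tracing/opentelemetry.yaml`:
+
+```yaml
+processors:
+  probabilistic_sampler:
+    # keep roughly 10% of traces
+    sampling_percentage: 10
+```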
+
+---
+
+#### 4.3 Add Custom Grafana Dashboards
+
+1. Add the new dashboard JSON file to:
+
+```plain
+grafana/dashboards/json/
+```
+
+2. Register the dashboard so Grafana can load it automatically by editing:
+
+```plain
+grafana/dashboards/config/dashboard.yaml
+```
diff --git a/docs/observability/trace.md b/docs/observability/trace.md
new file mode 100644
index 0000000000..ca7501ab0b
--- /dev/null
+++ b/docs/observability/trace.md
@@ -0,0 +1,202 @@
+# FastDeploy Tracing with OpenTelemetry
+
+**FastDeploy** exports request tracing data through the **OpenTelemetry Collector**.
+Tracing can be enabled when starting the server using the `--trace-enable` flag, and the OpenTelemetry Collector endpoint can be configured via `--otlp-traces-endpoint`.
+
+---
+
+## Setup Guide
+
+### 1. Install Dependencies
+
+```bash
+# Manual installation
+pip install opentelemetry-sdk \
+ opentelemetry-api \
+ opentelemetry-exporter-otlp \
+ opentelemetry-exporter-otlp-proto-grpc
+```
+
+---
+
+### 2. Start OpenTelemetry Collector and Jaeger
+
+```bash
+docker compose -f examples/observability/tracing/tracing_compose.yaml up -d
+```
+
+---
+
+### 3. Start FastDeploy Server with Tracing Enabled
+
+#### Configure FastDeploy Environment Variables
+
+```shell
+# Enable tracing
+"TRACES_ENABLE": "true",
+
+# Service name
+"FD_SERVICE_NAME": "FastDeploy",
+
+# Instance name
+"FD_HOST_NAME": "trace_test",
+
+# Exporter type
+"TRACES_EXPORTER": "otlp",
+
+# OTLP endpoint:
+# gRPC: 4317
+# HTTP: 4318
+"EXPORTER_OTLP_ENDPOINT": "http://localhost:4317",
+
+# Optional headers
+"EXPORTER_OTLP_HEADERS": "Authentication=Txxxxx",
+
+# Export protocol
+"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "grpc",
+```
+
+#### Start FastDeploy
+
+Start the FastDeploy server with the above configuration and ensure that tracing is enabled.
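+
+As a minimal sketch, the variables above could be exported in the shell before launching the server. The entrypoint shown below is illustrative only; use your usual FastDeploy launch command, optionally with the `--trace-enable` / `--otlp-traces-endpoint` flags instead of the environment variables:
+
+```bash
+export TRACES_ENABLE=true
+export FD_SERVICE_NAME=FastDeploy
+export FD_HOST_NAME=trace_test
+export TRACES_EXPORTER=otlp
+export EXPORTER_OTLP_ENDPOINT=http://localhost:4317
+export OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=grpc
+
+# Launch the server as usual (illustrative entrypoint and model path):
+python -m fastdeploy.entrypoints.openai.api_server \
+    --model <your-model> \
+    --trace-enable \
+    --otlp-traces-endpoint http://localhost:4317
+```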
+
+---
+
+### 4. Send Requests and View Traces
+
+* Open the **Jaeger UI** in your browser (port `16686`) to visualize request traces.
+* The OpenTelemetry Collector will also export the trace data to a local file:
+
+```plain
+/tmp/otel_trace.json
+```
+
+---
+
+## Adding Tracing to Your Own Code
+
+FastDeploy already inserts tracing points at the key execution stages.
+Developers can use the APIs provided in `trace.py` to add more fine-grained tracing.
+
+---
+
+### 4.1 Initialize Tracing
+
+Each **process** involved in tracing must call:
+
+```python
+process_tracing_init()
+```
+
+Each **thread** that participates in a traced request must call:
+
+```python
+trace_set_thread_info("thread_label", tp_rank, dp_rank)
+```
+
+* `thread_label`: identifier used for visual distinction of threads.
+* `tp_rank` / `dp_rank`: optional values to label tensor parallelism or data parallelism ranks.
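+
+Putting the two calls together, a worker might initialize tracing like this (a sketch; the thread label and rank values are illustrative):
+
+```python
+# Once per process, before any spans are created
+process_tracing_init()
+
+# Once per participating thread: label, tp_rank, dp_rank
+trace_set_thread_info("prefill_worker", 0, 0)
+```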
+
+---
+
+### 4.2 Mark Request Start and Finish
+
+```python
+trace_req_start(rid, bootstrap_room, ts, role)
+trace_req_finish(rid, ts, attrs)
+```
+
+* Creates both a **Bootstrap Room Span** and a **Root Span**.
+* Supports inheritance from spans created by the **FastAPI Instrumentor** (context copying).
+* `attrs` can be used to attach additional attributes to the request span.
+
+---
+
+### 4.3 Add Tracing for Slices
+
+#### Standard Slice
+
+```python
+trace_slice_start("slice_name", rid)
+trace_slice_end("slice_name", rid)
+```
+
+#### Mark Thread Completion
+
+The last slice in a thread can mark the thread span as finished:
+
+```python
+trace_slice_end("slice_name", rid, thread_finish_flag=True)
+```
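+
+Putting both calls together, a stage handler might bracket its work like this (a sketch; the function and request fields are illustrative, not FastDeploy APIs):
+
+```python
+def run_prefill(req):
+    # open a slice for this stage of the request
+    trace_slice_start("prefill", req.request_id)
+    try:
+        output = do_prefill(req)  # your actual stage logic
+    finally:
+        # close the slice even if the stage raises
+        trace_slice_end("prefill", req.request_id)
+    return output
+```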
+
+---
+
+### 4.4 Trace Context Propagation Across Threads
+
+#### Sender Side (ZMQ)
+
+```python
+trace_context = trace_get_proc_propagate_context(rid)
+req.trace_context = trace_context
+```
+
+#### Receiver Side (ZMQ)
+
+```python
+trace_set_proc_propagate_context(rid, req.trace_context)
+```
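+
+The two calls bracket whatever transport carries the request between threads or processes. A minimal sketch (the ZMQ sockets and request object are illustrative):
+
+```python
+# Sender thread: capture the current trace context before handing the request off
+req.trace_context = trace_get_proc_propagate_context(rid)
+sender.send_pyobj(req)  # illustrative ZMQ transport
+
+# Receiver thread: restore the context before creating new slices for this request
+req = receiver.recv_pyobj()
+trace_set_proc_propagate_context(rid, req.trace_context)
+trace_slice_start("schedule", rid)
+```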
+
+---
+
+### 4.5 Add Events and Attributes
+
+#### Events (recorded on the current slice)
+
+```python
+trace_event("event_name", rid, ts, attrs)
+```
+
+#### Attributes (attached to the current slice)
+
+```python
+trace_slice_add_attr(rid, attrs)
+```
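+
+For example (a sketch; the attribute keys, event name, and timestamp source are illustrative):
+
+```python
+import time
+
+# annotate the current slice with request metadata
+trace_slice_add_attr(rid, {"input_tokens": 128, "cache_hit": True})
+# record a point-in-time event on the current slice
+trace_event("first_token_emitted", rid, time.time(), {"token_index": 0})
+```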
+
+---
+
+## Extending the Tracing Framework
+
+### 5.1 Trace Context Hierarchy
+
+* Two levels of Trace Context:
+
+ * **`TraceReqContext`** – request-level context
+ * **`TraceThreadContext`** – thread-level context
+
+* Three-level Span hierarchy:
+
+ * `req_root_span`
+ * `thread_span`
+ * `slice_span`
+
+---
+
+### 5.2 Available Span Name Enum (`TraceSpanName`)
+
+```python
+FASTDEPLOY
+PREPROCESS
+SCHEDULE
+PREFILL
+DECODE
+POSTPROCESS
+```
+
+* These enums can be used when creating slices to ensure consistent naming.
+
+---
+
+### 5.3 Important Notes
+
+1. Each **thread span must be closed** when the final slice of that thread finishes.
+2. Spans created by **FastAPI Instrumentor** are automatically inherited by the internal tracing context.
diff --git a/docs/zh/observability/README.md b/docs/zh/observability/README.md
new file mode 100644
index 0000000000..43de520ccf
--- /dev/null
+++ b/docs/zh/observability/README.md
@@ -0,0 +1,149 @@
+## Observability Example Configuration (`examples/observability`)
+
+This directory provides a complete, Docker Compose–based observability example, including:
+
+- Prometheus: metrics collection
+- Grafana: metrics visualization
+- OpenTelemetry Collector: distributed tracing data ingestion and processing
+
+Developers can use this example environment to launch a local monitoring and tracing system with a single command.
+
+### Prerequisites
+
+Make sure the following components are installed in advance:
+
+- Docker
+- Docker Compose (or a newer Docker CLI that supports `docker compose`)
+
+### Usage
+
+#### Start All Services
+
+Enter the directory:
+
+```shell
+cd examples/observability
+```
+
+Run the following command in the `examples/observability` directory to start the complete monitoring and tracing stack:
+
+```bash
+docker compose -f docker-compose.yaml up -d
+```
+
+After startup, you can access:
+
+- Prometheus: http://localhost:9090
+- Grafana: http://localhost:3000
+- OTLP receiver: applications should send traces to the OTel Collector's OTLP endpoints
+  - gRPC: port 4317
+  - HTTP: port 4318
+- Jaeger UI: http://localhost:16686
+
+Notes:
+
+- Update the Prometheus scrape targets to your own application endpoints.
+- Map Grafana's service port to a port that is accessible on your machine.
+- Map the Jaeger UI port to a port that is accessible on your machine.
+- If the full stack has been started, there is no need to start the individual sub-services separately.
+
+#### Start Metrics Services Only
+
+Enter the directory:
+
+```shell
+cd examples/observability/metrics
+```
+
+Run the following command in the `examples/observability/metrics` directory to start the metrics services:
+
+```bash
+docker compose -f prometheus_compose.yaml up -d
+```
+
+After startup, you can access:
+
+- Grafana: http://localhost:3000
+
+#### Start Tracing Services Only
+
+Enter the directory:
+
+```shell
+cd examples/observability/tracing
+```
+
+Run the following command in the `examples/observability/tracing` directory to start the tracing services:
+
+```bash
+docker compose -f tracing_compose.yaml up -d
+```
+
+After startup, you can access:
+
+- OTLP receiver: applications should send traces to the OTel Collector's OTLP endpoints
+  - gRPC: port 4317
+  - HTTP: port 4318
+- Jaeger UI: http://localhost:16686
+
+### Directory Structure and File Descriptions
+
+#### Core Startup File
+
+| File Name | Purpose | Description |
+| ------------------- | ---------- | ------------------------------------------------------------ |
+| docker-compose.yaml | Main entry | Defines and starts the full observability stack (Prometheus, Grafana, OTel Collector, and Jaeger). This is the single entry point for launching the entire environment. |
+
+#### Metrics and Monitoring Configuration
+
+| File / Directory | Purpose | Description |
+| ------------------------------------------------- | ---------------------- | ------------------------------------------------------------ |
+| metrics | Metrics root directory | Contains all Prometheus- and metrics-related configuration. |
+| prometheus.yaml | Prometheus main config | Defines scrape targets, global scrape parameters, and optional recording rules. All monitored endpoints are defined here. |
+| prometheus_compose.yaml | Prometheus Docker config | Defines the Prometheus container, volume mounts, and network settings. |
+| grafana/datasources/datasource.yaml | Datasource configuration | Configures how Grafana connects to Prometheus. |
+| grafana/dashboards/config/dashboard.yaml | Dashboard provisioning | Specifies the locations of dashboard JSON files to be loaded. |
+| grafana/dashboards/json/fastdeploy-dashboard.json | Dashboard definition | Contains visualization layouts and queries for `fastdeploy` monitoring metrics. |
+
+#### Distributed Tracing Configuration
+
+| File / Directory | Purpose | Description |
+| -------------------- | ------------------- | ------------------------------------------------------------ |
+| tracing | Tracing root directory | Contains all configurations related to distributed tracing. |
+| opentelemetry.yaml | OTel Collector config | Defines the Collector data pipelines:<br>• receivers: receive OTLP data (traces, metrics, logs)<br>• processors: data processing and batching<br>• exporters: export data to tracing backends (such as Jaeger) or files<br>• extensions: health check, pprof, and zpages<br>• pipelines: define the complete processing flows for traces, metrics, and logs |
+| tracing_compose.yaml | Tracing Docker config | Defines the container configuration for the OTel Collector and Jaeger. |
+
+### 4. Customization
+
+#### 4.1 Modify Metrics Scrape Targets
+
+If your application's metrics endpoint, port, or path changes, edit:
+
+```plain
+metrics/prometheus.yaml
+```
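+
+For example, the `scrape_configs` section might be extended as in this sketch (hosts and ports are placeholders for your deployment):
+
+```yaml
+scrape_configs:
+  - job_name: 'fastdeploy'
+    static_configs:
+      # list all your FastDeploy metrics endpoints here
+      - targets: ['10.0.0.11:30000', '10.0.0.12:30000']
+```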
+
+#### 4.2 Adjust Tracing Sampling Rate or Processing Logic
+
+Edit:
+
+```plain
+tracing/opentelemetry.yaml
+```
+
+#### 4.3 Add Custom Grafana Dashboards
+
+1. Add the new dashboard JSON file to:
+
+```plain
+grafana/dashboards/json/
+```
+
+2. Register the dashboard in the file below so Grafana loads it automatically:
+
+```plain
+grafana/dashboards/config/dashboard.yaml
+```
diff --git a/docs/zh/observability/trace.md b/docs/zh/observability/trace.md
new file mode 100644
index 0000000000..87d4651d50
--- /dev/null
+++ b/docs/zh/observability/trace.md
@@ -0,0 +1,150 @@
+**FastDeploy** exports request tracing data through the **OpenTelemetry Collector**.
+Tracing can be enabled by adding `--trace-enable` when starting the server, and the OpenTelemetry Collector endpoint can be configured via `--otlp-traces-endpoint`.
+
+## Setup Guide
+
+### 1. Install Dependencies
+
+```bash
+# Manual installation
+pip install opentelemetry-sdk opentelemetry-api opentelemetry-exporter-otlp opentelemetry-exporter-otlp-proto-grpc
+```
+
+### 2. Start OpenTelemetry Collector and Jaeger
+
+```bash
+docker compose -f examples/observability/tracing/tracing_compose.yaml up -d
+```
+
+### 3. Start the FastDeploy Server with Tracing Enabled
+
+- Configure FastDeploy environment variables
+
+```shell
+# Enable tracing
+"TRACES_ENABLE": "true",
+# Service name
+"FD_SERVICE_NAME": "FastDeploy",
+# Instance name
+"FD_HOST_NAME": "trace_test",
+"TRACES_EXPORTER": "otlp",
+# gRPC export uses port 4317, HTTP export uses port 4318
+"EXPORTER_OTLP_ENDPOINT": "http://localhost:4317",
+"EXPORTER_OTLP_HEADERS": "Authentication=Txxxxx",
+# Export protocol
+"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "grpc",
+```
+
+- Start FastDeploy
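+
+As a minimal sketch, the variables above can be exported in the shell before launching (the entrypoint below is illustrative only; `--trace-enable` / `--otlp-traces-endpoint` can be used instead of the environment variables):
+
+```bash
+export TRACES_ENABLE=true
+export FD_SERVICE_NAME=FastDeploy
+export FD_HOST_NAME=trace_test
+export TRACES_EXPORTER=otlp
+export EXPORTER_OTLP_ENDPOINT=http://localhost:4317
+export OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=grpc
+
+# Launch the server as usual (illustrative entrypoint and model path):
+python -m fastdeploy.entrypoints.openai.api_server \
+    --model <your-model> \
+    --trace-enable \
+    --otlp-traces-endpoint http://localhost:4317
+```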
+
+### 4. Send Requests and View Traces
+
+- Open the Jaeger UI in your browser (port `16686`) to visualize request traces.
+
+- The Collector also exports the trace data to `/tmp/otel_trace.json`.
+
+## Adding Tracing to Your Own Code
+
+FastDeploy already inserts tracing points at the key execution stages. Developers can use the APIs provided in `trace.py` for more fine-grained tracing.
+
+### 4.1 Initialize Tracing
+
+Each **process** involved in tracing must call:
+
+```python
+process_tracing_init()
+```
+
+Each **thread** that participates in a traced request must call:
+
+```python
+trace_set_thread_info("thread_label", tp_rank, dp_rank)
+```
+
+- `thread_label`: identifier used for visual distinction of threads
+- `tp_rank`/`dp_rank`: optional, label the tensor-parallel or data-parallel rank
+
+### 4.2 Mark Request Start and Finish
+
+```python
+trace_req_start(rid, bootstrap_room, ts, role)
+trace_req_finish(rid, ts, attrs)
+```
+
+- Creates both a Bootstrap Room Span and a Root Span
+- Supports inheriting spans already created by the FastAPI Instrumentor (context copy)
+- `attrs` can attach additional attributes
+
+### 4.3 Add Tracing for Slices
+
+Standard slice:
+
+```python
+trace_slice_start("slice_name", rid)
+trace_slice_end("slice_name", rid)
+```
+
+- The last slice in a thread can mark the thread span as finished:
+
+```python
+trace_slice_end("slice_name", rid, thread_finish_flag=True)
+```
+
+### 4.4 Trace Context Propagation Across Threads
+
+Sender side (ZMQ):
+
+```python
+trace_context = trace_get_proc_propagate_context(rid)
+req.trace_context = trace_context
+```
+
+Receiver side (ZMQ):
+
+```python
+trace_set_proc_propagate_context(rid, req.trace_context)
+```
+
+### 4.5 Add Events and Attributes
+
+Events (recorded on the current slice):
+
+```python
+trace_event("event_name", rid, ts, attrs)
+```
+
+Attributes (attached to the current slice):
+
+```python
+trace_slice_add_attr(rid, attrs)
+```
+
+## Extending the Tracing Framework
+
+### 5.1 Trace Context Hierarchy
+
+- Two levels of Trace Context:
+  - `TraceReqContext` → request-level context
+  - `TraceThreadContext` → thread-level context
+- Three-level Span hierarchy:
+  - `req_root_span`
+  - `thread_span`
+  - `slice_span`
+
+### 5.2 Available Span Name Enum (`TraceSpanName`)
+
+```python
+FASTDEPLOY
+PREPROCESS
+SCHEDULE
+PREFILL
+DECODE
+POSTPROCESS
+```
+
+- These enum values can be used when creating slices to keep naming consistent
+
+### 5.3 Important Notes
+
+1. Each thread span must be closed when the final slice of that thread finishes.
+2. Spans created by the FastAPI Instrumentor are inherited by the internal tracing context.
diff --git a/examples/observability/docker-compose.yaml b/examples/observability/docker-compose.yaml
new file mode 100644
index 0000000000..ec15e63412
--- /dev/null
+++ b/examples/observability/docker-compose.yaml
@@ -0,0 +1,52 @@
+version: '3.8'
+services:
+ prometheus:
+ image: prom/prometheus:latest
+ container_name: prometheus
+ ports:
+ - "9090:9090"
+ volumes:
+ - ./metrics/prometheus.yaml:/etc/prometheus/prometheus.yml
+ command:
+ - '--config.file=/etc/prometheus/prometheus.yml'
+ - '--storage.tsdb.path=/prometheus'
+
+ grafana:
+ image: grafana/grafana:latest
+ container_name: grafana
+ ports:
+ - "3000:3000"
+ volumes:
+ - ./metrics/grafana/datasources:/etc/grafana/provisioning/datasources
+ - ./metrics/grafana/dashboards/config:/etc/grafana/provisioning/dashboards
+ - ./metrics/grafana/dashboards/json:/var/lib/grafana/dashboards
+ environment:
+ - GF_AUTH_ANONYMOUS_ENABLED=true
+ - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
+ - GF_AUTH_BASIC_ENABLED=false
+ - GF_USERS_ALLOW_SIGN_UP=false
+ - GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/var/lib/grafana/dashboards/fastdeploy-dashboard.json
+ depends_on:
+ - prometheus
+
+ jaeger:
+ image: jaegertracing/all-in-one
+ container_name: jaeger
+ ports:
+ - "16686:16686"
+ environment:
+ - COLLECTOR_OTLP_ENABLED=true
+ restart: unless-stopped
+
+ otel-collector:
+ image: docker.io/otel/opentelemetry-collector
+ volumes:
+ - ./tracing/opentelemetry.yaml:/etc/otelcol/config.yaml
+ - /tmp:/tmp
+ ports:
+ - "4317:4317" # OTLP gRPC
+ - "4318:4318" # OTLP HTTP
+ depends_on:
+ - jaeger
+ - prometheus
+ restart: unless-stopped
diff --git a/examples/observability/metrics/grafana/dashboards/config/dashboard.yaml b/examples/observability/metrics/grafana/dashboards/config/dashboard.yaml
new file mode 100644
index 0000000000..5d347a8442
--- /dev/null
+++ b/examples/observability/metrics/grafana/dashboards/config/dashboard.yaml
@@ -0,0 +1,11 @@
+apiVersion: 1
+providers:
+ - name: 'FastDeploy'
+ orgId: 1
+ folder: 'FastDeploy Monitoring'
+ type: file
+ disableDeletion: false
+ updateIntervalSeconds: 10
+ allowUiUpdates: false
+ options:
+ path: /var/lib/grafana/dashboards
diff --git a/examples/observability/metrics/grafana/dashboards/json/fastdeploy-dashboard.json b/examples/observability/metrics/grafana/dashboards/json/fastdeploy-dashboard.json
new file mode 100644
index 0000000000..eaff9cb5e2
--- /dev/null
+++ b/examples/observability/metrics/grafana/dashboards/json/fastdeploy-dashboard.json
@@ -0,0 +1,2397 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "id": 4,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "df2i7osj6pssge"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 0
+ },
+ "id": 25,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "df2i7osj6pssge"
+ },
+ "editorMode": "code",
+ "expr": "rate(fastdeploy:time_to_first_token_seconds_sum[5m]) / rate(fastdeploy:time_to_first_token_seconds_count[5m])",
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "首Token时延",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "df2i7osj6pssge"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "id": 26,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "df2i7osj6pssge"
+ },
+ "editorMode": "code",
+ "expr": "histogram_quantile(0.95,sum(rate(fastdeploy:time_to_first_token_seconds_bucket[5m])) by (le))",
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "首Token时延95分位",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 13,
+ "w": 12,
+ "x": 0,
+ "y": 10
+ },
+ "id": 1,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:num_requests_running",
+ "refId": "A"
+ }
+ ],
+ "title": "当前运行请求数",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 13,
+ "w": 12,
+ "x": 12,
+ "y": 10
+ },
+ "id": 2,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:num_requests_waiting",
+ "refId": "A"
+ }
+ ],
+ "title": "当前等待请求数",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 23
+ },
+ "id": 3,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "rate(fastdeploy:requests_number_total[1m])",
+ "refId": "A"
+ }
+ ],
+ "title": "总请求数 (增量)",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "df2i7osj6pssge"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 12,
+ "y": 23
+ },
+ "id": 4,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "editorMode": "code",
+ "expr": "fastdeploy:request_success_total",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "成功请求总数 (增量)",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 34
+ },
+ "id": 5,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "rate(fastdeploy:generation_tokens_total[1m])",
+ "refId": "A"
+ }
+ ],
+ "title": "生成 token 总数 (增量)",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 12,
+ "y": 34
+ },
+ "id": 6,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "rate(fastdeploy:prompt_tokens_total[1m])",
+ "refId": "A"
+ }
+ ],
+ "title": "Prompt token 总数 (增量)",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 45
+ },
+ "id": 7,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:first_token_latency",
+ "refId": "A"
+ }
+ ],
+ "title": "首 token 延迟 (秒)",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 12,
+ "y": 45
+ },
+ "id": 8,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:infer_latency",
+ "refId": "A"
+ }
+ ],
+ "title": "单 token 推理延迟 (秒)",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 56
+ },
+ "id": 9,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:batch_size",
+ "refId": "A"
+ }
+ ],
+ "title": "当前 batch size",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 12,
+ "y": 56
+ },
+ "id": 10,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:max_batch_size",
+ "refId": "A"
+ }
+ ],
+ "title": "最大 batch size",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 13,
+ "w": 12,
+ "x": 0,
+ "y": 67
+ },
+ "id": 11,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:available_batch_size",
+ "refId": "A"
+ }
+ ],
+ "title": "Decode 阶段可插入请求数",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 13,
+ "w": 12,
+ "x": 12,
+ "y": 67
+ },
+ "id": 12,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:gpu_cache_usage_perc",
+ "refId": "A"
+ }
+ ],
+ "title": "GPU KV-cache 使用率",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 13,
+ "w": 12,
+ "x": 0,
+ "y": 80
+ },
+ "id": 13,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:available_gpu_resource",
+ "refId": "A"
+ }
+ ],
+ "title": "可用 GPU 资源百分比",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 13,
+ "w": 12,
+ "x": 12,
+ "y": 80
+ },
+ "id": 14,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:available_gpu_block_num",
+ "refId": "A"
+ }
+ ],
+ "title": "可用 GPU block 数",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 93
+ },
+ "id": 15,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:hit_req_rate",
+ "refId": "A"
+ }
+ ],
+ "title": "请求级缓存命中率",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 93
+ },
+ "id": 16,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:hit_token_rate",
+ "refId": "A"
+ }
+ ],
+ "title": "Token 级缓存命中率",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 105
+ },
+ "id": 17,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:cpu_hit_token_rate",
+ "refId": "A"
+ }
+ ],
+ "title": "CPU 缓存命中率",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 105
+ },
+ "id": 18,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "fastdeploy:gpu_hit_token_rate",
+ "refId": "A"
+ }
+ ],
+ "title": "GPU 缓存命中率",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 117
+ },
+ "id": 19,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(fastdeploy:request_queue_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p50",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(fastdeploy:request_queue_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p95",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(fastdeploy:request_queue_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p99",
+ "refId": "C"
+ }
+ ],
+ "title": "队列等待时间 (秒)",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 117
+ },
+ "id": 20,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(fastdeploy:request_prefill_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p50",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(fastdeploy:request_prefill_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p95",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(fastdeploy:request_prefill_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p99",
+ "refId": "C"
+ }
+ ],
+ "title": "Prefill 阶段耗时 (秒)",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 128
+ },
+ "id": 21,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(fastdeploy:request_decode_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p50",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(fastdeploy:request_decode_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p95",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(fastdeploy:request_decode_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p99",
+ "refId": "C"
+ }
+ ],
+ "title": "Decode 阶段耗时 (秒)",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 129
+ },
+ "id": 22,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(fastdeploy:request_inference_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p50",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(fastdeploy:request_inference_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p95",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(fastdeploy:request_inference_time_seconds_bucket[1m])) by (le))",
+ "legendFormat": "p99",
+ "refId": "C"
+ }
+ ],
+ "title": "Inference 阶段耗时 (秒)",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 140
+ },
+ "id": 23,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(fastdeploy:request_generation_tokens_bucket[1m])) by (le))",
+ "legendFormat": "p50",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(fastdeploy:request_generation_tokens_bucket[1m])) by (le))",
+ "legendFormat": "p95",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(fastdeploy:request_generation_tokens_bucket[1m])) by (le))",
+ "legendFormat": "p99",
+ "refId": "C"
+ }
+ ],
+ "title": "单请求生成 token 数",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "showValues": false,
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": 0
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 141
+ },
+ "id": 24,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "hideZeros": false,
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "12.2.1",
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(fastdeploy:request_prompt_tokens_bucket[1m])) by (le))",
+ "legendFormat": "p50",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(fastdeploy:request_prompt_tokens_bucket[1m])) by (le))",
+ "legendFormat": "p95",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(fastdeploy:request_prompt_tokens_bucket[1m])) by (le))",
+ "legendFormat": "p99",
+ "refId": "C"
+ }
+ ],
+ "title": "单请求 prefill token 数",
+ "type": "timeseries"
+ }
+ ],
+ "preload": false,
+ "refresh": "auto",
+ "schemaVersion": 42,
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "",
+ "title": "FastDeploy 全指标监控",
+ "uid": "55071365-a765-4f8e-915c-336c8c35abac",
+ "version": 11
+}
diff --git a/examples/observability/metrics/grafana/datasources/datasource.yaml b/examples/observability/metrics/grafana/datasources/datasource.yaml
new file mode 100644
index 0000000000..12eb5b8dbc
--- /dev/null
+++ b/examples/observability/metrics/grafana/datasources/datasource.yaml
@@ -0,0 +1,9 @@
+apiVersion: 1
+datasources:
+ - name: Prometheus
+ type: prometheus
+ access: proxy
+ # url: http://localhost:9090
+ url: http://prometheus:9090
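+    # "prometheus" is the Prometheus service name on the Docker Compose network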
+ isDefault: true
+ editable: false
diff --git a/examples/observability/metrics/prometheus.yaml b/examples/observability/metrics/prometheus.yaml
new file mode 100644
index 0000000000..6df97e6444
--- /dev/null
+++ b/examples/observability/metrics/prometheus.yaml
@@ -0,0 +1,10 @@
+# prometheus.yaml
+global:
+ scrape_interval: 5s
+ evaluation_interval: 30s
+
+scrape_configs:
+ - job_name: 'fastdeploy'
+ static_configs:
+ # list all your targets here
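+      # e.g. scrape several FastDeploy instances:
+      #   - targets: ['10.0.0.1:30000', '10.0.0.2:30000']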
+ - targets: ['127.0.0.1:30000']
diff --git a/examples/observability/metrics/prometheus_compose.yaml b/examples/observability/metrics/prometheus_compose.yaml
new file mode 100644
index 0000000000..7659db3b5a
--- /dev/null
+++ b/examples/observability/metrics/prometheus_compose.yaml
@@ -0,0 +1,30 @@
+version: '3'
+services:
+ prometheus:
+ image: prom/prometheus:latest
+ container_name: prometheus
+ ports:
+ - "9090:9090"
+ volumes:
+ - ./prometheus.yaml:/etc/prometheus/prometheus.yml
+ command:
+ - '--config.file=/etc/prometheus/prometheus.yml'
+ - '--storage.tsdb.path=/prometheus'
+
+ grafana:
+ image: grafana/grafana:latest
+ container_name: grafana
+ ports:
+ - "3000:3000"
+ volumes:
+ - ./grafana/datasources:/etc/grafana/provisioning/datasources
+ - ./grafana/dashboards/config:/etc/grafana/provisioning/dashboards
+ - ./grafana/dashboards/json:/var/lib/grafana/dashboards
+ environment:
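+      # anonymous read-only access, so the provisioned dashboards open without a login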
+ - GF_AUTH_ANONYMOUS_ENABLED=true
+ - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
+ - GF_AUTH_BASIC_ENABLED=false
+ - GF_USERS_ALLOW_SIGN_UP=false
+ - GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/var/lib/grafana/dashboards/fastdeploy-dashboard.json
+ depends_on:
+ - prometheus
diff --git a/examples/observability/tracing/opentelemetry.yaml b/examples/observability/tracing/opentelemetry.yaml
new file mode 100644
index 0000000000..8593d9182e
--- /dev/null
+++ b/examples/observability/tracing/opentelemetry.yaml
@@ -0,0 +1,38 @@
+receivers:
+ otlp:
+ protocols:
+ grpc:
+ endpoint: 0.0.0.0:4317
+ http:
+ endpoint: 0.0.0.0:4318
+processors:
+ batch:
+
+exporters:
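+  # forward traces to the Jaeger all-in-one container ("jaeger" service in tracing_compose.yaml)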
+ otlp:
+ endpoint: jaeger:4317
+ tls:
+ insecure: true
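+  # additionally dump spans to a local JSON file (tracing_compose.yaml mounts /tmp from the host)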
+ file:
+ path: /tmp/otel_trace.json
+
+extensions:
+ health_check:
+ pprof:
+ zpages:
+
+service:
+ extensions: [health_check, pprof, zpages]
+ pipelines:
+ traces:
+ receivers: [otlp]
+ processors: [batch]
+ exporters: [otlp, file]
+ metrics:
+ receivers: [otlp]
+ processors: [batch]
+ exporters: [otlp]
+ logs:
+ receivers: [otlp]
+ processors: [batch]
+ exporters: [otlp]
diff --git a/examples/observability/tracing/tracing_compose.yaml b/examples/observability/tracing/tracing_compose.yaml
new file mode 100644
index 0000000000..7ed1ecdda3
--- /dev/null
+++ b/examples/observability/tracing/tracing_compose.yaml
@@ -0,0 +1,21 @@
+services:
+ otel-collector:
+ image: docker.io/otel/opentelemetry-collector
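+    # note: the "file" exporter used in opentelemetry.yaml may require the contrib image (otel/opentelemetry-collector-contrib)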
+ volumes:
+ - ./opentelemetry.yaml:/etc/otelcol/config.yaml
+ - /tmp:/tmp
+ ports:
+ - "4317:4317" # OTLP gRPC
+ - "4318:4318" # OTLP HTTP
+ depends_on:
+ - jaeger
+ restart: unless-stopped
+
+ jaeger:
+ image: jaegertracing/all-in-one
+ container_name: jaeger
+ ports:
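+      # Jaeger web UI for browsing traces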
+ - "16686:16686"
+ environment:
+ - COLLECTOR_OTLP_ENABLED=true
+ restart: unless-stopped
diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py
index 45f7bf7f21..bc3dbd78d0 100644
--- a/fastdeploy/engine/common_engine.py
+++ b/fastdeploy/engine/common_engine.py
@@ -35,9 +35,9 @@ import numpy as np
import paddle
import requests
import zmq
-from opentelemetry import trace
from tqdm import tqdm
+import fastdeploy.metrics.trace as tracing
from fastdeploy.engine.request import Request, RequestOutput, RequestType
from fastdeploy.engine.resource_manager import ResourceManager
from fastdeploy.engine.sched.resource_manager_v1 import ResourceManagerV1
@@ -51,7 +51,6 @@ from fastdeploy.inter_communicator import (
ZmqTcpServer,
)
from fastdeploy.metrics.metrics import main_process_metrics
-from fastdeploy.metrics.trace_util import start_span, start_span_request
from fastdeploy.model_executor.guided_decoding import schema_checker
from fastdeploy.plugins.token_processor import load_token_processor_plugins
from fastdeploy.router.utils import check_service_health
@@ -417,13 +416,16 @@ class EngineService:
"""
if not isinstance(tasks, list):
tasks = [tasks]
- for task in tasks:
- start_span_request("DEQUEUE", task, trace.SpanKind.CONSUMER)
self.resource_manager.check_and_free_block_tables()
need_delete_tasks = []
for task in tasks:
+ rid = task.request_id.split("_")[0]
+ trace_carrier = task.trace_carrier
+ if trace_carrier:
+ tracing.trace_set_proc_propagate_context(rid, trace_carrier)
+ task.trace_carrier = tracing.trace_get_proc_propagate_context(rid)
if self.cfg.scheduler_config.splitwise_role == "prefill":
status, msg = self.split_connector.check_decode_allocated(task)
if status:
@@ -447,6 +449,7 @@ class EngineService:
for item in tasks:
trace_print(LoggingEventName.RESOURCE_ALLOCATE_START, item.request_id, getattr(item, "user", ""))
+
available_batch = np.sum(self.resource_manager.stop_flags)
if len(tasks) > available_batch:
self.llm_logger.error(f"Inserting batch:{len(tasks)} exceeds the available batch:{available_batch}.")
@@ -484,6 +487,13 @@ class EngineService:
self.llm_logger.info(f"Tasks are sent to engine, req_ids={req_ids}")
for task in tasks:
task.metrics.inference_start_time = time.time()
+ tracing.trace_report_span(
+ tracing.TraceSpanName.SCHEDULE,
+ task.request_id.split("_")[0],
+ int(task.metrics.scheduler_recv_req_time * 1e9),
+ int(task.metrics.inference_start_time * 1e9),
+ thread_finish_flag=True,
+ )
trace_print(LoggingEventName.RESOURCE_ALLOCATE_END, task.request_id, getattr(task, "user", ""))
trace_print(LoggingEventName.REQUEST_SCHEDULE_END, task.request_id, getattr(task, "user", ""))
trace_print(LoggingEventName.INFERENCE_START, task.request_id, getattr(task, "user", ""))
@@ -694,6 +704,7 @@ class EngineService:
Insert task to engine thread, monitor scheduler request queue.
if the engine has resource, insert task to engine
"""
+ tracing.trace_set_thread_info("Scheduler Task to Work")
current_id = 0
while getattr(self, "running", True):
try:
@@ -764,6 +775,7 @@ class EngineService:
"""
Insert tasks to worker with scheduler v1 (ENABLE_V1_KVCACHE_SCHEDULER=1).
"""
+ tracing.trace_set_thread_info("Scheduler Task to Work")
get_request_pool = ThreadPoolExecutor(max_workers=1)
is_fetching = False
@@ -981,6 +993,18 @@ class EngineService:
self.resource_manager.get_real_bsz()
for task in tasks:
if task.task_type == RequestType.PREFILL:
+ rid = task.request_id.split("_")[0]
+ trace_carrier = task.trace_carrier
+ tracing.trace_set_proc_propagate_context(rid, trace_carrier)
+ trace_carrier = tracing.trace_get_proc_propagate_context(rid)
+ task.trace_carrier = trace_carrier
+ tracing.trace_report_span(
+ tracing.TraceSpanName.SCHEDULE,
+ rid,
+ int(task.metrics.scheduler_recv_req_time * 1e9),
+ int(time.time() * 1e9),
+ thread_finish_flag=True,
+ )
trace_print(
LoggingEventName.RESOURCE_ALLOCATE_END, task.request_id, getattr(task, "user", "")
)
@@ -1038,6 +1062,7 @@ class EngineService:
self.receive_output_thread.start()
def _insert_zmq_task_to_scheduler(self):
+ tracing.trace_set_thread_info("Insert Task to Scheduler")
added_requests: Dict[str, int] = dict()
if envs.FD_ENABLE_INTERNAL_ADAPTER:
if self.cfg.scheduler_config.splitwise_role == "decode":
@@ -1067,7 +1092,6 @@ class EngineService:
try:
request = Request.from_dict(data)
request.metrics.scheduler_recv_req_time = time.time()
- start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER)
main_process_metrics.requests_number.inc()
trace_print(LoggingEventName.PREPROCESSING_END, data["request_id"], data.get("user", ""))
trace_print(LoggingEventName.REQUEST_SCHEDULE_START, data["request_id"], data.get("user", ""))
diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py
index ce167c3ab2..a02273d720 100644
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -34,6 +34,7 @@ import numpy as np
import paddle
from tqdm import tqdm
+import fastdeploy.metrics.trace as tracing
from fastdeploy.engine.args_utils import EngineArgs
from fastdeploy.engine.common_engine import EngineService
from fastdeploy.engine.expert_service import start_data_parallel_service
@@ -97,6 +98,8 @@ class LLMEngine:
main_process_metrics.set_cache_config_info(obj=self.cfg.cache_config)
+ tracing.trace_set_thread_info("engine")
+
def start(self, api_server_pid=None):
"""
Initializes the engine and starts its sub-services.
diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py
index d871de533b..1c02a9ac98 100644
--- a/fastdeploy/engine/request.py
+++ b/fastdeploy/engine/request.py
@@ -624,6 +624,7 @@ class RequestOutput:
# for internal adapter
ic_req_data: Optional[dict] = None,
prompt_token_ids_len: Optional[int] = 0,
+        trace_carrier: Optional[dict] = None,
) -> None:
self.request_id = request_id
self.prompt = prompt
@@ -640,6 +641,7 @@ class RequestOutput:
self.error_msg = error_msg
self.ic_req_data = ic_req_data
self.prompt_token_ids_len = prompt_token_ids_len
+        self.trace_carrier = trace_carrier if trace_carrier is not None else {}
if prompt_token_ids is None:
self.prompt_token_ids = []
@@ -690,6 +692,7 @@ class RequestOutput:
f"metrics={self.metrics}, "
f"error_code={self.error_code}, "
f"error_msg={self.error_msg},"
+ f"trace_carrier={self.trace_carrier}"
)
@classmethod
@@ -705,7 +708,8 @@ class RequestOutput:
else:
d.pop("metrics", None)
metrics = None
- return RequestOutput(**d, outputs=completion_output, metrics=metrics)
+ trace_carrier = d.pop("trace_carrier", {})
+ return RequestOutput(**d, outputs=completion_output, metrics=metrics, trace_carrier=trace_carrier)
def to_dict(self):
"""convert RequestOutput into a serializable dict"""
@@ -726,6 +730,7 @@ class RequestOutput:
"error_msg": self.error_msg,
"ic_req_data": self.ic_req_data,
"prompt_token_ids_len": self.prompt_token_ids_len,
+ "trace_carrier": self.trace_carrier,
}
diff --git a/fastdeploy/entrypoints/cli/tokenizer.py b/fastdeploy/entrypoints/cli/tokenizer.py
index 3012fd1f6c..17e22bb118 100644
--- a/fastdeploy/entrypoints/cli/tokenizer.py
+++ b/fastdeploy/entrypoints/cli/tokenizer.py
@@ -196,7 +196,7 @@ def main(args: argparse.Namespace) -> None:
# 检查参数
if not any([args.encode, args.decode, args.vocab_size, args.info, args.vocab_export]):
- print("请至少指定一个参数:--encode, --decode, --vocab-size, --info, --export-vocab")
+ print("请至少指定一个参数:--encode, --decode, --vocab-size, --info, --vocab-export")
return
# 初始化tokenizer
diff --git a/fastdeploy/entrypoints/engine_client.py b/fastdeploy/entrypoints/engine_client.py
index f7d58ea22c..9babe8fec7 100644
--- a/fastdeploy/entrypoints/engine_client.py
+++ b/fastdeploy/entrypoints/engine_client.py
@@ -25,6 +25,7 @@ from http import HTTPStatus
import numpy as np
from filelock import FileLock
+import fastdeploy.metrics.trace as tracing
from fastdeploy import envs
from fastdeploy.config import FDConfig
from fastdeploy.entrypoints.openai.utils import DealerConnectionManager
@@ -271,6 +272,8 @@ class EngineClient:
"""
task["preprocess_start_time"] = time.time()
+ request_id = task.get("request_id").split("_")[0]
+ tracing.trace_slice_start(tracing.TraceSpanName.PREPROCESSING, request_id)
trace_print(LoggingEventName.PREPROCESSING_START, task["request_id"], task.get("user", ""))
try:
chat_template_kwargs = task.get("chat_template_kwargs") or {}
@@ -349,10 +352,15 @@ class EngineClient:
else:
request_id = parts[0]
index = int(parts[1])
+ trace_carrier = tracing.trace_get_proc_propagate_context(request_id)
+ task["trace_carrier"] = trace_carrier
for i in range(index * n, (index + 1) * n):
child_task = copy(task)
child_task["request_id"] = f"{request_id}_{i}"
self._send_task(child_task)
+ tracing.trace_slice_end(
+ tracing.TraceSpanName.PREPROCESSING, task.get("request_id").split("_")[0], thread_finish_flag=True
+ )
except Exception as e:
api_server_logger.error(f"zmq_client send task error: {e}, {str(traceback.format_exc())}")
raise EngineError(str(e), error_code=400)
diff --git a/fastdeploy/entrypoints/openai/api_server.py b/fastdeploy/entrypoints/openai/api_server.py
index 8da7754895..2744c9388c 100644
--- a/fastdeploy/entrypoints/openai/api_server.py
+++ b/fastdeploy/entrypoints/openai/api_server.py
@@ -30,7 +30,10 @@ from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse, Response, StreamingResponse
from gunicorn.app.base import BaseApplication
from opentelemetry import trace
+from opentelemetry.propagate import extract
+import fastdeploy.metrics.trace as tracing
+from fastdeploy import envs
from fastdeploy.engine.args_utils import EngineArgs
from fastdeploy.engine.engine import LLMEngine
from fastdeploy.engine.expert_service import ExpertService
@@ -58,12 +61,6 @@ from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser
from fastdeploy.envs import environment_variables
from fastdeploy.metrics.metrics import get_filtered_metrics
-from fastdeploy.metrics.trace_util import (
- fd_start_span,
- inject_to_metadata,
- instrument,
- lable_span,
-)
from fastdeploy.utils import (
ExceptionHandler,
FlexibleArgumentParser,
@@ -74,6 +71,8 @@ from fastdeploy.utils import (
retrive_model_from_server,
)
+tracing.process_tracing_init()
+
parser = make_arg_parser(FlexibleArgumentParser())
args = parser.parse_args()
@@ -246,7 +245,6 @@ async def lifespan(app: FastAPI):
app = FastAPI(lifespan=lifespan)
app.add_exception_handler(RequestValidationError, ExceptionHandler.handle_request_validation_exception)
app.add_exception_handler(Exception, ExceptionHandler.handle_exception)
-instrument(app)
env_api_key_func = environment_variables.get("FD_API_KEY")
@@ -367,19 +365,23 @@ def wrap_streaming_generator(original_generator: AsyncGenerator):
@app.post("/v1/chat/completions")
-async def create_chat_completion(request: ChatCompletionRequest):
+async def create_chat_completion(request: ChatCompletionRequest, req: Request):
"""
Create a chat completion for the provided prompt and parameters.
"""
api_server_logger.debug(f"Chat Received request: {request.model_dump_json()}")
+    if envs.TRACES_ENABLE.lower() == "true":
+ if req.headers:
+ headers = dict(req.headers)
+ trace_context = extract(headers)
+ request.trace_context = trace_context
if app.state.dynamic_load_weight:
status, msg = app.state.engine_client.is_workers_alive()
if not status:
return JSONResponse(content={"error": "Worker Service Not Healthy"}, status_code=304)
try:
async with connection_manager():
- inject_to_metadata(request)
- lable_span(request)
+ tracing.label_span(request)
generator = await app.state.chat_handler.create_chat_completion(request)
if isinstance(generator, ErrorResponse):
api_server_logger.debug(f"release: {connection_semaphore.status()}")
@@ -399,18 +401,23 @@ async def create_chat_completion(request: ChatCompletionRequest):
@app.post("/v1/completions")
-async def create_completion(request: CompletionRequest):
+async def create_completion(request: CompletionRequest, req: Request):
"""
Create a completion for the provided prompt and parameters.
"""
api_server_logger.info(f"Completion Received request: {request.model_dump_json()}")
+    if envs.TRACES_ENABLE.lower() == "true":
+ if req.headers:
+ headers = dict(req.headers)
+ trace_context = extract(headers)
+ request.trace_context = trace_context
if app.state.dynamic_load_weight:
status, msg = app.state.engine_client.is_workers_alive()
if not status:
return JSONResponse(content={"error": "Worker Service Not Healthy"}, status_code=304)
try:
async with connection_manager():
- lable_span(request)
+ tracing.label_span(request)
generator = await app.state.completion_handler.create_completion(request)
if isinstance(generator, ErrorResponse):
connection_semaphore.release()
@@ -471,6 +478,7 @@ async def create_embedding(request: EmbeddingRequest):
@app.get("/update_model_weight")
+@tracing.trace_span("update_model_weight")
def update_model_weight(request: Request) -> Response:
"""
update model weight
@@ -485,6 +493,7 @@ def update_model_weight(request: Request) -> Response:
@app.get("/clear_load_weight")
+@tracing.trace_span("clear_load_weight")
def clear_load_weight(request: Request) -> Response:
"""
clear model weight
@@ -499,6 +508,7 @@ def clear_load_weight(request: Request) -> Response:
@app.post("/rearrange_experts")
+@tracing.trace_span("rearrange_experts")
async def rearrange_experts(request: Request):
"""
rearrange experts
@@ -509,6 +519,7 @@ async def rearrange_experts(request: Request):
@app.post("/get_per_expert_tokens_stats")
+@tracing.trace_span("get_per_expert_tokens_stats")
async def get_per_expert_tokens_stats(request: Request):
"""
get per expert tokens stats
@@ -519,6 +530,7 @@ async def get_per_expert_tokens_stats(request: Request):
@app.post("/check_redundant")
+@tracing.trace_span("check_redundant")
async def check_redundant(request: Request):
"""
check redundant
@@ -537,7 +549,7 @@ def launch_api_server() -> None:
api_server_logger.info(f"launch Fastdeploy api server... port: {args.port}")
api_server_logger.info(f"args: {args.__dict__}")
- fd_start_span("FD_START")
+ # fd_start_span("FD_START")
options = {
"bind": f"{args.host}:{args.port}",
@@ -565,6 +577,7 @@ if _metrics_port is None or (_main_port is not None and _metrics_port == _main_p
@metrics_app.get("/metrics")
+@tracing.trace_span("metrics")
async def metrics():
"""
metrics
@@ -574,6 +587,7 @@ async def metrics():
@metrics_app.get("/config-info")
+@tracing.trace_span("config-info")
def config_info() -> Response:
"""
Get the current configuration of the API server.
diff --git a/fastdeploy/entrypoints/openai/protocol.py b/fastdeploy/entrypoints/openai/protocol.py
index b103b4e3eb..af9f3e55d9 100644
--- a/fastdeploy/entrypoints/openai/protocol.py
+++ b/fastdeploy/entrypoints/openai/protocol.py
@@ -505,6 +505,7 @@ class CompletionRequest(BaseModel):
mm_hashes: Optional[list] = None
# doc: end-completion-extra-params
+    trace_context: Optional[dict] = None
collect_metrics: Optional[bool] = False
@@ -681,6 +682,7 @@ class ChatCompletionRequest(BaseModel):
mm_hashes: Optional[list] = None
completion_token_ids: Optional[List[int]] = None
# doc: end-chat-completion-extra-params
+    trace_context: Optional[dict] = None
collect_metrics: Optional[bool] = False
diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py
index 4ef89f881b..580b11199a 100644
--- a/fastdeploy/entrypoints/openai/serving_chat.py
+++ b/fastdeploy/entrypoints/openai/serving_chat.py
@@ -24,6 +24,7 @@ from typing import List, Optional
import numpy as np
+import fastdeploy.metrics.trace as tracing
from fastdeploy.entrypoints.openai.protocol import (
ChatCompletionRequest,
ChatCompletionResponse,
@@ -104,6 +105,7 @@ class OpenAIServingChat:
"""
Create a new chat completion using the specified parameters.
"""
+ tracing.trace_set_thread_info("API Server")
if not self._check_master():
err_msg = (
f"Only master node can accept completion request, please send request to master node: {self.master_ip}"
@@ -135,6 +137,8 @@ class OpenAIServingChat:
request_id = f"chatcmpl-{request.user}-{uuid.uuid4()}"
else:
request_id = f"chatcmpl-{uuid.uuid4()}"
+ tracing.trace_req_start(rid=request_id, trace_content=request.trace_context, role="FastDeploy")
+ del request.trace_context
api_server_logger.info(f"create chat completion request: {request_id}")
prompt_tokens = None
max_tokens = None
@@ -421,6 +425,19 @@ class OpenAIServingChat:
speculate_metrics=output_speculate_metrics,
)
if res["finished"]:
+ trace_carrier = res.get("trace_carrier")
+ if trace_carrier:
+ tracing.trace_set_proc_propagate_context(request_id, trace_carrier)
+ start_time = res["metrics"]["engine_recv_latest_token_time"]
+ tracing.trace_report_span(
+ tracing.TraceSpanName.POSTPROCESSING,
+ request_id,
+ int(start_time * 1e9),
+ int(time.time() * 1e9),
+ thread_finish_flag=True,
+ )
+ if "trace_carrier" in res:
+ del res["trace_carrier"]
num_choices -= 1
main_process_metrics.e2e_request_latency.observe(
time.time() - res["metrics"]["request_start_time"]
@@ -494,6 +511,7 @@ class OpenAIServingChat:
)
yield f"data: {error_data}\n\n"
finally:
+ tracing.trace_req_finish(request_id)
await self.engine_client.connection_manager.cleanup_request(request_id)
self.engine_client.semaphore.release()
trace_print(LoggingEventName.POSTPROCESSING_END, request_id, getattr(request, "user", ""))
@@ -620,6 +638,19 @@ class OpenAIServingChat:
prompt_logprobs_res_list[idx].extend(clamp_prompt_logprobs(prompt_logprobs_res))
speculate_metrics[idx] = data["metrics"].get("speculate_metrics", None)
if data["finished"]:
+ trace_carrier = data.get("trace_carrier")
+ if trace_carrier:
+ tracing.trace_set_proc_propagate_context(request_id, trace_carrier)
+ start_time = data["metrics"]["engine_recv_latest_token_time"]
+ tracing.trace_report_span(
+ tracing.TraceSpanName.POSTPROCESSING,
+ request_id,
+ int(start_time * 1e9),
+ int(time.time() * 1e9),
+ thread_finish_flag=True,
+ )
+ if "trace_carrier" in data:
+ del data["trace_carrier"]
num_choices -= 1
reasoning_num_tokens[idx] = data["outputs"].get("reasoning_token_num", 0)
if data["outputs"].get("image_token_num"):
@@ -645,6 +676,7 @@ class OpenAIServingChat:
)
choices.append(choice)
finally:
+ tracing.trace_req_finish(request_id)
await self.engine_client.connection_manager.cleanup_request(request_id)
self.engine_client.semaphore.release()
api_server_logger.info(f"release {self.engine_client.semaphore.status()}")
diff --git a/fastdeploy/entrypoints/openai/serving_completion.py b/fastdeploy/entrypoints/openai/serving_completion.py
index 0f0ee3b253..0801bd0a73 100644
--- a/fastdeploy/entrypoints/openai/serving_completion.py
+++ b/fastdeploy/entrypoints/openai/serving_completion.py
@@ -24,6 +24,7 @@ from typing import List, Optional
import numpy as np
+import fastdeploy.metrics.trace as tracing
from fastdeploy.engine.request import RequestOutput
from fastdeploy.entrypoints.openai.protocol import (
CompletionLogprobs,
@@ -82,6 +83,7 @@ class OpenAIServingCompletion:
"""
Create a completion for the given prompt.
"""
+ tracing.trace_set_thread_info("API Server")
if not self._check_master():
err_msg = (
f"Only master node can accept completion request, please send request to master node: {self.master_ip}"
@@ -106,6 +108,8 @@ class OpenAIServingCompletion:
else:
request_id = f"cmpl-{uuid.uuid4()}"
api_server_logger.info(f"Initialize request {request_id}: {request}")
+ tracing.trace_req_start(rid=request_id, trace_content=request.trace_context, role="FastDeploy")
+ del request.trace_context
request_prompt_ids = None
request_prompts = None
@@ -322,6 +326,19 @@ class OpenAIServingCompletion:
aggregated_speculate_metrics[rid] = output_speculate_metrics
if data.get("finished", False):
+ trace_carrier = data.get("trace_carrier")
+ if trace_carrier:
+ tracing.trace_set_proc_propagate_context(request_id, trace_carrier)
+ start_time = data["metrics"]["engine_recv_latest_token_time"]
+ tracing.trace_report_span(
+ tracing.TraceSpanName.POSTPROCESSING,
+ request_id,
+ int(start_time * 1e9),
+ int(time.time() * 1e9),
+ thread_finish_flag=True,
+ )
+ if "trace_carrier" in data:
+ del data["trace_carrier"]
data["output_token_ids"] = output_tokens[rid]
data["outputs"]["top_logprobs"] = aggregated_top_logprobs[rid]
data["outputs"]["draft_top_logprobs"] = aggregated_draft_top_logprobs[rid]
@@ -347,6 +364,7 @@ class OpenAIServingCompletion:
except Exception as e:
api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True)
finally:
+ tracing.trace_req_finish(request_id)
trace_print(LoggingEventName.POSTPROCESSING_END, request_id, getattr(request, "user", ""))
self.engine_client.semaphore.release()
if dealer is not None:
@@ -577,6 +595,19 @@ class OpenAIServingCompletion:
choices = []
if res["finished"]:
+ trace_carrier = res.get("trace_carrier")
+ if trace_carrier:
+ tracing.trace_set_proc_propagate_context(request_id, trace_carrier)
+ start_time = res["metrics"]["engine_recv_latest_token_time"]
+ tracing.trace_report_span(
+ tracing.TraceSpanName.POSTPROCESSING,
+ request_id,
+ int(start_time * 1e9),
+ int(time.time() * 1e9),
+ thread_finish_flag=True,
+ )
+ if "trace_carrier" in res:
+ del res["trace_carrier"]
num_choices -= 1
if getattr(request, "stream_options", None) and request.stream_options.include_usage:
usage_chunk = CompletionStreamResponse(
@@ -607,6 +638,8 @@ class OpenAIServingCompletion:
api_server_logger.error(f"Error in completion_stream_generator: {e}, {str(traceback.format_exc())}")
yield f"data: {ErrorResponse(error=ErrorInfo(message=str(e), code='400', type=ErrorType.INTERNAL_ERROR)).model_dump_json(exclude_unset=True)}\n\n"
finally:
+
+ tracing.trace_req_finish(request_id)
trace_print(LoggingEventName.POSTPROCESSING_END, request_id, getattr(request, "user", ""))
del request
if dealer is not None:
diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py
index 712f9195f6..4ac0b79036 100644
--- a/fastdeploy/envs.py
+++ b/fastdeploy/envs.py
@@ -152,6 +152,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
"FD_HPU_CHUNK_SIZE": lambda: int(os.getenv("FD_HPU_CHUNK_SIZE", "64")),
"FD_PREFILL_WAIT_DECODE_RESOURCE_SECONDS": lambda: int(os.getenv("FD_PREFILL_WAIT_DECODE_RESOURCE_SECONDS", "30")),
"FMQ_CONFIG_JSON": lambda: os.getenv("FMQ_CONFIG_JSON", None),
+ "FD_OTLP_EXPORTER_SCHEDULE_DELAY_MILLIS": lambda: int(os.getenv("FD_OTLP_EXPORTER_SCHEDULE_DELAY_MILLIS", "500")),
+ "FD_OTLP_EXPORTER_MAX_EXPORT_BATCH_SIZE": lambda: int(os.getenv("FD_OTLP_EXPORTER_MAX_EXPORT_BATCH_SIZE", "64")),
"FD_TOKEN_PROCESSOR_HEALTH_TIMEOUT": lambda: int(os.getenv("FD_TOKEN_PROCESSOR_HEALTH_TIMEOUT", "120")),
}
diff --git a/fastdeploy/metrics/trace.py b/fastdeploy/metrics/trace.py
new file mode 100644
index 0000000000..5c60b4e98d
--- /dev/null
+++ b/fastdeploy/metrics/trace.py
@@ -0,0 +1,777 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+# This file is modified from https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/tracing/trace.py
+
+from __future__ import annotations
+
+import inspect
+import os
+import random
+import threading
+import time
+import uuid
+from dataclasses import dataclass
+from enum import Enum, unique
+from functools import wraps
+from typing import Any, Dict, List, Optional
+
+from fastdeploy import envs
+from fastdeploy.utils import api_server_logger as logger
+
+opentelemetry_imported = False
+tracing_enabled = False
+
+try:
+ from opentelemetry import context, propagate, trace
+ from opentelemetry.sdk.environment_variables import (
+ OTEL_EXPORTER_OTLP_TRACES_PROTOCOL,
+ )
+ from opentelemetry.sdk.resources import Resource
+ from opentelemetry.sdk.trace import SpanProcessor, TracerProvider, id_generator
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter
+
+ opentelemetry_imported = True
+except ImportError as e:
+    print(f"Failed to import opentelemetry, tracing disabled: {e}")
+    logger.error(f"Failed to import opentelemetry, tracing disabled: {e}")
+
+ class id_generator:
+ class IdGenerator:
+ pass
+
+ logger.info("opentelemetry package is not installed, tracing disabled")
+
+
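+# Wraps a BatchSpanProcessor: copies the "stream" attribute from the parent span onto new child
+# spans and drops spans whose name contains "http" (ASGI plumbing) before they are exported.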
+class FilteringSpanProcessor(SpanProcessor):
+ def __init__(self, exporter: SpanExporter, **kwargs):
+ self._processor = BatchSpanProcessor(exporter, **kwargs)
+
+ def on_start(self, span, parent_context=None):
+ parent_span = trace.get_current_span()
+ if parent_span and parent_span.is_recording():
+ stream_attr = parent_span.attributes.get("stream")
+ if stream_attr is not None:
+ span.set_attribute("stream", stream_attr)
+ self._processor.on_start(span, parent_context)
+
+ def on_end(self, span):
+ # asgi_event_type = span.attributes.get("asgi.event.type")
+ # stream = span.attributes.get("stream")
+ span_name = span.name or ""
+
+ if "http" in span_name:
+ return
+
+ self._processor.on_end(span)
+
+ def shutdown(self):
+ self._processor.shutdown()
+
+ def force_flush(self, timeout_millis=None):
+ self._processor.force_flush(timeout_millis)
+
+
+def label_span(request):
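+    # Mark streaming requests so FilteringSpanProcessor.on_start propagates the "stream" attribute to child spans.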
+ if request.stream:
+ span = trace.get_current_span()
+ if span is not None and span.is_recording():
+ span.set_attribute("stream", "true")
+
+
+@dataclass
+class TraceThreadInfo:
+ host_id: str
+ pid: int
+ thread_label: str
+    tp_rank: Optional[int]
+    dp_rank: Optional[int]
+ tracer: trace.Tracer
+
+
+@dataclass
+class TraceSliceContext:
+ slice_name: str
+ span: Optional[trace.span.Span] = None
+ # When True, defers slice_name assignment until trace_slice_end()
+ anonymous: bool = False
+
+
+@dataclass
+class TraceThreadContext:
+ thread_info: TraceThreadInfo
+ cur_slice_stack: List[TraceSliceContext]
+ thread_span: Optional[trace.span.Span] = None
+ # Record the most recently completed span as the previous span for the next span to be created.
+ last_span_context: Optional[trace.span.SpanContext] = None
+
+
+@dataclass
+class TraceReqContext:
+ rid: str
+ start_time_ns: int
+ threads_context: Dict[int, TraceThreadContext]
+
+ # Indicates whether this instance is a replica from the main process.
+ # When True, root_span is None and only root_span_context is preserved.
+ is_copy: bool = False
+ root_span: Optional[trace.span.Span] = None
+ root_span_context: Optional[context.Context] = None
+
+
+@dataclass
+class TracePropagateContext:
+ root_span_context: context.Context
+ prev_span_context: Optional[trace.span.SpanContext]
+
+ def to_dict(self):
+ carrier: dict[str, str] = {}
+ propagate.inject(carrier, context=self.root_span_context)
+
+ if self.prev_span_context:
+ return {
+ "root_span": carrier,
+ "prev_span": {
+ "span_id": self.prev_span_context.span_id,
+ "trace_id": self.prev_span_context.trace_id,
+ },
+ }
+ else:
+ return {"root_span": carrier, "prev_span": "None"}
+
+ @classmethod
+ def instance_from_dict(cls, d):
+ if "root_span" not in d or "prev_span" not in d:
+ return None
+
+ carrier = d["root_span"]
+ root_span_context = propagate.extract(carrier)
+
+ if d["prev_span"] == "None":
+ prev_span_context = None
+ else:
+ prev_span_context = trace.span.SpanContext(
+ trace_id=d["prev_span"]["trace_id"],
+ span_id=d["prev_span"]["span_id"],
+ is_remote=True,
+ )
+
+ return cls(root_span_context, prev_span_context)
+
+
+class TraceCustomIdGenerator(id_generator.IdGenerator):
+ """
+ The default IdGenerator may produce duplicate trace IDs across multiple TP scheduler processes,
+ hence a custom IdGenerator is implemented.
+ """
+
+ def __init__(self):
+ super().__init__()
+ self.local_random = random.Random()
+ self.local_random.seed(time.time())
+
+ def generate_trace_id(self) -> int:
+        return self.local_random.getrandbits(128)
+
+ def generate_span_id(self) -> int:
+ return self.local_random.getrandbits(64)
+
+
+# global variables
+remote_trace_contexts: Dict[str, TracePropagateContext] = {}
+threads_info: Dict[int, TraceThreadInfo] = {}
+reqs_context: Dict[str, TraceReqContext] = {}
+
+__get_cur_time_ns = lambda: int(time.time() * 1e9)
+
+
+def __get_host_id() -> str:
+ """
+ In distributed tracing systems, obtain a unique node identifier
+ and inject it into all subsequently generated spans
+ to prevent PID conflicts between threads on different nodes.
+ """
+ if envs.FD_HOST_NAME:
+ return envs.FD_HOST_NAME
+ paths = ["/etc/machine-id", "/var/lib/dbus/machine-id"]
+ for path in paths:
+ try:
+ with open(path, "r") as f:
+ val = f.read().strip()
+ if val:
+ return val
+ except Exception:
+ continue
+
+ mac = uuid.getnode()
+ if mac != 0:
+ return uuid.UUID(int=mac).hex
+
+ try:
+ unique_id = uuid.uuid4().hex + "-" + str(os.getpid())
+ return unique_id
+ except Exception:
+ return "unknown"
+
+
+# Should be called by each tracked process.
+def process_tracing_init():
+ global tracing_enabled
+ global __get_cur_time_ns
+ tracing_enabled = envs.TRACES_ENABLE.lower() == "true"
+
+ if not tracing_enabled:
+ logger.warning("Opentelemetry is DISABLED.")
+ return
+
+ if not opentelemetry_imported:
+ tracing_enabled = False
+ return
+
+ try:
+ # --- read env ---
+ service_name = envs.FD_SERVICE_NAME
+ host_name = envs.FD_HOST_NAME
+ resource_attributes = {"service.name": service_name}
+ if host_name:
+ resource_attributes["host.name"] = host_name
+ resource = Resource(attributes=resource_attributes)
+ endpoint = envs.EXPORTER_OTLP_ENDPOINT
+ headers = envs.EXPORTER_OTLP_HEADERS
+ headers = dict(item.split("=") for item in headers.split(",")) if headers else None
+
+ otlp_exporter = get_otlp_span_exporter(endpoint, headers)
+
+ schedule_delay_millis = envs.FD_OTLP_EXPORTER_SCHEDULE_DELAY_MILLIS
+ max_export_batch_size = envs.FD_OTLP_EXPORTER_MAX_EXPORT_BATCH_SIZE
+ processor = FilteringSpanProcessor(
+ otlp_exporter,
+ schedule_delay_millis=schedule_delay_millis,
+ max_export_batch_size=max_export_batch_size,
+ )
+ tracer_provider = TracerProvider(resource=resource, id_generator=TraceCustomIdGenerator())
+
+ tracer_provider.add_span_processor(processor)
+ # tracer_provider.add_span_processor(
+ # SimpleSpanProcessor(ConsoleSpanExporter())
+ # )
+ trace.set_tracer_provider(tracer_provider)
+ except Exception as e:
+        logger.error(f"Failed to initialize opentelemetry: {e}")
+        logger.warning("Please set a correct OTLP endpoint.")
+ tracing_enabled = False
+ return
+
+ if hasattr(time, "time_ns"):
+ __get_cur_time_ns = lambda: int(time.time_ns())
+
+ tracing_enabled = True
+
+
+def get_otlp_span_exporter(endpoint, headers):
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
+ OTLPSpanExporter as GRPCSpanExporter,
+ )
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
+ OTLPSpanExporter as HTTPSpanExporter,
+ )
+
+ protocol = os.environ.get(OTEL_EXPORTER_OTLP_TRACES_PROTOCOL, "grpc")
+ supported_protocols = {"grpc", "http/protobuf"}
+
+ if protocol not in supported_protocols:
+ raise ValueError(
+ f"Unsupported OTLP protocol '{protocol}' configured. "
+ f"Supported protocols are: {', '.join(sorted(supported_protocols))}"
+ )
+
+ if protocol == "grpc":
+ return GRPCSpanExporter(endpoint=endpoint, insecure=True)
+ elif protocol == "http/protobuf":
+ return HTTPSpanExporter(endpoint=endpoint, headers=headers)
+
+
+# Should be called by each tracked thread.
+def trace_set_thread_info(thread_label: str, tp_rank: Optional[int] = None, dp_rank: Optional[int] = None):
+ if not tracing_enabled:
+ return
+
+ pid = threading.get_native_id()
+ if pid in threads_info:
+ return
+
+ threads_info[pid] = TraceThreadInfo(
+ host_id=__get_host_id(),
+ pid=pid,
+ thread_label=thread_label,
+ tp_rank=tp_rank,
+ dp_rank=dp_rank,
+ tracer=trace.get_tracer("fastdeploy server"),
+ )
+
+
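+# Open a per-thread span, named after the thread label / host / pid, under the request's root span.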
+def __create_thread_context(pid, req_span_context, ts: Optional[int] = None):
+ if pid not in threads_info:
+ trace_set_thread_info("unknown")
+
+ thread_info = threads_info[pid]
+ thread_context = TraceThreadContext(
+ thread_info=thread_info,
+ cur_slice_stack=[],
+ )
+
+ thread_name = f"{thread_info.thread_label}"
+ if thread_info.tp_rank is not None:
+ thread_name += f" [TP {thread_info.tp_rank}] "
+ thread_name += f"(host:{thread_info.host_id} | pid:{pid})"
+ ts = ts or __get_cur_time_ns()
+ thread_context.thread_span = thread_context.thread_info.tracer.start_span(
+ name=thread_name,
+ start_time=ts,
+ context=req_span_context,
+ )
+
+ if thread_info.tp_rank is not None:
+ thread_context.thread_span.set_attributes({"tp_rank": thread_info.tp_rank})
+
+ thread_context.thread_span.set_attributes(
+ {
+ "host_id": thread_info.host_id,
+ "pid": thread_info.pid,
+ "thread_label": thread_info.thread_label,
+ }
+ )
+
+ return thread_context
+
+
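+# Serialize the current trace context of this request/thread into a plain dict ("carrier") so it
+# can be shipped to another process and re-attached there via trace_set_proc_propagate_context().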
+def trace_get_proc_propagate_context(rid) -> Optional[Dict[str, Any]]:
+ if not tracing_enabled:
+ return None
+
+ rid = str(rid)
+ if rid not in reqs_context or not reqs_context[rid].root_span_context:
+ return None
+
+ pid = threading.get_native_id()
+ prev_span_context = None
+ thread_context = reqs_context[rid].threads_context[pid]
+ if thread_context.cur_slice_stack:
+ cur_slice_info = thread_context.cur_slice_stack[0]
+ prev_span_context = cur_slice_info.span.get_span_context()
+ elif thread_context.last_span_context:
+ prev_span_context = thread_context.last_span_context
+
+ root_span_context = reqs_context[rid].root_span_context
+
+ trace_context = TracePropagateContext(root_span_context, prev_span_context)
+ return trace_context.to_dict()
+
+
+def trace_set_proc_propagate_context(rid, trace_context: Optional[Dict[str, Any]], ts: Optional[int] = None):
+ if not tracing_enabled:
+ return
+ if not trace_context:
+ return
+
+ trace_context = TracePropagateContext.instance_from_dict(trace_context)
+ if not trace_context:
+ return
+
+ rid = str(rid)
+ # Create a copy of the request context
+ if rid not in reqs_context:
+ reqs_context[rid] = TraceReqContext(
+ rid=rid,
+ start_time_ns=ts or __get_cur_time_ns(),
+ threads_context={},
+ root_span_context=trace_context.root_span_context,
+ is_copy=True,
+ )
+
+ pid = threading.get_native_id()
+
+ if pid in reqs_context[rid].threads_context:
+ return
+
+ # Create new thread context.
+ reqs_context[rid].threads_context[pid] = __create_thread_context(
+ pid,
+ trace_context.root_span_context,
+ reqs_context[rid].start_time_ns,
+ )
+
+ reqs_context[rid].threads_context[pid].last_span_context = trace_context.prev_span_context
+
+
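+# Start the root span for a request. If an ASGI instrumentor already created an active span it is
+# reused as the root; otherwise a new SERVER span is created, optionally continuing the upstream
+# context extracted from the incoming HTTP headers (trace_content).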
+def trace_req_start(
+ rid: str,
+    trace_content: Optional[Any],
+ ts: Optional[int] = None,
+ role: Optional[str] = "null",
+):
+ if not tracing_enabled:
+ return
+
+ rid = str(rid)
+
+ ts = ts or __get_cur_time_ns()
+
+ pid = threading.get_native_id()
+ if pid not in threads_info:
+ return
+
+ tracer = threads_info[pid].tracer
+
+ upstream_context = trace_content
+
+ # 1. Check if there is already an active Span (from FastAPI Instrumentor)
+ active_span = trace.get_current_span()
+ if active_span is not None and active_span.is_recording():
+ active_span.set_attribute("rid", rid)
+ new_span_name = active_span.name + f" (Req: {rid})"
+ active_span.update_name(new_span_name)
+
+ active_span_context = active_span.get_span_context()
+
+ if active_span_context.is_valid and active_span_context.trace_id != 0:
+ # Scenario: FastAPIInstrumentor has created the top-level Span
+
+ if rid in reqs_context:
+ return
+
+ logger.info(f"Using existing active span from context as root for RID: {rid}")
+
+ # Inject the FastAPI Span Context as the root Span Context into the internal structure
+ reqs_context[rid] = TraceReqContext(
+ rid=rid,
+ start_time_ns=ts,
+ threads_context={},
+ root_span=active_span,
+ root_span_context=context.get_current(),
+ is_copy=True,
+ )
+ # Thread context is necessary so that trace_slice_start can find the tracer
+ if pid not in reqs_context[rid].threads_context:
+ reqs_context[rid].threads_context[pid] = __create_thread_context(
+ pid,
+ context.get_current(),
+ ts,
+ )
+            # No need to manually end the request span here; its lifecycle is handled by FastAPIInstrumentor
+ return
+
+ parent_context = None
+
+ use_upstream = False
+ if upstream_context:
+ ctx_span = trace.get_current_span(upstream_context)
+ if ctx_span.get_span_context().is_valid:
+ use_upstream = True
+
+ if use_upstream:
+ logger.info(f"Continuing upstream trace for RID={rid}")
+ parent_context = upstream_context
+
+ reqs_context[rid] = TraceReqContext(
+ rid=rid,
+ start_time_ns=ts,
+ threads_context={},
+ is_copy=True,
+ )
+
+ else:
+ reqs_context[rid] = TraceReqContext(
+ rid=rid,
+ start_time_ns=ts,
+ threads_context={},
+ is_copy=False,
+ )
+
+ orig_rid = rid.split("_")[0]
+ role = "" if role == "null" else role
+ attrs = {"rid": orig_rid}
+
+ root_span = tracer.start_span(
+ name=f"{role} Req {orig_rid}".strip(),
+ start_time=ts,
+ context=parent_context,
+ kind=trace.SpanKind.SERVER,
+ attributes=attrs,
+ )
+
+ root_span.set_attributes(
+ {
+ "rid": rid,
+ }
+ )
+
+ # Consistently populate the Root Span information in reqs_context
+ reqs_context[rid].root_span = root_span
+ reqs_context[rid].root_span_context = trace.set_span_in_context(root_span)
+
+ # create thread context and thread span
+ reqs_context[rid].threads_context[pid] = __create_thread_context(
+ pid,
+ reqs_context[rid].root_span_context,
+ ts,
+ )
+
+
+def trace_req_finish(rid: str, ts: Optional[int] = None, attrs: Optional[Dict[str, Any]] = None):
+ if not tracing_enabled:
+ return
+
+ rid = str(rid)
+ if rid not in reqs_context:
+ return
+
+ req_context = reqs_context[rid]
+ ts = ts or __get_cur_time_ns()
+
+ # End all unclosed thread spans.
+ for thread_context in req_context.threads_context.values():
+ thread_context.thread_span.end(end_time=ts)
+
+ # Only end the root_span if it was manually created
+ if req_context.root_span:
+ if attrs:
+ req_context.root_span.set_attributes(attrs)
+ req_context.root_span.end(end_time=ts)
+
+ del reqs_context[rid]
+
+
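+# A "slice" is a nested span inside the current thread span. Slices on one thread form a stack;
+# each new top-level slice is linked to the previously completed one to keep the timeline connected.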
+def trace_slice_start(
+ name: str,
+ rid: str,
+ ts: Optional[int] = None,
+ anonymous: bool = False,
+):
+ if not tracing_enabled:
+ return
+
+ rid = str(rid)
+ if rid not in reqs_context:
+ return
+
+ pid = threading.get_native_id()
+ if pid not in reqs_context[rid].threads_context:
+ return
+
+ thread_context = reqs_context[rid].threads_context[pid]
+
+ ts = ts or __get_cur_time_ns()
+
+ slice_info = TraceSliceContext(
+ slice_name=name,
+ anonymous=anonymous,
+ )
+
+ # find prev slice
+ prev_span_context = None
+ if not thread_context.cur_slice_stack:
+ if thread_context.last_span_context:
+ prev_span_context = thread_context.last_span_context
+
+ parent_span = thread_context.thread_span
+ if thread_context.cur_slice_stack:
+ parent_span = thread_context.cur_slice_stack[-1].span
+
+ parent_span_context = trace.set_span_in_context(parent_span)
+ span = thread_context.thread_info.tracer.start_span(
+ name=slice_info.slice_name,
+ start_time=ts,
+ context=parent_span_context,
+ )
+
+ if prev_span_context:
+ span.add_link(prev_span_context)
+
+ slice_info.span = span
+
+ thread_context.cur_slice_stack.append(slice_info)
+
+
+def trace_slice_end(
+ name: str,
+ rid: str,
+ ts: Optional[int] = None,
+ attrs: Optional[Dict[str, Any]] = None,
+ auto_next_anon: bool = False,
+ thread_finish_flag: bool = False,
+):
+ if not tracing_enabled:
+ return
+
+ rid = str(rid)
+ if rid not in reqs_context:
+ return
+
+ pid = threading.get_native_id()
+ if pid not in reqs_context[rid].threads_context:
+ return
+
+ thread_context = reqs_context[rid].threads_context[pid]
+
+ if not thread_context.cur_slice_stack:
+        logger.warning(f"trace_slice_end('{name}') called without a matching trace_slice_start; ignoring.")
+ return
+
+ ts = ts or __get_cur_time_ns()
+ slice_info = thread_context.cur_slice_stack[-1]
+ span = slice_info.span
+
+ if slice_info.anonymous:
+ span.update_name(name)
+ else:
+ span = slice_info.span
+ if slice_info.slice_name != name:
+ span.set_status(trace.Status(trace.StatusCode.ERROR))
+ logger.warning(f"Slice name mismatch: {name} != {slice_info.slice_name}")
+
+ if attrs:
+ span.set_attributes(attrs)
+
+ span.end(end_time=ts)
+
+ thread_context.cur_slice_stack.pop()
+ if len(thread_context.cur_slice_stack) == 0:
+ thread_context.last_span_context = span.get_span_context()
+
+ # If this is the last slice in the thread,
+ # release the thread context and check whether to release the request context.
+ if thread_finish_flag:
+ thread_context.thread_span.end(end_time=ts)
+ del reqs_context[rid].threads_context[pid]
+ if reqs_context[rid].is_copy and not reqs_context[rid].threads_context:
+ del reqs_context[rid]
+ return
+
+ if auto_next_anon:
+ trace_slice_start("", rid, ts, True)
+
+
+# alias
+trace_slice = trace_slice_end
+
+
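+# Report a completed span with explicit start/end timestamps (a trace_slice_start immediately
+# followed by trace_slice_end), e.g. for stages whose duration was measured elsewhere.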
+def trace_report_span(
+ name: str,
+ rid: str,
+ start_time_ns: int,
+ end_time_ns: int,
+ attrs: Dict[str, Any] = None,
+ thread_finish_flag: bool = False,
+):
+ if not tracing_enabled:
+ return
+ trace_slice_start(name, rid, start_time_ns)
+ trace_slice_end(name, rid, end_time_ns, attrs, False, thread_finish_flag)
+
+
+# Add event to the current slice on the same thread with the same rid.
+def trace_event(name: str, rid: str, ts: Optional[int] = None, attrs: Dict[str, Any] = None):
+ if not tracing_enabled:
+ return
+
+ rid = str(rid)
+ if rid not in reqs_context:
+ return
+
+ pid = threading.get_native_id()
+ if pid not in reqs_context[rid].threads_context:
+ return
+
+ thread_context = reqs_context[rid].threads_context[pid]
+
+ if not thread_context.cur_slice_stack:
+ logger.warning("No slice is currently being traced.")
+ return
+
+ ts = ts or __get_cur_time_ns()
+
+ slice_info = thread_context.cur_slice_stack[-1]
+ slice_info.span.add_event(name=name, timestamp=ts, attributes=attrs)
+
+
+# Add attrs to the current slice on the same thread with the same rid.
+def trace_slice_add_attr(rid: str, attrs: Dict[str, Any]):
+ if not tracing_enabled:
+ return
+
+ rid = str(rid)
+ if rid not in reqs_context:
+ return
+
+ pid = threading.get_native_id()
+ if pid not in reqs_context[rid].threads_context:
+ return
+
+ thread_context = reqs_context[rid].threads_context[pid]
+
+ if not thread_context.cur_slice_stack:
+ logger.warning("No slice is currently being traced.")
+ return
+
+ slice_info = thread_context.cur_slice_stack[-1]
+ slice_info.span.set_attributes(attrs)
+
+
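+# Decorator that runs a sync or async function inside a span named after the function (or span_name).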
+def trace_span(span_name: str = None):
+
+ def decorator(func):
+ if not tracing_enabled:
+ return func
+
+ pid = threading.get_native_id()
+ if pid not in threads_info:
+ trace_set_thread_info("FastDeploy")
+
+ tracer = threads_info[pid].tracer
+
+ name = span_name or func.__name__
+
+ if inspect.iscoroutinefunction(func):
+
+ @wraps(func)
+ async def async_wrapper(*args, **kwargs):
+ with tracer.start_as_current_span(name):
+ return await func(*args, **kwargs)
+
+ return async_wrapper
+
+ else:
+
+ @wraps(func)
+ def sync_wrapper(*args, **kwargs):
+ with tracer.start_as_current_span(name):
+ return func(*args, **kwargs)
+
+ return sync_wrapper
+
+ return decorator
+
+
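+# Canonical span names for the request lifecycle stages reported across processes.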
+@unique
+class TraceSpanName(str, Enum):
+
+ FASTDEPLOY = "FASTDEPLOY"
+ PREPROCESSING = "PREPROCESSING"
+ SCHEDULE = "SCHEDULE"
+ PREFILL = "PREFILL"
+ DECODE = "DECODE"
+ POSTPROCESSING = "POSTPROCESSING"
diff --git a/fastdeploy/metrics/trace_util.py b/fastdeploy/metrics/trace_util.py
deleted file mode 100644
index 111c2c8534..0000000000
--- a/fastdeploy/metrics/trace_util.py
+++ /dev/null
@@ -1,262 +0,0 @@
-import json
-import os
-
-from fastapi import FastAPI
-from opentelemetry import trace
-from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
-from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
-from opentelemetry.instrumentation.logging import LoggingInstrumentor
-from opentelemetry.propagate import extract, inject
-from opentelemetry.sdk.resources import Resource
-from opentelemetry.sdk.trace import SpanProcessor, TracerProvider
-from opentelemetry.sdk.trace.export import (
- BatchSpanProcessor,
- ConsoleSpanExporter,
- SpanExporter,
-)
-
-from fastdeploy import envs
-from fastdeploy.utils import llm_logger
-
-# OpenTelemetry Trace context store in metadata
-TRACE_CARRIER = "trace_carrier"
-
-traces_enable = False
-tracer = trace.get_tracer(__name__)
-
-
-class FilteringSpanProcessor(SpanProcessor):
- def __init__(self, exporter: SpanExporter):
- self._processor = BatchSpanProcessor(exporter)
-
- # 父span属性继承逻辑
- def on_start(self, span, parent_context=None):
- parent_span = trace.get_current_span()
- if parent_span and parent_span.is_recording():
- stream_attr = parent_span.attributes.get("stream")
- if stream_attr is not None:
- span.set_attribute("stream", stream_attr)
- self._processor.on_start(span, parent_context)
-
- # span导出时的过滤逻辑
- def on_end(self, span):
- asgi_event_type = span.attributes.get("asgi.event.type")
- stream = span.attributes.get("stream")
- span_name = span.name or ""
-
- if stream and asgi_event_type == "http.response.body" and "http send" in span_name:
- return
-
- self._processor.on_end(span)
-
- def shutdown(self):
- self._processor.shutdown()
-
- def force_flush(self, timeout_millis=None):
- self._processor.force_flush(timeout_millis)
-
-
-# 标记函数
-def lable_span(request):
- if request.stream:
- span = trace.get_current_span()
- if span is not None and span.is_recording():
- span.set_attribute("stream", "true")
-
-
-def set_up():
- try:
- # when TRACES_ENABLED=true start trace
- global traces_enable
- traces_enable = envs.TRACES_ENABLE.lower() == "true"
- if not traces_enable:
- llm_logger.warning("Opentelemetry is DISABLED.")
- return
-
- llm_logger.info("Opentelemetry is ENABLED, configuring...")
- # --- read env ---
- service_name = envs.FD_SERVICE_NAME
- host_name = envs.FD_HOST_NAME
- # --- set attributes (Service Name, Host Name, etc.) ---
- resource_attributes = {"service.name": service_name}
- if host_name:
- resource_attributes["host.name"] = host_name
-
- resource = Resource(attributes=resource_attributes)
-
- # --- set Exporter ---
- exporter_type = envs.TRACES_EXPORTER.lower()
- if exporter_type == "otlp":
- endpoint = envs.EXPORTER_OTLP_ENDPOINT # should be set
- headers = envs.EXPORTER_OTLP_HEADERS # e.g., "Authentication=***,k2=v2"
-
- otlp_exporter = OTLPSpanExporter(
- endpoint=endpoint,
- headers=(dict(item.split("=") for item in headers.split(",")) if headers else None),
- )
- processor = FilteringSpanProcessor(otlp_exporter)
- llm_logger.info(f"Using OTLP Exporter, sending to {endpoint} with headers {headers}")
- else: # default console
- processor = FilteringSpanProcessor(ConsoleSpanExporter())
- llm_logger.info("Using Console Exporter.")
-
- # --- set Tracer Provider ---
- provider = TracerProvider(resource=resource)
- provider.add_span_processor(processor)
- trace.set_tracer_provider(provider)
- global tracer
- tracer = trace.get_tracer(__name__)
- except:
- llm_logger.error("set_up failed")
- pass
-
-
-def instrument(app: FastAPI):
- try:
- set_up()
- if traces_enable:
- llm_logger.info("Applying instrumentors...")
- FastAPIInstrumentor.instrument_app(app)
- try:
- LoggingInstrumentor().instrument(set_logging_format=True)
- except Exception:
- pass
- except:
- llm_logger.info("instrument failed")
- pass
-
-
-def inject_to_metadata(request, metadata_attr="metadata"):
- """
- Inject OpenTelemetry trace context into the metadata field of the request.
-
- Parameters:
- request: can be a dict or object, with metadata attributes or fields.
- metadata_attr: the field name of metadata, default is 'metadata'.
-
- Operation:
- - If metadata does not exist, create a new one and mount it on the request.
- - Inject the current trace context as a JSON string and store it in metadata.
- - Use the key TRACE_CARRIER to store the injected content.
-
- Note:
- - This function is a non-blocking operation, and errors are silently ignored.
- - If there is no metadata attribute in the request, an empty dict will be created for it as its attribute
- """
- try:
- if request is None or not traces_enable:
- return
-
- metadata = request.get(metadata_attr) if isinstance(request, dict) else getattr(request, metadata_attr, None)
- if metadata is None:
- metadata = {}
- if isinstance(request, dict):
- request[metadata_attr] = metadata
- else:
- setattr(request, metadata_attr, metadata)
-
- trace_carrier = {}
- inject(trace_carrier)
- trace_carrier_json_string = json.dumps(trace_carrier)
- metadata[TRACE_CARRIER] = trace_carrier_json_string
- except:
- pass
-
-
-def extract_from_metadata(request, metadata_attr="metadata"):
- """
- Extract trace context from metadata of request object (dict or class instance).
-
- Parameters:
- request: can be a dictionary or any object, containing metadata attributes or fields.
- metadata_attr: metadata field name, default is 'metadata'.
-
- Returns:
- - Extraction success: returns OpenTelemetry context object (Context)
- - Extraction failure or exception: returns None
- """
- try:
- metadata = request.get(metadata_attr) if isinstance(request, dict) else getattr(request, metadata_attr, None)
- if metadata is None:
- return None
-
- trace_carrier_json_string = metadata.get(TRACE_CARRIER)
- if trace_carrier_json_string is None:
- return None
-
- trace_carrier = json.loads(trace_carrier_json_string)
- ctx = extract(trace_carrier)
- return ctx
- except:
- return None
-
-
-def extract_from_request(request):
- """
- Extract trace context from trace_carrier of request object (dict or class instance).
-
- Parameters:
- request: can be a dictionary or any object, containing metadata attributes or fields.
- metadata_attr: metadata field name, default is 'metadata'.
-
- Returns:
- - Extraction success: returns OpenTelemetry context object (Context)
- - Extraction failure or exception: returns None
- """
- try:
- trace_carrier_info = getattr(request, TRACE_CARRIER, None)
-
- if trace_carrier_info is None:
- return None
-
- trace_carrier = json.loads(trace_carrier_info)
- ctx = extract(trace_carrier)
- return ctx
- except:
- return None
-
-
-def start_span(span_name, request, kind=trace.SpanKind.CLIENT):
- """
- just start a new span in request trace context
- """
- try:
- if not traces_enable:
- return
- # extract Trace context from request.metadata.trace_carrier
- ctx = extract_from_metadata(request)
- with tracer.start_as_current_span(span_name, context=ctx, kind=kind) as span:
- span.set_attribute("job_id", os.getenv("FD_JOB_ID", default="null"))
- pass
- except:
- pass
-
-
-def fd_start_span(span_name, kind=trace.SpanKind.CLIENT):
- """
- when fd start, start a new span show start success
- """
- try:
- if not traces_enable:
- return
- with tracer.start_as_current_span(span_name, kind=kind) as span:
- span.set_attribute("job_id", os.getenv("FD_JOB_ID", default="null"))
- pass
- except:
- pass
-
-
-def start_span_request(span_name, request, kind=trace.SpanKind.CLIENT):
- """
- just start a new span in request trace context
- """
- try:
- if not traces_enable:
- return
- # extract Trace context from request.metadata.trace_carrier
- ctx = extract_from_request(request)
- with tracer.start_as_current_span(span_name, context=ctx, kind=kind) as span:
- span.set_attribute("job_id", os.getenv("FD_JOB_ID", default="null"))
- pass
- except:
- pass
diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py
index 03e00a0a36..f5aeb7277c 100644
--- a/fastdeploy/output/token_processor.py
+++ b/fastdeploy/output/token_processor.py
@@ -27,6 +27,7 @@ import numpy as np
import paddle
import zmq
+import fastdeploy.metrics.trace as tracing
from fastdeploy import envs
from fastdeploy.engine.request import (
CompletionOutput,
@@ -361,6 +362,7 @@ class TokenProcessor:
"""
read tokens from paddle inference engine and process
"""
+ tracing.trace_set_thread_info("Token Processor")
if current_platform.is_xpu():
from fastdeploy.model_executor.ops.xpu import (
@@ -704,6 +706,12 @@ class TokenProcessor:
is_prefill = task.disaggregate_info is not None and self.cfg.scheduler_config.splitwise_role == "prefill"
is_decode = task.disaggregate_info is not None and self.cfg.scheduler_config.splitwise_role == "decode"
+ rid = task_id.split("_")[0]
+ trace_carrier = task.trace_carrier
+ metrics = task.metrics
+ t = metrics.inference_start_time
+ ts = int(t * 1_000_000_000) if t is not None else 0
+ tracing.trace_set_proc_propagate_context(rid, trace_carrier, ts)
if self.cfg.speculative_config.method:
self._record_speculative_decoding_accept_num_per_request(task_id, accept_num[i])
if accept_num[i] == -3:
@@ -748,11 +756,21 @@ class TokenProcessor:
self.total_step += 1
current_time = time.time()
+ trace_carrier = None
if self.tokens_counter[task_id] == 0:
task.metrics.record_recv_first_token()
task.metrics.cal_cost_time()
metrics = copy.copy(task.metrics)
self._record_first_token_metrics(task, current_time)
+
+ tracing.trace_report_span(
+ name=tracing.TraceSpanName.PREFILL,
+ rid=rid,
+ start_time_ns=int(task.metrics.inference_start_time * 1e9),
+ end_time_ns=int(time.time() * 1e9),
+ thread_finish_flag=False,
+ )
+
else:
task.metrics.record_recv_token()
if self.tokens_counter[task_id] == 1 and self.cfg.scheduler_config.splitwise_role == "decode":
@@ -774,6 +792,7 @@ class TokenProcessor:
metrics=metrics,
ic_req_data=task.ic_req_data,
prompt_token_ids_len=task.prompt_token_ids_len,
+ trace_carrier=trace_carrier,
)
if self.tokens_counter[task_id] == 0:
if task.messages is not None:
@@ -830,6 +849,15 @@ class TokenProcessor:
if token_id in task.eos_token_ids or is_prefill or recovery_stop:
result.finished = True
+ trace_carrier = tracing.trace_get_proc_propagate_context(rid=rid)
+ result.trace_carrier = trace_carrier
+ tracing.trace_report_span(
+ name=tracing.TraceSpanName.DECODE,
+ rid=rid,
+ start_time_ns=int(task.metrics.inference_start_time * 1e9),
+ end_time_ns=int(time.time() * 1e9),
+ thread_finish_flag=True,
+ )
if recovery_stop:
result.error_msg = "Recover is not supported, the result is incomplete!"
llm_logger.info(
diff --git a/mkdocs.yml b/mkdocs.yml
index 4a97f7f1ba..ac4c81f85a 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -66,6 +66,8 @@ plugins:
Scheduler: 调度器
Graceful Shutdown: 服务优雅关闭
Offline Inference: 离线推理
+ Observability: 可观测性
+ Trace: Trace服务
CLI: CLI 使用说明
Chat: Chat命令
Complete: Complete命令
@@ -173,3 +175,5 @@ nav:
- Bench: cli/bench.md
- Run Batch: cli/run-batch.md
- Tokenizer: cli/tokenizer.md
+ - Observability:
+ - Trace: observability/trace.md
diff --git a/tests/metrics/test_trace.py b/tests/metrics/test_trace.py
new file mode 100644
index 0000000000..29664b582f
--- /dev/null
+++ b/tests/metrics/test_trace.py
@@ -0,0 +1,1269 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import os
+import threading
+import time
+import unittest
+from unittest import mock
+from unittest.mock import MagicMock, patch
+
+import pytest
+from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
+
+from fastdeploy.metrics import trace
+from fastdeploy.metrics.trace import FilteringSpanProcessor, label_span
+
+
+class TestFilteringSpanProcessor(unittest.TestCase):
+ """Test cases for FilteringSpanProcessor class"""
+
+ def setUp(self):
+ """Set up test fixtures"""
+ self.exporter = ConsoleSpanExporter()
+ self.processor = FilteringSpanProcessor(self.exporter)
+
+ def test_initialization(self):
+ """Test that FilteringSpanProcessor is properly initialized"""
+ self.assertIsInstance(self.processor._processor, BatchSpanProcessor)
+ self.assertEqual(self.processor._processor.span_exporter, self.exporter)
+
+ def test_on_start_with_parent_span(self):
+ """Test on_start method with parent span containing stream attribute"""
+ # Mock span and parent context
+ mock_span = MagicMock()
+ mock_parent_span = MagicMock()
+ mock_parent_span.is_recording.return_value = True
+ mock_parent_span.attributes.get.return_value = "test_stream"
+
+ # Mock trace.get_current_span to return parent span
+ with patch("fastdeploy.metrics.trace.trace.get_current_span", return_value=mock_parent_span):
+ with patch.object(self.processor._processor, "on_start") as mock_parent_on_start:
+ self.processor.on_start(mock_span, parent_context=None)
+
+ # Verify stream attribute is set on child span
+ mock_span.set_attribute.assert_called_once_with("stream", "test_stream")
+ mock_parent_on_start.assert_called_once_with(mock_span, None)
+
+ def test_on_start_without_parent_span(self):
+ """Test on_start method without parent span"""
+ mock_span = MagicMock()
+
+ # Mock trace.get_current_span to return None
+ with patch("fastdeploy.metrics.trace.trace.get_current_span", return_value=None):
+ with patch.object(self.processor._processor, "on_start") as mock_parent_on_start:
+ self.processor.on_start(mock_span, parent_context=None)
+
+ # Verify no attributes are set
+ mock_span.set_attribute.assert_not_called()
+ mock_parent_on_start.assert_called_once_with(mock_span, None)
+
+ def test_on_start_with_non_recording_parent_span(self):
+ """Test on_start method with non-recording parent span"""
+ mock_span = MagicMock()
+ mock_parent_span = MagicMock()
+ mock_parent_span.is_recording.return_value = False
+
+ with patch("fastdeploy.metrics.trace.trace.get_current_span", return_value=mock_parent_span):
+ with patch.object(self.processor._processor, "on_start") as mock_parent_on_start:
+ self.processor.on_start(mock_span, parent_context=None)
+
+ # Verify no attributes are set
+ mock_span.set_attribute.assert_not_called()
+ mock_parent_on_start.assert_called_once_with(mock_span, None)
+
+ def test_on_end_filter_stream_http_response(self):
+ """Test on_end method filters out stream http response spans"""
+ mock_span = MagicMock()
+ mock_span.attributes.get.side_effect = lambda key: {
+ "asgi.event.type": "http.response.body",
+ "stream": "true",
+ }.get(key)
+ mock_span.name = "http send request"
+
+ with patch.object(self.processor._processor, "on_end") as mock_parent_on_end:
+ self.processor.on_end(mock_span)
+
+ # Verify parent on_end is NOT called (span is filtered out)
+ mock_parent_on_end.assert_not_called()
+
+ def test_on_end_keep_spans_without_http_send(self):
+ """Test on_end method keeps spans without 'http send' in name"""
+ mock_span = MagicMock()
+ mock_span.attributes.get.side_effect = lambda key: {
+ "asgi.event.type": "http.response.body",
+ "stream": "true",
+ }.get(key)
+ mock_span.name = "other operation"
+
+ with patch.object(self.processor._processor, "on_end") as mock_parent_on_end:
+ self.processor.on_end(mock_span)
+
+ # Verify parent on_end is called
+ mock_parent_on_end.assert_called_once_with(mock_span)
+
+ def test_shutdown(self):
+ """Test shutdown method"""
+ with patch.object(self.processor._processor, "shutdown") as mock_shutdown:
+ self.processor.shutdown()
+ mock_shutdown.assert_called_once()
+
+ def test_force_flush(self):
+ """Test force_flush method"""
+ with patch.object(self.processor._processor, "force_flush") as mock_force_flush:
+ self.processor.force_flush(timeout_millis=5000)
+ mock_force_flush.assert_called_once_with(5000)
+
+
+class TestLabelSpan(unittest.TestCase):
+ """Test cases for label_span function"""
+
+    def test_label_span_with_stream_request(self):
+ """Test label_span function with streaming request"""
+ mock_request = MagicMock()
+ mock_request.stream = True
+
+ mock_span = MagicMock()
+ mock_span.is_recording.return_value = True
+
+ with patch("fastdeploy.metrics.trace.trace.get_current_span", return_value=mock_span):
+ label_span(mock_request)
+
+ # Verify stream attribute is set
+ mock_span.set_attribute.assert_called_once_with("stream", "true")
+
+    def test_label_span_without_stream_request(self):
+ """Test label_span function with non-streaming request"""
+ mock_request = MagicMock()
+ mock_request.stream = False
+
+ mock_span = MagicMock()
+ mock_span.is_recording.return_value = True
+
+ with patch("fastdeploy.metrics.trace.trace.get_current_span", return_value=mock_span):
+ label_span(mock_request)
+
+ # Verify no attributes are set
+ mock_span.set_attribute.assert_not_called()
+
+    def test_label_span_without_current_span(self):
+ """Test label_span function when no current span exists"""
+ mock_request = MagicMock()
+ mock_request.stream = True
+
+ with patch("fastdeploy.metrics.trace.trace.get_current_span", return_value=None):
+ # Should not raise any exception
+ label_span(mock_request)
+
+    def test_label_span_with_non_recording_span(self):
+ """Test label_span function with non-recording span"""
+ mock_request = MagicMock()
+ mock_request.stream = True
+
+ mock_span = MagicMock()
+ mock_span.is_recording.return_value = False
+
+ with patch("fastdeploy.metrics.trace.trace.get_current_span", return_value=mock_span):
+ label_span(mock_request)
+
+ # Verify no attributes are set
+ mock_span.set_attribute.assert_not_called()
+
+
+class TestTraceComprehensive:
+ """Comprehensive tests for tracing functionality"""
+
+ def setup_method(self):
+ """Setup test environment"""
+ # Mock environment variables
+ self.original_env = os.environ.copy()
+ os.environ["TRACES_ENABLE"] = "true"
+ os.environ["FD_SERVICE_NAME"] = "test_service"
+ os.environ["FD_HOST_NAME"] = "test_host"
+ os.environ["EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4317"
+ os.environ["EXPORTER_OTLP_HEADERS"] = "key1=value1,key2=value2"
+ os.environ["FD_OTLP_EXPORTER_SCHEDULE_DELAY_MILLIS"] = "1000"
+ os.environ["FD_OTLP_EXPORTER_MAX_EXPORT_BATCH_SIZE"] = "512"
+
+ # Reset global state
+ trace.remote_trace_contexts = {}
+ trace.threads_info = {}
+ trace.reqs_context = {}
+ trace.tracing_enabled = False
+
+ def teardown_method(self):
+ """Restore environment"""
+ os.environ = self.original_env
+
+ def test_process_tracing_init_with_different_scenarios(self):
+ """Test tracing initialization under different scenarios"""
+ # Test normal initialization
+ trace.process_tracing_init()
+ assert trace.tracing_enabled is True
+
+ # Test with tracing disabled
+ os.environ["TRACES_ENABLE"] = "false"
+ trace.process_tracing_init()
+ assert trace.tracing_enabled is False
+
+ # Test with invalid endpoint
+ os.environ["TRACES_ENABLE"] = "true"
+ os.environ["EXPORTER_OTLP_ENDPOINT"] = ""
+
+ # Test with different protocols
+ for protocol in ["grpc", "http/protobuf"]:
+ os.environ["OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"] = protocol
+ trace.process_tracing_init()
+ assert trace.tracing_enabled is True
+
+ # Test with unsupported protocol
+ os.environ["OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"] = "unsupported"
+ with pytest.raises(ValueError):
+ trace.get_otlp_span_exporter("http://localhost:4317", None)
+
+ def test_thread_info_with_different_ranks(self):
+ """Test thread info with TP and DP ranks"""
+ # Test with TP rank
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread_tp", tp_rank=0, dp_rank=1)
+
+ pid = threading.get_native_id()
+ info = trace.threads_info[pid]
+ assert info.tp_rank == 0
+ assert info.dp_rank == 1
+
+ # Test with None ranks
+ trace.trace_set_thread_info("test_thread_no_ranks")
+ info = trace.threads_info[pid] # Should still be same thread
+ assert info.tp_rank == 0 # Should preserve previous values
+
+ def test_advanced_request_scenarios(self):
+ """Test advanced request tracing scenarios"""
+ # Test request with timestamp
+ rid = "test_request_timestamp"
+ ts = int(time.time() * 1e9) - 1000 # 1 microsecond ago
+
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ trace.trace_req_start(rid, "", ts=ts)
+ assert rid in trace.reqs_context
+ assert trace.reqs_context[rid].start_time_ns == ts
+
+ trace.trace_req_finish(rid, ts=ts + 2000)
+
+ # Test request with attributes
+ rid2 = "test_request_attrs"
+ trace.trace_req_start(rid2, "")
+ attrs = {"attr1": "value1", "attr2": 123}
+ trace.trace_req_finish(rid2, attrs=attrs)
+
+ def test_complex_slice_scenarios(self):
+ """Test complex slice operations"""
+ rid = "test_complex_slices"
+
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+ trace.trace_req_start(rid, "")
+
+ # Test nested slices
+ trace.trace_slice_start("outer", rid)
+ trace.trace_slice_start("inner", rid)
+ trace.trace_slice_end("inner", rid)
+ trace.trace_slice_end("outer", rid)
+
+ # Test anonymous slices
+ trace.trace_slice_start("", rid, anonymous=True)
+ trace.trace_slice_end("anonymous_test", rid)
+
+ trace.trace_req_finish(rid)
+
+ def test_trace_report_span_function(self):
+ """Test trace_report_span convenience function"""
+ rid = "test_report_span"
+
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+ trace.trace_req_start(rid, "")
+
+ # Test trace_report_span
+ start_time = int(time.time() * 1e9)
+ end_time = start_time + 1000000 # 1ms later
+ attrs = {"test_attr": "test_value"}
+
+ trace.trace_report_span("report_test", rid, start_time, end_time, attrs)
+
+ trace.trace_req_finish(rid)
+
+ def test_propagation_advanced_scenarios(self):
+ """Test advanced context propagation scenarios"""
+ rid = "test_advanced_propagation"
+
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+ trace.trace_req_start(rid, "")
+
+ # Create slices to get a non-null prev_span_context
+ trace.trace_slice_start("slice1", rid)
+ trace.trace_slice_end("slice1", rid)
+
+ # Get context with prev_span_context
+ context_dict = trace.trace_get_proc_propagate_context(rid)
+ assert context_dict is not None
+ assert "prev_span" in context_dict
+
+ # Test propagation with timestamp
+ new_rid = "test_propagated"
+ ts = int(time.time() * 1e9)
+ trace.trace_set_proc_propagate_context(new_rid, context_dict, ts=ts)
+
+ assert new_rid in trace.reqs_context
+ assert trace.reqs_context[new_rid].is_copy is True
+ assert trace.reqs_context[new_rid].start_time_ns == ts
+
+ # Test with empty or invalid context
+ trace.trace_set_proc_propagate_context("invalid_rid", None)
+ trace.trace_set_proc_propagate_context("invalid_rid", {})
+ trace.trace_set_proc_propagate_context("invalid_rid", {"invalid": "data"})
+
+ trace.trace_req_finish(rid)
+ trace.trace_req_finish(new_rid)
+
+ def test_multiple_threads_same_request(self):
+ """Test tracing with multiple threads on same request"""
+ rid = "test_multi_thread"
+
+ trace.process_tracing_init()
+
+ # Setup main thread
+ trace.trace_set_thread_info("main_thread")
+ trace.trace_req_start(rid, "")
+
+ # Create worker thread
+ def worker_thread():
+ trace.trace_set_thread_info("worker_thread")
+ trace.trace_slice_start("worker_task", rid)
+ time.sleep(0.001) # Simulate work
+ trace.trace_slice_end("worker_task", rid)
+
+ thread = threading.Thread(target=worker_thread)
+ thread.start()
+ thread.join()
+
+ # Main thread continues
+ trace.trace_slice_start("main_task", rid)
+ trace.trace_slice_end("main_task", rid)
+
+ trace.trace_req_finish(rid)
+
+ def test_trace_span_enum(self):
+ """Test TraceSpanName enum values"""
+ assert trace.TraceSpanName.FASTDEPLOY == "FASTDEPLOY"
+ assert trace.TraceSpanName.PREPROCESSING == "PREPROCESSING"
+ assert trace.TraceSpanName.SCHEDULE == "SCHEDULE"
+ assert trace.TraceSpanName.PREFILL == "PREFILL"
+ assert trace.TraceSpanName.DECODE == "DECODE"
+ assert trace.TraceSpanName.POSTPROCESSING == "POSTPROCESSING"
+
+ # Test all enum members exist
+ expected_spans = ["FASTDEPLOY", "PREPROCESSING", "SCHEDULE", "PREFILL", "DECODE", "POSTPROCESSING"]
+ for span_name in expected_spans:
+ assert hasattr(trace.TraceSpanName, span_name)
+
+ def test_host_id_generation(self):
+ """Test host ID generation logic"""
+ # Test with environment variable (most reliable)
+ os.environ["FD_HOST_NAME"] = "env-host-id"
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+ pid = threading.get_native_id()
+ assert pid in trace.threads_info
+ assert trace.threads_info[pid].host_id == "env-host-id"
+
+ # Test fallback (when env var is not set)
+ os.environ.pop("FD_HOST_NAME", None)
+ trace.threads_info.clear() # Reset to trigger re-calculation
+ trace.trace_set_thread_info("test_thread2")
+ pid2 = threading.get_native_id()
+ assert pid2 in trace.threads_info
+ # Should generate some kind of host ID
+ assert trace.threads_info[pid2].host_id is not None
+ assert len(trace.threads_info[pid2].host_id) > 0
+
+ def test_edge_case_operations(self):
+ """Test edge case operations"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ # Test operations on empty stack
+ rid = "test_edge_cases"
+ trace.trace_req_start(rid, "")
+
+ # Try to end a slice that doesn't exist
+ trace.trace_slice_end("non_existent", rid)
+
+ # Try to add event to non-existent slice
+ trace.trace_event("test_event", rid)
+
+ trace.trace_req_finish(rid)
+
+ # Test repeated operations on finished request
+ trace.trace_slice_start("test", rid)
+ trace.trace_slice_end("test", rid)
+ trace.trace_event("test", rid)
+
+ def test_timing_functions(self):
+ """Test timing-related functions"""
+ # Test that time_ns is used if available
+ if hasattr(time, "time_ns"):
+ trace.process_tracing_init()
+ # Test that timing works correctly by checking timestamps
+ ts1 = int(time.time() * 1e9)
+ time.sleep(0.001) # 1ms
+ ts2 = int(time.time() * 1e9)
+ assert ts2 > ts1
+ assert ts2 - ts1 >= 1000000 # At least 1ms in nanoseconds
+
+ def test_request_start_with_trace_content(self):
+ """Test request start with trace content (upstream context)"""
+ rid = "test_upstream_context"
+
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ # Test with empty upstream context (valid case)
+ trace_content = ""
+ trace.trace_req_start(rid, trace_content, role="test_role")
+
+ # Verify that request was created
+ assert rid in trace.reqs_context
+
+ trace.trace_req_finish(rid)
+
+ def test_span_linking_logic(self):
+ """Test span linking functionality"""
+ rid = "test_span_linking"
+
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+ trace.trace_req_start(rid, "")
+
+ # Create first slice
+ trace.trace_slice_start("first_slice", rid)
+ trace.trace_slice_end("first_slice", rid)
+
+ # Create second slice (should be linked to first)
+ trace.trace_slice_start("second_slice", rid)
+ trace.trace_slice_end("second_slice", rid)
+
+ trace.trace_req_finish(rid)
+
+ @mock.patch("fastdeploy.metrics.trace.trace")
+ def test_active_span_handling(self, mock_trace):
+ """Test handling of active spans from FastAPI Instrumentor"""
+ rid = "test_active_span"
+
+ # Mock an active span
+ mock_span = mock.MagicMock()
+ mock_span.is_recording.return_value = True
+ mock_span.name = "fastapi_request"
+ mock_span.get_span_context.return_value = mock.MagicMock(is_valid=True, trace_id=1234567890)
+ mock_trace.get_current_span.return_value = mock_span
+ mock_trace.set_span_in_context.return_value = "mock_context"
+
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ trace.trace_req_start(rid, "")
+
+ # Verify that active span was used
+ assert rid in trace.reqs_context
+ assert trace.reqs_context[rid].is_copy is True
+ mock_span.set_attribute.assert_called_with("rid", rid)
+ mock_span.update_name.assert_called_with("fastapi_request (Req: test_active_span)")
+
+ trace.trace_req_finish(rid)
+
+    def test_label_span_functionality(self):
+ """Test label_span function with different scenarios"""
+
+ # Create mock request and span
+ class MockRequest:
+ def __init__(self, stream):
+ self.stream = stream
+
+ mock_span = mock.MagicMock()
+ mock_span.is_recording.return_value = True
+
+ with mock.patch("fastdeploy.metrics.trace.trace.get_current_span", return_value=mock_span):
+ # Test with stream=True
+ request_stream = MockRequest(True)
+ trace.label_span(request_stream)
+ mock_span.set_attribute.assert_called_with("stream", "true")
+
+ # Test with stream=False
+ request_no_stream = MockRequest(False)
+ trace.label_span(request_no_stream)
+ # Should not set stream attribute for False
+
+ # Test with no active span
+ with mock.patch(
+ "fastdeploy.metrics.trace.trace.get_current_span", return_value=mock.MagicMock(is_recording=False)
+ ):
+ request_no_stream = MockRequest(False)
+ trace.label_span(request_no_stream)
+ # Should not set stream attribute for False
+ # Should not crash
+
+ def test_error_handling_and_logging(self):
+ """Test error handling and logging scenarios"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ with mock.patch("fastdeploy.metrics.trace.logger") as mock_logger:
+ # Test operations on non-existent request
+ rid = "non_existent"
+ trace.trace_slice_start("test", rid)
+ trace.trace_slice_end("test", rid)
+ trace.trace_event("test", rid)
+ trace.trace_slice_add_attr(rid, {"test": "value"})
+
+ # Should log warnings but not crash
+ # Check if warning was called (may not always be called depending on implementation)
+
+ # Test slice name mismatch warning
+ rid = "test_mismatch_warning"
+ trace.trace_req_start(rid, "")
+
+ with mock.patch("fastdeploy.metrics.trace.logger") as mock_logger:
+ trace.trace_slice_start("start_name", rid)
+ trace.trace_slice_end("different_name", rid)
+ assert mock_logger.warning.called
+
+ trace.trace_req_finish(rid)
+
+
+class TestPerformanceAndConcurrency:
+ """Performance and concurrency tests"""
+
+ def test_concurrent_requests(self):
+ """Test handling of concurrent requests"""
+ trace.process_tracing_init()
+
+ def process_request(request_id, results_list):
+ """Process a single request"""
+ trace.trace_set_thread_info(f"thread_{request_id}")
+ trace.trace_req_start(request_id, "")
+ trace.trace_slice_start("process", request_id)
+ time.sleep(0.001) # Simulate work
+ trace.trace_slice_end("process", request_id)
+ trace.trace_req_finish(request_id)
+ result = f"request_{request_id}_completed"
+ results_list.append(result)
+ return result
+
+ # Process multiple requests concurrently
+ results = []
+ threads = []
+
+ for i in range(10):
+ thread = threading.Thread(target=process_request, args=(f"req_{i}", results))
+ threads.append(thread)
+ thread.start()
+
+ for thread in threads:
+ thread.join()
+
+ # Verify all requests were processed
+ assert len([r for r in results if r.endswith("_completed")]) == 10
+
+ def test_memory_cleanup(self):
+ """Test proper memory cleanup"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ # Create and finish multiple requests
+ for i in range(5):
+ rid = f"test_request_{i}"
+ trace.trace_req_start(rid, "")
+ trace.trace_slice_start("test", rid)
+ trace.trace_slice_end("test", rid)
+ trace.trace_req_finish(rid)
+
+ # Verify cleanup
+ assert len(trace.reqs_context) == 0
+
+ # Thread info should persist
+ pid = threading.get_native_id()
+ assert pid in trace.threads_info
+
+
+class TestAdditionalCoverage:
+ """Additional test cases for better code coverage"""
+
+ def setup_method(self):
+ """Setup test environment"""
+ self.original_env = os.environ.copy()
+ os.environ["TRACES_ENABLE"] = "true"
+ os.environ["FD_SERVICE_NAME"] = "test_service"
+ os.environ["EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4317"
+
+ # Reset global state
+ trace.remote_trace_contexts = {}
+ trace.threads_info = {}
+ trace.reqs_context = {}
+ trace.tracing_enabled = False
+
+ def teardown_method(self):
+ """Restore environment"""
+ os.environ = self.original_env
+
+ def test_trace_propagate_context_to_dict(self):
+ """Test TracePropagateContext.to_dict method"""
+ from fastdeploy.metrics.trace import TracePropagateContext
+
+ # Mock context objects
+ mock_root_context = MagicMock()
+ mock_prev_span_context = MagicMock()
+ mock_prev_span_context.span_id = 12345
+ mock_prev_span_context.trace_id = 67890
+
+ # Test with prev_span_context
+ propagate_context = TracePropagateContext(mock_root_context, mock_prev_span_context)
+ result_dict = propagate_context.to_dict()
+
+ assert "root_span" in result_dict
+ assert "prev_span" in result_dict
+ assert result_dict["prev_span"]["span_id"] == 12345
+ assert result_dict["prev_span"]["trace_id"] == 67890
+
+ # Test without prev_span_context
+ propagate_context_none = TracePropagateContext(mock_root_context, None)
+ result_dict_none = propagate_context_none.to_dict()
+
+ assert "root_span" in result_dict_none
+ assert result_dict_none["prev_span"] == "None"
+
+ def test_trace_propagate_context_instance_from_dict(self):
+ """Test TracePropagateContext.instance_from_dict method"""
+ from fastdeploy.metrics.trace import TracePropagateContext
+
+ # Test valid dict with prev_span
+ valid_dict = {"root_span": {"test": "carrier"}, "prev_span": {"span_id": 12345, "trace_id": 67890}}
+
+ with mock.patch("fastdeploy.metrics.trace.propagate.extract") as mock_extract:
+ mock_extract.return_value = "mock_context"
+
+ with mock.patch("fastdeploy.metrics.trace.trace.span.SpanContext") as mock_span_context:
+ mock_span_context_instance = MagicMock()
+ mock_span_context.return_value = mock_span_context_instance
+
+ result = TracePropagateContext.instance_from_dict(valid_dict)
+
+ assert result is not None
+ assert result.root_span_context == "mock_context"
+ assert result.prev_span_context == mock_span_context_instance
+ mock_span_context.assert_called_once_with(trace_id=67890, span_id=12345, is_remote=True)
+
+ # Test with None prev_span
+ valid_dict_none = {"root_span": {"test": "carrier"}, "prev_span": "None"}
+
+ with mock.patch("fastdeploy.metrics.trace.propagate.extract") as mock_extract:
+ mock_extract.return_value = "mock_context"
+
+ result = TracePropagateContext.instance_from_dict(valid_dict_none)
+
+ assert result is not None
+ assert result.root_span_context == "mock_context"
+ assert result.prev_span_context is None
+
+ # Test invalid dict (missing keys)
+ invalid_dict = {"invalid": "data"}
+ result = TracePropagateContext.instance_from_dict(invalid_dict)
+ assert result is None
+
+ # Test empty dict
+ result = TracePropagateContext.instance_from_dict({})
+ assert result is None
+
+ def test_trace_custom_id_generator(self):
+ """Test TraceCustomIdGenerator class"""
+ from fastdeploy.metrics.trace import TraceCustomIdGenerator
+
+ generator = TraceCustomIdGenerator()
+
+ # Test generate_trace_id
+ trace_id = generator.generate_trace_id()
+ assert isinstance(trace_id, int)
+ assert trace_id > 0
+
+ # Test generate_span_id
+ span_id = generator.generate_span_id()
+ assert isinstance(span_id, int)
+ assert span_id > 0
+
+ # Test that multiple calls generate different IDs
+ trace_id2 = generator.generate_trace_id()
+ span_id2 = generator.generate_span_id()
+
+ # Should be different (very high probability)
+ assert trace_id != trace_id2
+ assert span_id != span_id2
+
+ def test_get_host_id_fallback_methods(self):
+ """Test __get_host_id function fallback methods"""
+ # Access function through module directly
+ import fastdeploy.metrics.trace as trace_module
+
+ get_host_id_func = trace_module.__dict__.get("__get_host_id")
+
+ if get_host_id_func is None:
+ # Skip test if function is not accessible
+ pytest.skip("__get_host_id function not accessible for testing")
+ return
+
+ # Test with FD_HOST_NAME set
+ os.environ["FD_HOST_NAME"] = "test-host-name"
+ host_id = get_host_id_func()
+ assert host_id == "test-host-name"
+
+ # Test fallback when machine-id files don't exist and MAC is 0
+ os.environ.pop("FD_HOST_NAME", None)
+
+ with mock.patch("builtins.open", side_effect=FileNotFoundError):
+ with mock.patch("uuid.getnode", return_value=0):
+ with mock.patch("uuid.uuid4") as mock_uuid4:
+ mock_uuid = MagicMock()
+ mock_uuid.hex = "test-uuid-hex"
+ mock_uuid4.return_value = mock_uuid
+
+ with mock.patch("os.getpid", return_value=12345):
+ host_id = get_host_id_func()
+ # The function might return different values based on environment
+ # Just verify it returns a non-empty string
+ assert isinstance(host_id, str)
+ assert len(host_id) > 0
+
+ def test_get_host_id_exception_handling(self):
+ """Test __get_host_id exception handling"""
+ import fastdeploy.metrics.trace as trace_module
+
+ get_host_id_func = trace_module.__dict__.get("__get_host_id")
+
+ if get_host_id_func is None:
+ # Skip test if function is not accessible
+ pytest.skip("__get_host_id function not accessible for testing")
+ return
+
+ os.environ.pop("FD_HOST_NAME", None)
+
+ with mock.patch("builtins.open", side_effect=FileNotFoundError):
+ with mock.patch("uuid.getnode", return_value=0):
+ with mock.patch("uuid.uuid4", side_effect=Exception("UUID generation failed")):
+ host_id = get_host_id_func()
+ # The function should return some fallback value
+ assert isinstance(host_id, str)
+ assert len(host_id) > 0
+ # In case of complete failure, it should return "unknown"
+ # but depending on environment, it might return other fallback values
+
+ def test_trace_slice_auto_next_anon(self):
+ """Test trace_slice_end with auto_next_anon parameter"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_auto_anon"
+ trace.trace_req_start(rid, "")
+
+ # Start a slice
+ trace.trace_slice_start("first_slice", rid)
+
+ # End with auto_next_anon=True
+ trace.trace_slice_end("first_slice", rid, auto_next_anon=True)
+
+ # Should have automatically started an anonymous slice
+ pid = threading.get_native_id()
+ thread_context = trace.reqs_context[rid].threads_context[pid]
+ assert len(thread_context.cur_slice_stack) == 1
+ assert thread_context.cur_slice_stack[0].anonymous is True
+ assert thread_context.cur_slice_stack[0].slice_name == ""
+
+ trace.trace_req_finish(rid)
+
+ def test_trace_slice_thread_finish_flag(self):
+ """Test trace_slice_end with thread_finish_flag parameter"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_thread_finish"
+ trace.trace_req_start(rid, "")
+
+ pid = threading.get_native_id()
+
+ # Start and end a slice with thread_finish_flag=True
+ trace.trace_slice_start("test_slice", rid)
+ trace.trace_slice_end("test_slice", rid, thread_finish_flag=True)
+
+ # Thread context should be removed
+ assert pid not in trace.reqs_context[rid].threads_context
+
+ trace.trace_req_finish(rid)
+
+ def test_trace_slice_alias(self):
+ """Test trace_slice alias function"""
+ # trace_slice should be an alias for trace_slice_end
+ assert trace.trace_slice == trace.trace_slice_end
+
+ def test_trace_event_and_add_attr_functionality(self):
+ """Test trace_event and trace_slice_add_attr functionality"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_events_attrs"
+ trace.trace_req_start(rid, "")
+
+ # Start a slice
+ trace.trace_slice_start("test_slice", rid)
+
+ # Test trace_event
+ attrs = {"event_attr": "event_value"}
+ trace.trace_event("test_event", rid, attrs=attrs)
+
+ # Test trace_slice_add_attr
+ slice_attrs = {"slice_attr": "slice_value"}
+ trace.trace_slice_add_attr(rid, slice_attrs)
+
+ trace.trace_slice_end("test_slice", rid)
+ trace.trace_req_finish(rid)
+
+ def test_trace_span_decorator_sync(self):
+ """Test trace_span decorator with sync function"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ @trace.trace_span("test_sync_function")
+ def test_function():
+ return "test_result"
+
+ result = test_function()
+ assert result == "test_result"
+
+ def test_trace_span_decorator_async(self):
+ """Test trace_span decorator with async function"""
+ import asyncio
+
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ @trace.trace_span("test_async_function")
+ async def test_async_function():
+ return "test_async_result"
+
+ async def run_test():
+ result = await test_async_function()
+ return result
+
+ result = asyncio.run(run_test())
+ assert result == "test_async_result"
+
+ def test_trace_span_decorator_disabled(self):
+ """Test trace_span decorator when tracing is disabled"""
+ trace.tracing_enabled = False
+
+ @trace.trace_span("test_disabled_function")
+ def test_function():
+ return "test_result_disabled"
+
+ result = test_function()
+ assert result == "test_result_disabled"
+
+ def test_trace_span_decorator_no_thread_info(self):
+ """Test trace_span decorator when thread info is not set"""
+ trace.process_tracing_init()
+ trace.threads_info.clear() # Clear thread info
+
+ @trace.trace_span("test_no_thread_info")
+ def test_function():
+ return "test_result_no_thread"
+
+ result = test_function()
+ assert result == "test_result_no_thread"
+
+ # Should have created thread info automatically
+ pid = threading.get_native_id()
+ assert pid in trace.threads_info
+
+ def test_get_otlp_span_exporter_grpc(self):
+ """Test get_otlp_span_exporter with grpc protocol"""
+ exporter = trace.get_otlp_span_exporter("http://localhost:4317", None)
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
+ OTLPSpanExporter as GRPCSpanExporter,
+ )
+
+ assert isinstance(exporter, GRPCSpanExporter)
+
+ def test_get_otlp_span_exporter_http(self):
+ """Test get_otlp_span_exporter with http protocol"""
+ # Set environment variable for http protocol
+ os.environ["OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"] = "http/protobuf"
+ headers = {"Authorization": "Bearer token"}
+ exporter = trace.get_otlp_span_exporter("http://localhost:4318", headers)
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
+ OTLPSpanExporter as HTTPSpanExporter,
+ )
+
+ assert isinstance(exporter, HTTPSpanExporter)
+
+ def test_get_otlp_span_exporter_unsupported_protocol(self):
+ """Test get_otlp_span_exporter with unsupported protocol"""
+ # Set environment variable for unsupported protocol
+ os.environ["OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"] = "unsupported"
+ with pytest.raises(ValueError, match="Unsupported OTLP protocol"):
+ trace.get_otlp_span_exporter("http://localhost:4317", None)
+
+ def test_process_tracing_init_without_opentelemetry(self):
+ """Test process_tracing_init when opentelemetry is not imported"""
+ original_opentelemetry_imported = trace.opentelemetry_imported
+ trace.opentelemetry_imported = False
+
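+                    # First token received: report the PREFILL span, spanning inference start to now.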
+ try:
+ trace.process_tracing_init()
+ assert trace.tracing_enabled is False
+ finally:
+ trace.opentelemetry_imported = original_opentelemetry_imported
+
+ def test_trace_set_thread_info_when_tracing_disabled(self):
+ """Test trace_set_thread_info when tracing is disabled"""
+ trace.tracing_enabled = False
+
+ # Should not raise any exception
+ trace.trace_set_thread_info("test_thread")
+
+ # Should not add to threads_info
+ pid = threading.get_native_id()
+ assert pid not in trace.threads_info
+
+ def test_trace_set_thread_info_existing_thread(self):
+ """Test trace_set_thread_info when thread already exists"""
+ trace.process_tracing_init()
+
+ # Set thread info first time
+ trace.trace_set_thread_info("test_thread")
+
+ # Try to set again - should not overwrite
+ original_thread_info = trace.threads_info[threading.get_native_id()]
+ trace.trace_set_thread_info("different_thread")
+
+ # Should still have original info
+ pid = threading.get_native_id()
+ assert trace.threads_info[pid] == original_thread_info
+
+ def test_trace_req_start_without_thread_info(self):
+ """Test trace_req_start when thread info is not set"""
+ trace.process_tracing_init()
+ trace.threads_info.clear() # Clear thread info
+
+ rid = "test_no_thread_info_req"
+ trace.trace_req_start(rid, "")
+
+ # Should not create request context
+ assert rid not in trace.reqs_context
+
+ def test_trace_req_start_existing_request(self):
+ """Test trace_req_start when request already exists"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_existing_req"
+ trace.trace_req_start(rid, "")
+
+ # Try to start same request again - should return early
+ trace.trace_req_start(rid, "")
+
+ # Should not overwrite existing request (function returns early)
+ assert rid in trace.reqs_context
+
+ def test_trace_req_finish_nonexistent_request(self):
+ """Test trace_req_finish with non-existent request"""
+ trace.process_tracing_init()
+
+ # Should not raise any exception
+ trace.trace_req_finish("nonexistent_rid")
+
+ def test_trace_slice_operations_without_request(self):
+ """Test trace slice operations without request context"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "nonexistent_request"
+
+ # Should not raise any exception
+ trace.trace_slice_start("test", rid)
+ trace.trace_slice_end("test", rid)
+ trace.trace_event("test", rid)
+ trace.trace_slice_add_attr(rid, {"test": "value"})
+
+ def test_trace_get_proc_propagate_context_without_request(self):
+ """Test trace_get_proc_propagate_context without request"""
+ trace.process_tracing_init()
+
+ result = trace.trace_get_proc_propagate_context("nonexistent_rid")
+ assert result is None
+
+ def test_trace_set_proc_propagate_context_without_request(self):
+ """Test trace_set_proc_propagate_context without request"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ context_dict = {"test": "context"}
+
+ # Should not raise any exception
+ trace.trace_set_proc_propagate_context("nonexistent_rid", context_dict)
+
+ def test_trace_set_proc_propagate_context_existing_thread(self):
+ """Test trace_set_proc_propagate_context when thread already exists"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_existing_thread"
+ context_dict = {"test": "context"}
+
+ # Create request context first
+ trace.reqs_context[rid] = trace.TraceReqContext(
+ rid=rid,
+ start_time_ns=int(time.time() * 1e9),
+ threads_context={threading.get_native_id(): MagicMock()},
+ is_copy=True,
+ )
+
+ # Try to set propagate context - should not create new thread context
+ original_threads_context = trace.reqs_context[rid].threads_context.copy()
+ trace.trace_set_proc_propagate_context(rid, context_dict)
+
+ # Should not have changed threads_context
+ assert trace.reqs_context[rid].threads_context == original_threads_context
+
+ def test_trace_report_span_without_request(self):
+ """Test trace_report_span without request context"""
+ trace.process_tracing_init()
+
+ # Should not raise any exception
+ trace.trace_report_span("test", "nonexistent_rid", 0, 1000000)
+
+ def test_all_functions_when_tracing_disabled(self):
+ """Test all trace functions when tracing is disabled"""
+ trace.tracing_enabled = False
+
+ rid = "test_disabled"
+
+ # All these should not raise exceptions
+ trace.trace_req_start(rid, "")
+ trace.trace_req_finish(rid)
+ trace.trace_slice_start("test", rid)
+ trace.trace_slice_end("test", rid)
+ trace.trace_event("test", rid)
+ trace.trace_slice_add_attr(rid, {"test": "value"})
+ trace.trace_get_proc_propagate_context(rid)
+ trace.trace_set_proc_propagate_context(rid, {})
+ trace.trace_report_span("test", rid, 0, 1000000)
+
+ def test_trace_req_start_with_role(self):
+ """Test trace_req_start with role parameter"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_with_role"
+ role = "test_role"
+
+ trace.trace_req_start(rid, "", role=role)
+
+ # Should create request context
+ assert rid in trace.reqs_context
+
+ def test_trace_req_start_with_null_role(self):
+ """Test trace_req_start with null role"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_null_role"
+ role = "null"
+
+ trace.trace_req_start(rid, "", role=role)
+
+ # Should create request context
+ assert rid in trace.reqs_context
+
+ def test_trace_span_decorator_with_custom_name(self):
+ """Test trace_span decorator with custom span name"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ @trace.trace_span("custom_span_name")
+ def test_function():
+ return "test_result"
+
+ result = test_function()
+ assert result == "test_result"
+
+ def test_trace_span_decorator_without_name(self):
+ """Test trace_span decorator without custom span name"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ @trace.trace_span()
+ def test_function():
+ return "test_result"
+
+ result = test_function()
+ assert result == "test_result"
+
+ def test_trace_span_decorator_with_none_name(self):
+ """Test trace_span decorator with None span name"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ @trace.trace_span(None)
+ def test_function():
+ return "test_result"
+
+ result = test_function()
+ assert result == "test_result"
+
+ def test_trace_slice_start_with_timestamp(self):
+ """Test trace_slice_start with custom timestamp"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_timestamp"
+ ts = int(time.time() * 1e9) - 1000000 # 1ms ago
+
+ trace.trace_req_start(rid, "")
+ trace.trace_slice_start("test_slice", rid, ts=ts)
+ trace.trace_slice_end("test_slice", rid)
+ trace.trace_req_finish(rid)
+
+ def test_trace_slice_end_with_timestamp(self):
+ """Test trace_slice_end with custom timestamp"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_end_timestamp"
+ ts = int(time.time() * 1e9) + 1000000 # 1ms in future
+
+ trace.trace_req_start(rid, "")
+ trace.trace_slice_start("test_slice", rid)
+ trace.trace_slice_end("test_slice", rid, ts=ts)
+ trace.trace_req_finish(rid)
+
+ def test_trace_slice_end_with_attributes(self):
+ """Test trace_slice_end with attributes"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_attrs"
+ attrs = {"test_attr": "test_value", "number_attr": 123}
+
+ trace.trace_req_start(rid, "")
+ trace.trace_slice_start("test_slice", rid)
+ trace.trace_slice_end("test_slice", rid, attrs=attrs)
+ trace.trace_req_finish(rid)
+
+ def test_trace_event_with_timestamp(self):
+ """Test trace_event with custom timestamp"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_event_timestamp"
+ ts = int(time.time() * 1e9) - 500000 # 0.5ms ago
+
+ trace.trace_req_start(rid, "")
+ trace.trace_slice_start("test_slice", rid)
+ trace.trace_event("test_event", rid, ts=ts)
+ trace.trace_slice_end("test_slice", rid)
+ trace.trace_req_finish(rid)
+
+ def test_trace_event_without_attributes(self):
+ """Test trace_event without attributes"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_event_no_attrs"
+
+ trace.trace_req_start(rid, "")
+ trace.trace_slice_start("test_slice", rid)
+ trace.trace_event("test_event", rid)
+ trace.trace_slice_end("test_slice", rid)
+ trace.trace_req_finish(rid)
+
+ def test_trace_report_span_with_thread_finish(self):
+ """Test trace_report_span with thread_finish_flag"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_report_thread_finish"
+ start_time = int(time.time() * 1e9)
+ end_time = start_time + 1000000 # 1ms later
+
+ trace.trace_req_start(rid, "")
+ trace.trace_report_span("test_span", rid, start_time, end_time, thread_finish_flag=True)
+ trace.trace_req_finish(rid)
+
+ def test_multiple_nested_slices(self):
+ """Test multiple levels of nested slices"""
+ trace.process_tracing_init()
+ trace.trace_set_thread_info("test_thread")
+
+ rid = "test_nested"
+
+ trace.trace_req_start(rid, "")
+ trace.trace_slice_start("level1", rid)
+ trace.trace_slice_start("level2", rid)
+ trace.trace_slice_start("level3", rid)
+ trace.trace_slice_end("level3", rid)
+ trace.trace_slice_end("level2", rid)
+ trace.trace_slice_end("level1", rid)
+ trace.trace_req_finish(rid)
+
+ def test_concurrent_slice_operations(self):
+ """Test concurrent slice operations"""
+ trace.process_tracing_init()
+
+ rid = "test_concurrent_slices"
+
+ def worker_slices():
+ trace.trace_set_thread_info("worker_thread")
+ trace.trace_req_start(rid, "")
+ trace.trace_slice_start("worker_slice", rid)
+ time.sleep(0.001)
+ trace.trace_slice_end("worker_slice", rid)
+ trace.trace_req_finish(rid)
+
+ # Main thread
+ trace.trace_set_thread_info("main_thread")
+ trace.trace_req_start(rid, "")
+ trace.trace_slice_start("main_slice", rid)
+
+ # Start worker thread
+ thread = threading.Thread(target=worker_slices)
+ thread.start()
+ thread.join()
+
+ trace.trace_slice_end("main_slice", rid)
+ trace.trace_req_finish(rid)
+
+
+if __name__ == "__main__":
+ pytest.main([__file__, "-v"])
diff --git a/tests/metrics/test_trace_util.py b/tests/metrics/test_trace_util.py
deleted file mode 100644
index ebec980f38..0000000000
--- a/tests/metrics/test_trace_util.py
+++ /dev/null
@@ -1,193 +0,0 @@
-"""
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-
-import unittest
-from unittest.mock import MagicMock, patch
-
-from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
-
-from fastdeploy.metrics.trace_util import FilteringSpanProcessor, lable_span
-
-
-class TestFilteringSpanProcessor(unittest.TestCase):
- """Test cases for FilteringSpanProcessor class"""
-
- def setUp(self):
- """Set up test fixtures"""
- self.exporter = ConsoleSpanExporter()
- self.processor = FilteringSpanProcessor(self.exporter)
-
- def test_initialization(self):
- """Test that FilteringSpanProcessor is properly initialized"""
- self.assertIsInstance(self.processor._processor, BatchSpanProcessor)
- self.assertEqual(self.processor._processor.span_exporter, self.exporter)
-
- def test_on_start_with_parent_span(self):
- """Test on_start method with parent span containing stream attribute"""
- # Mock span and parent context
- mock_span = MagicMock()
- mock_parent_span = MagicMock()
- mock_parent_span.is_recording.return_value = True
- mock_parent_span.attributes.get.return_value = "test_stream"
-
- # Mock trace.get_current_span to return parent span
- with patch("fastdeploy.metrics.trace_util.trace.get_current_span", return_value=mock_parent_span):
- with patch.object(self.processor._processor, "on_start") as mock_parent_on_start:
- self.processor.on_start(mock_span, parent_context=None)
-
- # Verify stream attribute is set on child span
- mock_span.set_attribute.assert_called_once_with("stream", "test_stream")
- mock_parent_on_start.assert_called_once_with(mock_span, None)
-
- def test_on_start_without_parent_span(self):
- """Test on_start method without parent span"""
- mock_span = MagicMock()
-
- # Mock trace.get_current_span to return None
- with patch("fastdeploy.metrics.trace_util.trace.get_current_span", return_value=None):
- with patch.object(self.processor._processor, "on_start") as mock_parent_on_start:
- self.processor.on_start(mock_span, parent_context=None)
-
- # Verify no attributes are set
- mock_span.set_attribute.assert_not_called()
- mock_parent_on_start.assert_called_once_with(mock_span, None)
-
- def test_on_start_with_non_recording_parent_span(self):
- """Test on_start method with non-recording parent span"""
- mock_span = MagicMock()
- mock_parent_span = MagicMock()
- mock_parent_span.is_recording.return_value = False
-
- with patch("fastdeploy.metrics.trace_util.trace.get_current_span", return_value=mock_parent_span):
- with patch.object(self.processor._processor, "on_start") as mock_parent_on_start:
- self.processor.on_start(mock_span, parent_context=None)
-
- # Verify no attributes are set
- mock_span.set_attribute.assert_not_called()
- mock_parent_on_start.assert_called_once_with(mock_span, None)
-
- def test_on_end_filter_stream_http_response(self):
- """Test on_end method filters out stream http response spans"""
- mock_span = MagicMock()
- mock_span.attributes.get.side_effect = lambda key: {
- "asgi.event.type": "http.response.body",
- "stream": "true",
- }.get(key)
- mock_span.name = "http send request"
-
- with patch.object(self.processor._processor, "on_end") as mock_parent_on_end:
- self.processor.on_end(mock_span)
-
- # Verify parent on_end is NOT called (span is filtered out)
- mock_parent_on_end.assert_not_called()
-
- def test_on_end_keep_non_stream_spans(self):
- """Test on_end method keeps non-stream spans"""
- mock_span = MagicMock()
- mock_span.attributes.get.side_effect = lambda key: {"asgi.event.type": "http.request", "stream": None}.get(key)
- mock_span.name = "http receive request"
-
- with patch.object(self.processor._processor, "on_end") as mock_parent_on_end:
- self.processor.on_end(mock_span)
-
- # Verify parent on_end is called
- mock_parent_on_end.assert_called_once_with(mock_span)
-
- def test_on_end_keep_spans_without_http_send(self):
- """Test on_end method keeps spans without 'http send' in name"""
- mock_span = MagicMock()
- mock_span.attributes.get.side_effect = lambda key: {
- "asgi.event.type": "http.response.body",
- "stream": "true",
- }.get(key)
- mock_span.name = "other operation"
-
- with patch.object(self.processor._processor, "on_end") as mock_parent_on_end:
- self.processor.on_end(mock_span)
-
- # Verify parent on_end is called
- mock_parent_on_end.assert_called_once_with(mock_span)
-
- def test_shutdown(self):
- """Test shutdown method"""
- with patch.object(self.processor._processor, "shutdown") as mock_shutdown:
- self.processor.shutdown()
- mock_shutdown.assert_called_once()
-
- def test_force_flush(self):
- """Test force_flush method"""
- with patch.object(self.processor._processor, "force_flush") as mock_force_flush:
- self.processor.force_flush(timeout_millis=5000)
- mock_force_flush.assert_called_once_with(5000)
-
-
-class TestLableSpan(unittest.TestCase):
- """Test cases for lable_span function"""
-
- def test_lable_span_with_stream_request(self):
- """Test lable_span function with streaming request"""
- mock_request = MagicMock()
- mock_request.stream = True
-
- mock_span = MagicMock()
- mock_span.is_recording.return_value = True
-
- with patch("fastdeploy.metrics.trace_util.trace.get_current_span", return_value=mock_span):
- lable_span(mock_request)
-
- # Verify stream attribute is set
- mock_span.set_attribute.assert_called_once_with("stream", "true")
-
- def test_lable_span_without_stream_request(self):
- """Test lable_span function with non-streaming request"""
- mock_request = MagicMock()
- mock_request.stream = False
-
- mock_span = MagicMock()
- mock_span.is_recording.return_value = True
-
- with patch("fastdeploy.metrics.trace_util.trace.get_current_span", return_value=mock_span):
- lable_span(mock_request)
-
- # Verify no attributes are set
- mock_span.set_attribute.assert_not_called()
-
- def test_lable_span_without_current_span(self):
- """Test lable_span function when no current span exists"""
- mock_request = MagicMock()
- mock_request.stream = True
-
- with patch("fastdeploy.metrics.trace_util.trace.get_current_span", return_value=None):
- # Should not raise any exception
- lable_span(mock_request)
-
- def test_lable_span_with_non_recording_span(self):
- """Test lable_span function with non-recording span"""
- mock_request = MagicMock()
- mock_request.stream = True
-
- mock_span = MagicMock()
- mock_span.is_recording.return_value = False
-
- with patch("fastdeploy.metrics.trace_util.trace.get_current_span", return_value=mock_span):
- lable_span(mock_request)
-
- # Verify no attributes are set
- mock_span.set_attribute.assert_not_called()
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/output/test_process_batch_output.py b/tests/output/test_process_batch_output.py
index ee25341b62..77d28aba04 100644
--- a/tests/output/test_process_batch_output.py
+++ b/tests/output/test_process_batch_output.py
@@ -66,6 +66,7 @@ class MockTask:
self.llm_engine_recv_req_timestamp = time.time()
self.ic_req_data = {}
self.prompt_token_ids_len = 0
+ self.trace_carrier = {}
now = time.time()
self.metrics = RequestMetrics(