dev: add DNN

2026-04-22 23:27:21 +08:00 · 2025-01-10 23:11:27 +08:00
parent 05e4c291d4
commit a242c30751
15 changed files with 641 additions and 73 deletions
@@ -1,7 +1,7 @@
 # Compiler and flags
 CC = g++
-CFLAGS = -Wall -O2 -I$(INCDIR) `pkg-config --cflags libavformat libavcodec libavutil libswscale sdl2 SDL2_ttf`
-LDFLAGS = `pkg-config --libs libavformat libavcodec libavutil libswscale sdl2 SDL2_ttf` -lpthread
+CFLAGS = -Wall -O2 -I$(INCDIR) `pkg-config --cflags libavformat libavcodec libavutil libswscale sdl2 SDL2_ttf opencv4`
+LDFLAGS = `pkg-config --libs libavformat libavcodec libavutil libswscale sdl2 SDL2_ttf opencv4` -lpthread

 # Target executable
 TARGET = generic-rtsp-yolov8-render
@@ -0,0 +1,25 @@
+// Copyright (C) 2025 wwhai
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#ifndef COCO_CLASS_H
+#define COCO_CLASS_H
+#include <stdio.h>
+
+// Fill the global COCO class-name table.
+void init_coco_names();
+// Look up a class name by its numeric id.
+const char *get_coco_name(int id);
+// Print every entry of the COCO class-name table to stdout.
+void print_coco_names();
+#endif // COCO_CLASS_H
@@ -65,16 +65,27 @@ typedef struct FrameQueue
    int max_size;
 } FrameQueue;

-// 初始化队列
+/// @brief 初始化队列
+/// @param q 队列指针
+/// @param max_size 队列最大容量
 void frame_queue_init(FrameQueue *q, int max_size);

-// 入队操作
+/// @brief  入队操作
+/// @param q
+/// @param item
+/// @return
 int enqueue(FrameQueue *q, QueueItem item);

 // 出队操作
 int dequeue(FrameQueue *q, QueueItem *item);
-
-// 释放队列资源的函数
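+// Non-blocking dequeue: returns immediately when the queue is empty.
+// @param q Pointer to the queue
+// @param item Receives the dequeued element
+// @return 1 on success, -1 if the queue is empty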
+int async_dequeue(FrameQueue *q, QueueItem *item);
+/// Destroy the queue and release its resources.
+/// @param q Pointer to the queue
 void frame_queue_destroy(FrameQueue *q);

 #endif // FRAME_QUEUE_H
@@ -0,0 +1,33 @@
+// Copyright (C) 2025 wwhai
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+#ifndef OPENCV_DNN_MODULE_H
+#define OPENCV_DNN_MODULE_H
+
+#include <opencv2/opencv.hpp>
+#include <opencv2/dnn.hpp>
+#include <vector>
+#include "frame_queue.h"
+
+// Initialize the YOLOv8 model: load the ONNX file at model_path into *net.
+// Returns 0 on success, -1 on failure.
+int Init_CV_ONNX_DNN_Yolov8(const char *model_path, cv::dnn::Net *net);
+
+// Run YOLOv8 inference on frame and append the detections to boxes.
+// Returns 0 on success, -1 on failure.
+int Infer_CV_ONNX_DNN_Yolov8(cv::dnn::Net *net, cv::Mat frame, std::vector<Box> &boxes);
+
+// Release the model resources.
+// Returns 0 on success, -1 on failure.
+int Release_CV_ONNX_DNN_Yolov8(cv::dnn::Net *net);
+
+#endif // OPENCV_DNN_MODULE_H
@@ -0,0 +1,88 @@
+// Copyright (C) 2025 wwhai
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#ifndef OPENCV_UTILS_H
+#define OPENCV_UTILS_H
+#include <opencv2/opencv.hpp>
+extern "C"
+{
+#include <libavutil/frame.h>
+#include <libavutil/imgutils.h>
+#include <libswscale/swscale.h>
+}
+// Result of scanning a score array: index and value of the best score.
+struct BestResult
+{
+    int bestId;      // index of the best score, -1 when none was selected
+    float bestScore; // value of the best score
+};
+// One detection produced by postprocess().
+typedef struct
+{
+    int x;        // box x
+    int y;        // box y
+    int w;        // box width
+    int h;        // box height
+    float score;  // confidence of the selected class
+    int class_id; // index of the selected class
+} DnnResult;
+/// @brief Convert an AVFrame into a cv::Mat
+/// @param frame Pointer to the AVFrame
+/// @return The converted image, or an empty cv::Mat for an unsupported pixel format
+cv::Mat AVFrameToCVMat(AVFrame *frame);
+/// @brief Find the highest value in an array of confidence scores
+/// @param confidenceValues Array of scores
+/// @param size Number of elements in confidenceValues
+/// @return Index and value of the best score
+BestResult getBestFromConfidenceValue(float confidenceValues[], size_t size);
+
+/// @brief Turn raw network outputs into a list of detections (includes non-maximum suppression)
+/// @param frame Input image
+/// @param outs Output tensors of the network
+/// @param confThreshold Confidence threshold passed to non-maximum suppression
+/// @param nmsThreshold Overlap threshold passed to non-maximum suppression
+/// @return Detections kept after non-maximum suppression
+std::vector<DnnResult> postprocess(cv::Mat &frame, const std::vector<cv::Mat> &outs, float confThreshold, float nmsThreshold);
+// Compute the width and height scale factors between two image sizes.
+// @param original_width Width of the original image
+// @param original_height Height of the original image
+// @param scaled_width Width of the scaled image
+// @param scaled_height Height of the scaled image
+// @param width_scale Output: original_width / scaled_width
+// @param height_scale Output: original_height / scaled_height
+void calculate_scale_factors(float original_width, float original_height, float scaled_width,
+                             float scaled_height, float *width_scale, float *height_scale);
+
+// Restore a box measured on the scaled image to the original image size.
+// @param x X coordinate of the box on the scaled image
+// @param y Y coordinate of the box on the scaled image
+// @param w Width of the box on the scaled image
+// @param h Height of the box on the scaled image
+// @param width_scale Scale factor for the horizontal axis
+// @param height_scale Scale factor for the vertical axis
+// @param x_original, y_original, w_original, h_original Outputs: the box at original size
+void rescale_box(float x, float y, float w, float h, float width_scale, float height_scale,
+                 float *x_original, float *y_original, float *w_original, float *h_original);
+
+// C-style letterbox: resize keeping the aspect ratio, then pad to the target size.
+// @param src Pointer to the source image
+// @param dst Pointer to the image that receives the letterboxed result
+// @param new_width Target width
+// @param new_height Target height
+// @param color Padding color (the declaration has no default; callers pass it explicitly)
+void letterbox(const cv::Mat *src, cv::Mat *dst, int new_width, int new_height, cv::Scalar color);
+// Map a rectangle on the letterboxed image back onto the original image.
+// @param box Rectangle on the letterboxed image (x, y, width, height)
+// @param original_size Size of the original image
+// @param letterboxed_size Size of the image after letterbox processing
+// @return The rectangle on the original image
+cv::Rect map_box_to_original(cv::Rect box, cv::Size original_size, cv::Size letterboxed_size);
+#endif
@@ -0,0 +1,119 @@
+// Copyright (C) 2025 wwhai
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+#include "coco_class.h"
+
+// Storage for the class names; filled in by init_coco_names().
+const char *coco_names[80];
+// Populate coco_names with the 80 COCO class labels, indexed by class id.
+void init_coco_names()
+{
+    // Labels listed in class-id order, five per row.
+    static const char *const kLabels[80] = {
+        "person", "bicycle", "car", "motorcycle", "airplane",
+        "bus", "train", "truck", "boat", "traffic light",
+        "fire hydrant", "stop sign", "parking meter", "bench", "bird",
+        "cat", "dog", "horse", "sheep", "cow",
+        "elephant", "bear", "zebra", "giraffe", "backpack",
+        "umbrella", "handbag", "tie", "suitcase", "frisbee",
+        "skis", "snowboard", "sports ball", "kite", "baseball bat",
+        "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
+        "wine glass", "cup", "fork", "knife", "spoon",
+        "bowl", "banana", "apple", "sandwich", "orange",
+        "broccoli", "carrot", "hot dog", "pizza", "donut",
+        "cake", "chair", "couch", "potted plant", "bed",
+        "dining table", "toilet", "tv", "laptop", "mouse",
+        "remote", "keyboard", "cell phone", "microwave", "oven",
+        "toaster", "sink", "refrigerator", "book", "clock",
+        "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
+    };
+    for (int idx = 0; idx < 80; ++idx)
+    {
+        coco_names[idx] = kLabels[idx];
+    }
+}
+// Dump the class-name table to stdout, one "id => name" line per entry.
+void print_coco_names()
+{
+    printf("coco names:\n");
+    int id = 0;
+    while (id < 80)
+    {
+        printf("  %d => %s\n", id, coco_names[id]);
+        ++id;
+    }
+}
+
+// Return the COCO class name for the given class id.
+// Returns "unknown" when id is outside [0, 79] or when the table entry has
+// not been filled in (coco_names is only populated by init_coco_names()).
+const char *get_coco_name(int id)
+{
+    // The table holds 80 entries, so the last valid index is 79; `id > 80`
+    // would let id == 80 read one element past the end of the array.
+    if (id < 0 || id >= 80)
+    {
+        return "unknown";
+    }
+    const char *name = coco_names[id];
+    // Never hand a null pointer to callers that copy or print the name.
+    return name ? name : "unknown";
+}
@@ -12,20 +12,26 @@
 //
 // You should have received a copy of the GNU Affero General Public License
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
 #include "detection_thread.h"
 #include <stdio.h>
 #include "thread_args.h"
-
 #include <stdlib.h>
 #include <time.h>
 #include <unistd.h>
+#include "opencv_dnn_module.h"
+#include "opencv_utils.h"

 void *frame_detection_thread(void *arg)
 {
    const ThreadArgs *args = (ThreadArgs *)arg;
-
-    srand(time(NULL)); // 用于生成随机数的种子
+    const char *modelPath = "./yolov8n.onnx";
+    cv::dnn::Net net;
+    if (Init_CV_ONNX_DNN_Yolov8(modelPath, &net) != 0)
+    {
+        printf("Error: Failed to initialize the YOLOv8 ONNX DNN model.\n");
+        pthread_exit(NULL);
+        return NULL;
+    }

    while (1)
    {
@@ -34,37 +40,42 @@ void *frame_detection_thread(void *arg)
            goto END;
        }

-        // 每隔 100 毫秒插入一些矩形框
-        usleep(100000); // 100 毫秒
-
-        // 随机生成矩形框
-        QueueItem item;
-        memset(&item, 0, sizeof(QueueItem));
-
-        // 设置框的数量，可以根据需要调整
-        item.box_count = rand() % 5 + 1; // 随机生成 1 到 5 个矩形框
-
-        for (int i = 0; i < item.box_count; ++i)
+        QueueItem detection_item;
+        memset(&detection_item, 0, sizeof(QueueItem));
+        if (dequeue(args->detection_queue, &detection_item))
        {
-            Box *box = &item.Boxes[i];
-
-            // 随机生成矩形框的坐标和大小，范围在 1920x1080 分辨率内
-            box->x = rand() % 1920;                     // 随机横坐标
-            box->y = rand() % 1080;                     // 随机纵坐标
-            box->w = rand() % (1920 - box->x);          // 随机宽度
-            box->h = rand() % (1080 - box->y);          // 随机高度
-            box->prop = (float)(rand() % 100) / 100.0f; // 随机比例
-
-            // 随机生成标签
-            sprintf(box->label, "Box%d", i + 1);
+            if (detection_item.type == ONLY_FRAME)
+            {
+                AVFrame *detection_frame = (AVFrame *)detection_item.data;
+                cv::Mat detection_mat = AVFrameToCVMat(detection_frame);
+                if (!detection_mat.empty())
+                {
+                    std::vector<Box> outputs;
+                    Infer_CV_ONNX_DNN_Yolov8(&net, detection_mat, outputs);
+                    QueueItem boxes_item;
+                    memset(&boxes_item, 0, sizeof(QueueItem));
+                    boxes_item.box_count = (outputs.size() > 20) ? 20 : outputs.size();
+                    boxes_item.type = ONLY_BOXES;
+                    boxes_item.data = NULL;
+                    for (int i = 0; i < boxes_item.box_count; ++i)
+                    {
+                        // 存储检测框信息
+                        boxes_item.Boxes[i].x = outputs[i].x;
+                        boxes_item.Boxes[i].y = outputs[i].y;
+                        boxes_item.Boxes[i].w = outputs[i].w;
+                        boxes_item.Boxes[i].h = outputs[i].h;
+                        boxes_item.Boxes[i].prop = outputs[i].prop;
+                        strcpy(boxes_item.Boxes[i].label, outputs[i].label);
+                    }
+                    enqueue(args->box_queue, boxes_item);
+                }
+                av_frame_free(&detection_frame);
+            }
        }
-        item.type = ONLY_BOXES;
-        item.data = NULL;
-        enqueue(args->box_queue, item);
-        // printf(">>> enqueue(args->detection_queue, item)\n");
    }

 END:
+    Release_CV_ONNX_DNN_Yolov8(&net);
    pthread_exit(NULL);
    return NULL;
-}
+}
@@ -96,7 +96,30 @@ int dequeue(FrameQueue *q, QueueItem *item)
    free(temp);
    return 1;
 }
-
+// Non-blocking dequeue.
+// @param q Pointer to the queue
+// @param item Receives the dequeued element
+// @return 1 on success, -1 if the queue is empty
+int async_dequeue(FrameQueue *q, QueueItem *item)
+{
+    QueueNode *head = NULL;
+
+    pthread_mutex_lock(&q->lock);
+    head = q->front;
+    if (head != NULL)
+    {
+        // Detach the head node while the lock is held.
+        *item = head->item;
+        q->front = head->next;
+        if (q->front == NULL)
+        {
+            q->rear = NULL; // the queue became empty
+        }
+        q->size--;
+    }
+    pthread_mutex_unlock(&q->lock);
+
+    if (head == NULL)
+    {
+        return -1;
+    }
+    // The node is already unlinked, so it is freed outside the lock.
+    free(head);
+    return 1;
+}
 // 释放队列资源的函数
 void frame_queue_destroy(FrameQueue *q)
 {
@@ -0,0 +1,108 @@
+// Copyright (C) 2025 wwhai
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#include "opencv_dnn_module.h"
+#include "opencv_utils.h"
+#include <iostream>
+#include "coco_class.h"
+
+// Initialize the YOLOv8 model.
+// Fills and prints the COCO class-name table, loads the ONNX model at
+// model_path into *net and requests the CUDA backend and target.
+// @param model_path Path of the ONNX model file
+// @param net Network object that receives the loaded model
+// @return 0 on success, -1 on failure (null argument, empty network or exception)
+int Init_CV_ONNX_DNN_Yolov8(const char *model_path, cv::dnn::Net *net)
+{
+    // Reject null pointers up front, matching the null check done in
+    // Infer_CV_ONNX_DNN_Yolov8, instead of dereferencing them below.
+    if (!model_path || !net)
+    {
+        std::cerr << "Error: Null argument passed to Init_CV_ONNX_DNN_Yolov8." << std::endl;
+        return -1;
+    }
+    init_coco_names();
+    print_coco_names();
+    try
+    {
+        *net = cv::dnn::readNetFromONNX(model_path);
+        if (net->empty())
+        {
+            std::cerr << "Error: Failed to load YOLOv8 model from " << model_path << std::endl;
+            return -1;
+        }
+
+        net->setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
+        net->setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
+
+        return 0; // success
+    }
+    catch (const std::exception &e)
+    {
+        std::cerr << "Exception during model initialization: " << e.what() << std::endl;
+        return -1;
+    }
+}
+
+// Run YOLOv8 inference on one frame.
+// The frame is letterboxed to 640x640, passed through the network, and every
+// detection returned by postprocess() is mapped back to the coordinates of
+// the original frame and appended to boxes.
+// @param net Network to run
+// @param frame Input image
+// @param boxes Output vector; detections are appended to it
+// @return 0 on success, -1 on failure (null net, empty frame, no output or exception)
+int Infer_CV_ONNX_DNN_Yolov8(cv::dnn::Net *net, cv::Mat frame, std::vector<Box> &boxes)
+{
+    if (!net)
+    {
+        fprintf(stderr, "Error: Net pointer is null.\n");
+        return -1;
+    }
+    // letterbox() divides by the frame dimensions, so an empty frame must be rejected.
+    if (frame.empty())
+    {
+        fprintf(stderr, "Error: Input frame is empty.\n");
+        return -1;
+    }
+    try
+    {
+        // Prepare the input; YOLOv8 expects a 640x640 image.
+        cv::Mat blob;
+        cv::Mat letterboxed_frame;
+        letterbox(&frame, &letterboxed_frame, 640, 640, cv::Scalar(0, 0, 0));
+        cv::dnn::blobFromImage(letterboxed_frame, blob, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true, false);
+        net->setInput(blob);
+        // Forward pass
+        std::vector<cv::Mat> outs;
+        std::string outputName = "output0";
+        int layerId = net->getLayerId("output0");
+        net->registerOutput(outputName, layerId, 0);
+        net->forward(outs, outputName);
+        // Check the inference result
+        if (outs.empty())
+        {
+            fprintf(stderr, "Error: No output from the network.\n");
+            return -1;
+        }
+        std::vector<DnnResult> results = postprocess(frame, outs, 0.25, 0.5);
+        for (const DnnResult &result : results)
+        {
+            const char *coco_name = get_coco_name(result.class_id);
+            cv::Rect box_in_letterbox(result.x, result.y, result.w, result.h);
+            cv::Rect box_in_original = map_box_to_original(box_in_letterbox, frame.size(), letterboxed_frame.size());
+            // Zero-initialize, then assign member by member; this avoids
+            // designated initializers, which are not part of C++17.
+            Box box = {};
+            box.x = box_in_original.x;
+            box.y = box_in_original.y;
+            box.w = box_in_original.width;
+            box.h = box_in_original.height;
+            box.prop = result.score;
+            // Bounded copy so a long name cannot overflow the label buffer.
+            // NOTE(review): assumes Box::label is a fixed-size char array — confirm in frame_queue.h.
+            snprintf(box.label, sizeof(box.label), "%s", coco_name ? coco_name : "unknown");
+            boxes.push_back(box);
+        }
+        return 0;
+    }
+    catch (const std::exception &e)
+    {
+        // Report the failure instead of letting the exception escape into the calling thread.
+        fprintf(stderr, "Exception during inference: %s\n", e.what());
+        return -1;
+    }
+}
+
+// Release the model resources held by *net.
+// @param net Network to reset; a null pointer is accepted and ignored
+// @return 0 on success, -1 if an exception was raised
+int Release_CV_ONNX_DNN_Yolov8(cv::dnn::Net *net)
+{
+    try
+    {
+        if (net)
+        {
+            // Assign an empty network instead of calling the destructor
+            // explicitly: the object belongs to the caller and is destroyed
+            // again when it goes out of scope, so an explicit `~Net()` here
+            // would destroy it twice.
+            *net = cv::dnn::Net();
+        }
+        return 0; // success
+    }
+    catch (const std::exception &e)
+    {
+        fprintf(stderr, "Exception during model release: %s\n", e.what());
+        return -1;
+    }
+}
@@ -0,0 +1,161 @@
+// Copyright (C) 2025 wwhai
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+#include "opencv_utils.h"
+// Convert an AVFrame into an OpenCV cv::Mat.
+// YUV420P / YUVJ420P frames are converted to a 3-channel RGB image; RGB24
+// frames are copied as they are.
+// @param frame Frame to convert
+// @return The converted image, or an empty cv::Mat when the frame is null,
+//         has non-positive dimensions, uses an unsupported pixel format, or
+//         its planes cannot be copied
+cv::Mat AVFrameToCVMat(AVFrame *frame)
+{
+    if (!frame || frame->width <= 0 || frame->height <= 0)
+    {
+        fprintf(stderr, "Invalid frame passed to AVFrameToCVMat\n");
+        return cv::Mat();
+    }
+    // Frame geometry and pixel format
+    const int width = frame->width;
+    const int height = frame->height;
+    const AVPixelFormat pix_fmt = (AVPixelFormat)frame->format;
+
+    if (pix_fmt == AV_PIX_FMT_YUV420P || pix_fmt == AV_PIX_FMT_YUVJ420P)
+    {
+        // Single-channel buffer holding the Y plane followed by the U and V planes.
+        cv::Mat yuvFrame(height + height / 2, width, CV_8UC1);
+        const int copied = av_image_copy_to_buffer(yuvFrame.data, static_cast<int>(yuvFrame.total() * yuvFrame.elemSize()),
+                                                   (const uint8_t **)frame->data, frame->linesize,
+                                                   pix_fmt, width, height, 1);
+        if (copied < 0)
+        {
+            // A negative result means the planes were not copied (for example
+            // the buffer was too small); do not convert uninitialized data.
+            fprintf(stderr, "Failed to copy YUV planes from AVFrame\n");
+            return cv::Mat();
+        }
+        cv::Mat rgbFrame(height, width, CV_8UC3);
+        // Convert YUV to RGB
+        cv::cvtColor(yuvFrame, rgbFrame, cv::COLOR_YUV2RGB_I420);
+        return rgbFrame;
+    }
+    if (pix_fmt == AV_PIX_FMT_RGB24)
+    {
+        cv::Mat rgbFrame(height, width, CV_8UC3);
+        // Copy the packed RGB plane
+        const int copied = av_image_copy_to_buffer(rgbFrame.data, static_cast<int>(rgbFrame.total() * rgbFrame.elemSize()),
+                                                   (const uint8_t **)frame->data, frame->linesize,
+                                                   pix_fmt, width, height, 1);
+        if (copied < 0)
+        {
+            fprintf(stderr, "Failed to copy RGB plane from AVFrame\n");
+            return cv::Mat();
+        }
+        return rgbFrame;
+    }
+
+    // av_get_pix_fmt_name() returns NULL for formats it does not know.
+    const char *fmt_name = av_get_pix_fmt_name(pix_fmt);
+    fprintf(stderr, "Unsupported pixel format: %s\n", fmt_name ? fmt_name : "unknown");
+    return cv::Mat();
+}
+
+// Scan the scores and report the index and value of the highest one.
+// Only scores strictly greater than 0 are considered; when none qualifies,
+// bestId stays -1 and bestScore stays 0.
+BestResult getBestFromConfidenceValue(float confidenceValues[], size_t size)
+{
+    BestResult best = {-1, 0.0f}; // -1 marks "nothing selected yet"
+    const float *const end = confidenceValues + size;
+    for (const float *score = confidenceValues; score != end; ++score)
+    {
+        // Strict comparison: on ties the earliest index wins.
+        if (*score > best.bestScore)
+        {
+            best.bestScore = *score;
+            best.bestId = static_cast<int>(score - confidenceValues);
+        }
+    }
+    return best;
+}
+
+// Turn raw network outputs into a list of detections.
+// Each output is read as 84 attributes for 8400 candidates stored
+// attribute-major: attribute j of candidate i is at data[i + 8400 * j].
+// Attributes 0..3 are x, y, w, h (x and y are treated as the box center);
+// attributes 4..83 are the 80 per-class scores. The best class of every
+// candidate is collected and non-maximum suppression picks the final boxes.
+// @param frame Input image (not used by this function)
+// @param outs Output tensors of the network
+// @param confThreshold Confidence threshold passed to NMSBoxes
+// @param nmsThreshold Overlap threshold passed to NMSBoxes
+// @return Detections kept by non-maximum suppression
+std::vector<DnnResult> postprocess(cv::Mat &frame, const std::vector<cv::Mat> &outs, float confThreshold, float nmsThreshold)
+{
+    (void)frame; // kept in the signature for callers; not needed here
+
+    const int columns = 84;
+    const int rows = 8400;
+    const size_t expected_values = static_cast<size_t>(rows) * columns;
+
+    std::vector<int> classIds;
+    std::vector<float> confidences;
+    std::vector<cv::Rect> boxes;
+    classIds.reserve(outs.size() * rows);
+    confidences.reserve(outs.size() * rows);
+    boxes.reserve(outs.size() * rows);
+
+    // Decode the network output
+    for (const auto &out : outs)
+    {
+        // The indexing below assumes a contiguous float tensor with at least
+        // 84 * 8400 values; anything else would be read out of bounds.
+        if (out.depth() != CV_32F || !out.isContinuous() || out.total() < expected_values)
+        {
+            fprintf(stderr, "Error: Unexpected network output layout, skipping.\n");
+            continue;
+        }
+        const float *data_ptr = reinterpret_cast<const float *>(out.data);
+        for (int i = 0; i < rows; ++i)
+        {
+            const float x = data_ptr[i + rows * 0];
+            const float y = data_ptr[i + rows * 1];
+            const float w = data_ptr[i + rows * 2];
+            const float h = data_ptr[i + rows * 3];
+            float confidenceValues[80] = {};
+            for (int j = 4; j < columns; ++j)
+            {
+                confidenceValues[j - 4] = data_ptr[i + rows * j];
+            }
+            const BestResult result = getBestFromConfidenceValue(confidenceValues, 80);
+            classIds.push_back(result.bestId);
+            confidences.push_back(result.bestScore);
+            // Convert center / size to a top-left anchored rectangle.
+            boxes.push_back(cv::Rect(int(x - w / 2), int(y - h / 2), int(w), int(h)));
+        }
+    }
+
+    // Non-maximum suppression
+    std::vector<DnnResult> boxes_result;
+    std::vector<int> indices;
+    cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
+    boxes_result.reserve(indices.size());
+    for (int idx : indices)
+    {
+        const cv::Rect &box = boxes[idx];
+        boxes_result.push_back({box.x, box.y, box.width, box.height, confidences[idx], classIds[idx]});
+    }
+    return boxes_result;
+}
+
+// Compute, per axis, the ratio between the original and the scaled image
+// size and store the results through width_scale and height_scale.
+void calculate_scale_factors(float original_width, float original_height, float scaled_width, float scaled_height, float *width_scale, float *height_scale)
+{
+    const float horizontal_ratio = original_width / scaled_width;
+    *width_scale = horizontal_ratio;
+    const float vertical_ratio = original_height / scaled_height;
+    *height_scale = vertical_ratio;
+}
+
+// Scale a box back to the original image size: horizontal values (x, w) are
+// multiplied by width_scale and vertical values (y, h) by height_scale. The
+// results are stored through the four output pointers.
+void rescale_box(float x, float y, float w, float h, float width_scale, float height_scale, float *x_original, float *y_original, float *w_original, float *h_original)
+{
+    const float scaled_x = x * width_scale;
+    *x_original = scaled_x;
+    const float scaled_y = y * height_scale;
+    *y_original = scaled_y;
+    const float scaled_w = w * width_scale;
+    *w_original = scaled_w;
+    const float scaled_h = h * height_scale;
+    *h_original = scaled_h;
+}
+
+// Letterbox an image: resize it with a single scale factor so that it fits
+// inside new_width x new_height, then pad the remaining border with color.
+// Padding is split between both sides; the odd pixel, if any, goes to the
+// bottom / right edge.
+void letterbox(const cv::Mat *src, cv::Mat *dst, int new_width, int new_height, cv::Scalar color)
+{
+    // The smaller of the two ratios keeps the whole image inside the target.
+    const float scale = std::min((float)new_width / src->cols, (float)new_height / src->rows);
+    const int content_w = static_cast<int>(scale * src->cols);
+    const int content_h = static_cast<int>(scale * src->rows);
+
+    cv::Mat resized;
+    cv::resize(*src, resized, cv::Size(content_w, content_h));
+
+    const int border_w = new_width - content_w;
+    const int border_h = new_height - content_h;
+    const int top = border_h / 2;
+    const int left = border_w / 2;
+    cv::copyMakeBorder(resized, *dst, top, border_h - top, left, border_w - left, cv::BORDER_CONSTANT, color);
+}
+
+// Map a rectangle measured on a letterboxed image back onto the original
+// image: recompute the scale and the top / left padding from the two sizes,
+// remove the padding offset and divide by the scale.
+cv::Rect map_box_to_original(cv::Rect box, cv::Size original_size, cv::Size letterboxed_size)
+{
+    const float scale = std::min((float)letterboxed_size.width / original_size.width, (float)letterboxed_size.height / original_size.height);
+    // Size of the image content inside the letterboxed frame, without padding.
+    const int content_w = static_cast<int>(scale * original_size.width);
+    const int content_h = static_cast<int>(scale * original_size.height);
+    const int left = (letterboxed_size.width - content_w) / 2;
+    const int top = (letterboxed_size.height - content_h) / 2;
+
+    return cv::Rect(static_cast<int>((box.x - left) / scale),
+                    static_cast<int>((box.y - top) / scale),
+                    static_cast<int>(box.width / scale),
+                    static_cast<int>(box.height / scale));
+}
@@ -185,14 +185,27 @@ void *rtsp_handler_thread(void *arg)
            }
            else
            {
-                AVFrame *frameOutput = CopyAVFrame(origin_frame);
-                QueueItem outputItem;
-                outputItem.type = ONLY_FRAME;
-                outputItem.data = frameOutput;
-                memset(outputItem.Boxes, 0, sizeof(outputItem.Boxes));
-                if (!enqueue(args->video_queue, outputItem))
                {
-                    av_frame_free(&frameOutput);
+                    AVFrame *display_frame = CopyAVFrame(origin_frame);
+                    QueueItem outputItem;
+                    outputItem.type = ONLY_FRAME;
+                    outputItem.data = display_frame;
+                    memset(outputItem.Boxes, 0, sizeof(outputItem.Boxes));
+                    if (!enqueue(args->video_queue, outputItem))
+                    {
+                        av_frame_free(&display_frame);
+                    }
+                }
+                {
+                    AVFrame *detection_frame = CopyAVFrame(origin_frame);
+                    QueueItem outputItem;
+                    outputItem.type = ONLY_FRAME;
+                    outputItem.data = detection_frame;
+                    memset(outputItem.Boxes, 0, sizeof(outputItem.Boxes));
+                    if (!enqueue(args->detection_queue, outputItem))
+                    {
+                        av_frame_free(&detection_frame);
+                    }
                }
            }
            av_frame_free(&origin_frame);
@@ -114,44 +114,20 @@ void *video_renderer_thread(void *arg)
                av_frame_free(&newFrame);
            }
        }
-
        SDL_RenderCopy(renderer, texture, NULL, NULL);
-
        // 处理检测结果队列
        QueueItem boxes_item;
-        if (dequeue(args->box_queue, &boxes_item))
+        if (async_dequeue(args->box_queue, &boxes_item))
        {
-            // printf("<<< dequeue(args->box_queue, &boxes_item)\n");
            if (boxes_item.type == ONLY_BOXES)
            {
-                // 当前检测框
-                Box *boxes = boxes_item.Boxes;
-                // 获取上一帧的检测框
-                QueueItem prevItem;
-                memset(&prevItem, 0, sizeof(QueueItem));
-                if (dequeue(args->box_queue, &prevItem) && prevItem.type == ONLY_BOXES)
+                for (int i = 0; i < boxes_item.box_count; ++i)
                {
-                    Box *prevBoxes = prevItem.Boxes;
-                    for (int i = 0; i < boxes_item.box_count; ++i)
-                    {
-                        // 插值平滑过渡
-                        Box interpolatedBox = InterpolateBox(prevBoxes[i], boxes[i], 0.5f); // 0.5f为插值因子
-                        RenderBox(renderer, &interpolatedBox);
-                    }
+                    RenderBox(renderer, &boxes_item.Boxes[i]);
+                    SDLDrawText(renderer, texture, font, boxes_item.Boxes[i].label, boxes_item.Boxes[i].x, boxes_item.Boxes[i].y);
                }
-                else
-                {
-                    // 如果没有前一帧的检测框，直接渲染当前帧的框
-                    for (int i = 0; i < boxes_item.box_count; ++i)
-                    {
-                        RenderBox(renderer, &boxes[i]);
-                    }
-                }
-                // 将当前检测框推入队列，用于下一帧的插值
-                enqueue(args->box_queue, boxes_item);
            }
        }
-
        // 计算FPS并显示
        frameCount++;
        currentFrameTime = SDL_GetPerformanceCounter();