mirror of
https://github.com/gowvp/gb28181.git
synced 2026-04-22 15:07:10 +08:00
753 lines
24 KiB
Python
753 lines
24 KiB
Python
import logging
|
||
import os
|
||
import time
|
||
from abc import ABC, abstractmethod
|
||
from typing import Any
|
||
|
||
import numpy as np
|
||
import cv2
|
||
|
||
# Logger shared by every backend and detector defined in this module.
slog = logging.getLogger("Detector")

# The 80 class labels of the COCO dataset. ObjectDetector enumerates this
# list to map a predicted class index to its label, so order matters.
COCO_LABELS = [
    # people and vehicles
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
    "truck", "boat",
    # street furniture
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
    "zebra", "giraffe",
    # accessories
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # sports
    "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket",
    # kitchenware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # food
    "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
    "pizza", "donut", "cake",
    # furniture
    "chair", "couch", "potted plant", "bed", "dining table", "toilet",
    # electronics
    "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # appliances
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # indoor objects
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
    "toothbrush",
]
|
||
|
||
|
||
class ModelBackend(ABC):
    """Abstract interface that every model inference backend implements.

    Each model format (ONNX, TFLite) supplies its own subclass, so the
    calling code can drive any of them through the same set of methods.
    """

    @abstractmethod
    def load(self, model_path: str) -> bool:
        """Load the model file at *model_path*; return whether it succeeded."""

    @abstractmethod
    def is_ready(self) -> bool:
        """Return whether a model has been loaded and can be used."""

    @abstractmethod
    def get_input_shape(self) -> tuple:
        """Return the model's input shape, which preprocessing relies on."""

    @abstractmethod
    def infer(self, input_tensor: np.ndarray) -> np.ndarray:
        """Run inference on *input_tensor* and return the raw output."""
|
||
|
||
|
||
class ONNXBackend(ModelBackend):
    """Inference backend built on ONNX Runtime.

    The ``onnxruntime`` package is imported lazily inside :meth:`load`, so
    a missing installation is reported there as a failed load.
    """

    def __init__(self):
        self._is_ready = False
        self.session = None
        self.input_name: str = ""
        self.input_shape: tuple = (1, 3, 640, 640)

    def load(self, model_path: str) -> bool:
        """Create a CPU inference session for the ONNX file at *model_path*.

        On success the name and shape of the model's first input are
        recorded. Returns True on success; any failure (including a missing
        ``onnxruntime`` package) is logged and reported as False.
        """
        try:
            import onnxruntime as ort

            slog.info(f"加载 ONNX 模型: {model_path} ...")
            began = time.time()

            # Full graph optimisation, 4 intra-op and 2 inter-op threads.
            options = ort.SessionOptions()
            options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
            options.intra_op_num_threads = 4
            options.inter_op_num_threads = 2

            self.session = ort.InferenceSession(
                model_path,
                sess_options=options,
                providers=["CPUExecutionProvider"],
            )

            first_input = self.session.get_inputs()[0]
            self.input_name = first_input.name
            self.input_shape = tuple(first_input.shape)

            took = time.time() - began
            slog.info(
                f"ONNX 模型加载完成 (耗时: {took:.2f}s, 输入形状: {self.input_shape})"
            )
        except ImportError:
            slog.error("未安装 onnxruntime,无法加载 ONNX 模型")
            return False
        except Exception as exc:
            slog.error(f"加载 ONNX 模型失败: {exc}")
            return False

        self._is_ready = True
        return True

    def is_ready(self) -> bool:
        """Return True once a session has been created by a successful load."""
        if not self._is_ready:
            return False
        return self.session is not None

    def get_input_shape(self) -> tuple:
        """Return the input shape reported by the model (or the default)."""
        return self.input_shape

    def infer(self, input_tensor: np.ndarray) -> np.ndarray:
        """Run the session on *input_tensor* and return its first output.

        Raises:
            RuntimeError: if no model has been loaded.
        """
        if not self.session:
            raise RuntimeError("ONNX 模型未加载")
        feed = {self.input_name: input_tensor}
        results = self.session.run(None, feed)
        return np.asarray(results[0])
|
||
|
||
|
||
class TFLiteBackend(ModelBackend):
    """TensorFlow Lite inference backend.

    Two model layouts are supported:

    1. YOLO: a single output tensor, e.g. (1, 84, 8400); use :meth:`infer`.
    2. SSD: several output tensors (boxes, classes, scores,
       num_detections); use :meth:`infer_ssd`.
    """

    def __init__(self):
        self.interpreter: Any = None
        self.input_details: list[dict[str, Any]] = []
        self.output_details: list[dict[str, Any]] = []
        self.input_shape: tuple = (1, 640, 640, 3)
        self._is_ready = False
        self._is_nhwc = True
        self._is_ssd_format = False  # distinguishes the SSD layout from YOLO
        self._input_quantization: tuple[float, int] = (1.0, 0)  # scale, zero_point

    def load(self, model_path: str) -> bool:
        """Load the TFLite model at *model_path* and inspect its tensors.

        The interpreter class is taken from the first importable package
        among ``tflite_runtime``, ``ai_edge_litert`` and ``tensorflow``.
        After loading, the input shape, channel layout, input quantization
        parameters and the output layout (SSD vs. YOLO) are recorded.

        Returns:
            True on success; any failure is logged and reported as False.
        """
        try:
            Interpreter = None
            try:
                from tflite_runtime.interpreter import Interpreter  # type: ignore
            except ImportError:
                try:
                    from ai_edge_litert.interpreter import Interpreter  # type: ignore
                except ImportError:
                    try:
                        import tensorflow as tf

                        Interpreter = tf.lite.Interpreter
                    except ImportError:
                        pass

            if Interpreter is None:
                raise ImportError("未找到 tflite_runtime、ai_edge_litert 或 tensorflow")

            slog.info(f"加载 TFLite 模型: {model_path} ...")
            start_time = time.time()

            self.interpreter = Interpreter(model_path=model_path)
            self.interpreter.allocate_tensors()

            self.input_details = self.interpreter.get_input_details()
            self.output_details = self.interpreter.get_output_details()

            self.input_shape = tuple(self.input_details[0]["shape"])

            # A 4-D input whose last axis is 3 is treated as NHWC.
            if len(self.input_shape) == 4:
                self._is_nhwc = self.input_shape[3] == 3

            # Input quantization parameters (relevant for uint8 models).
            quant_params = self.input_details[0].get("quantization_parameters", {})
            scales = quant_params.get("scales", np.array([1.0]))
            zero_points = quant_params.get("zero_points", np.array([0]))
            if len(scales) > 0 and len(zero_points) > 0:
                self._input_quantization = (float(scales[0]), int(zero_points[0]))

            # SSD models usually expose several outputs (boxes, classes,
            # scores, num) whose names contain "TFLite_Detection_PostProcess".
            self._is_ssd_format = len(self.output_details) >= 3 and any(
                "Detection" in d.get("name", "") for d in self.output_details
            )

            elapsed = time.time() - start_time
            format_name = "SSD" if self._is_ssd_format else "YOLO"
            slog.info(
                f"TFLite 模型加载完成 (耗时: {elapsed:.2f}s, 输入: {self.input_shape}, "
                f"格式: {format_name}, 量化: {self._input_quantization})"
            )
            self._is_ready = True
            return True
        except ImportError:
            slog.error("未安装 tflite_runtime 或 tensorflow,无法加载 TFLite 模型")
            return False
        except Exception as e:
            slog.error(f"加载 TFLite 模型失败: {e}")
            return False

    def is_ready(self) -> bool:
        """Return True once an interpreter was created by a successful load."""
        return self._is_ready and self.interpreter is not None

    def get_input_shape(self) -> tuple:
        """Return the input shape reported by the model (or the default)."""
        return self.input_shape

    def is_nhwc(self) -> bool:
        """Return whether the model input uses the NHWC layout."""
        return self._is_nhwc

    def is_ssd_format(self) -> bool:
        """Return whether the model uses the SSD (multi-output) layout."""
        return self._is_ssd_format

    def get_input_quantization(self) -> tuple[float, int]:
        """Return the input quantization parameters as (scale, zero_point)."""
        return self._input_quantization

    def get_input_dtype(self) -> np.dtype:
        """Return the dtype the model expects for its input tensor.

        Raises:
            RuntimeError: if no model has been loaded (same error as
                :meth:`infer`, instead of a bare IndexError).
        """
        if not self.input_details:
            raise RuntimeError("TFLite 模型未加载")
        return self.input_details[0]["dtype"]

    def _invoke(self, input_tensor: np.ndarray) -> None:
        """Cast the input to the model dtype, feed it in and run the graph."""
        if not self.interpreter or len(self.input_details) == 0:
            raise RuntimeError("TFLite 模型未加载")

        spec = self.input_details[0]
        if input_tensor.dtype != spec["dtype"]:
            input_tensor = input_tensor.astype(spec["dtype"])

        self.interpreter.set_tensor(spec["index"], input_tensor)
        self.interpreter.invoke()

    def infer(self, input_tensor: np.ndarray) -> np.ndarray:
        """Run inference and return the first output tensor (YOLO layout).

        Raises:
            RuntimeError: if no model has been loaded.
        """
        self._invoke(input_tensor)
        output = self.interpreter.get_tensor(self.output_details[0]["index"])
        return np.asarray(output)

    def infer_ssd(
        self, input_tensor: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, int]:
        """Run inference on an SSD model and return its four outputs.

        SSD output layout (post-processing is built into the model):
        - boxes: (1, num_boxes, 4) normalized [y_min, x_min, y_max, x_max]
        - classes: (1, num_boxes) class ids
        - scores: (1, num_boxes) confidence scores
        - num_detections: number of valid detections

        Outputs are identified by name where possible, then by shape, and
        finally by position. Any output that cannot be identified is
        returned as an empty array.

        Raises:
            RuntimeError: if no model has been loaded.
        """
        self._invoke(input_tensor)

        boxes = None
        classes = None
        scores = None
        num_detections = 0

        for detail in self.output_details:
            name = detail.get("name", "")
            lowered = name.lower()
            shape = tuple(detail["shape"])
            tensor = np.asarray(self.interpreter.get_tensor(detail["index"]))

            # Rank is checked before indexing so a scalar output cannot
            # raise IndexError.
            if "boxes" in lowered or (len(shape) == 3 and shape[-1] == 4):
                boxes = tensor
            elif "class" in lowered or (
                # Shape alone cannot tell classes from scores, so only the
                # first such tensor after the boxes is taken as classes;
                # otherwise the scores tensor would overwrite it.
                classes is None
                and boxes is not None
                and len(shape) == 2
                and shape[1] > 1
            ):
                classes = tensor
            elif "score" in lowered or ":2" in name:
                scores = tensor
            elif "num" in lowered or (len(shape) == 1 and shape[0] == 1):
                if tensor.size > 0:
                    num_detections = int(tensor.reshape(-1)[0])

        # Fallback: assign by output position.
        if boxes is None or classes is None or scores is None:
            outputs = [
                np.asarray(self.interpreter.get_tensor(d["index"]))
                for d in self.output_details
            ]
            if len(outputs) >= 4:
                boxes, classes, scores = outputs[0], outputs[1], outputs[2]
                num_detections = (
                    int(outputs[3].reshape(-1)[0]) if outputs[3].size > 0 else 0
                )

        if boxes is None:
            boxes = np.array([])
        if classes is None:
            classes = np.array([])
        if scores is None:
            scores = np.array([])

        return boxes, classes, scores, num_detections
|
||
|
||
|
||
def get_model_type(model_path: str) -> str:
    """Classify a model file by its extension.

    Returns "tflite" when the extension is ``.tflite`` (case-insensitive)
    and "onnx" for everything else.
    """
    _, suffix = os.path.splitext(model_path)
    if suffix.lower() == ".tflite":
        return "tflite"
    return "onnx"
|
||
|
||
|
||
def create_backend(model_type: str) -> ModelBackend:
    """Instantiate the inference backend for *model_type*.

    "tflite" yields a TFLiteBackend; any other value yields an ONNXBackend.
    """
    backend_cls = TFLiteBackend if model_type == "tflite" else ONNXBackend
    return backend_cls()
|
||
|
||
|
||
class ObjectDetector:
    """Object detector that works with several model formats (ONNX, TFLite).

    All model access goes through the ModelBackend interface, so the
    detection logic itself is independent of the model format.
    """

    # Edge length assumed when the model does not declare a fixed input size
    # (matches the default ``input_shape`` set in ``__init__``).
    _DEFAULT_INPUT_SIZE = 640
    # IoU above which overlapping boxes are suppressed by NMS.
    _NMS_IOU_THRESHOLD = 0.45

    def __init__(self, model_path: str):
        self.model_path = model_path
        self.model_type = get_model_type(model_path)
        self.backend: ModelBackend | None = None
        self.input_shape: tuple = (1, 3, 640, 640)
        self._is_ready = False
        # class index -> label
        self.names: dict[int, str] = dict(enumerate(COCO_LABELS))

    def load_model(self) -> bool:
        """Create the backend, load the model and run one warm-up inference.

        Returns:
            True on success; any failure is logged and reported as False.
        """
        try:
            start_time = time.time()

            self.backend = create_backend(self.model_type)
            if not self.backend.load(self.model_path):
                return False

            self.input_shape = self.backend.get_input_shape()
            self._warmup()

            elapsed = time.time() - start_time
            slog.info(f"模型预热完成 (总耗时: {elapsed:.2f}s)")
            self._is_ready = True
            return True
        except Exception as e:
            slog.error(f"加载模型失败: {e}")
            return False

    def _warmup(self) -> None:
        """Run one inference on a blank image to reduce first-call latency."""
        if not self.backend:
            return

        dummy_img = np.zeros((640, 640, 3), dtype=np.uint8)
        dummy_input = self._preprocess(dummy_img)
        self.backend.infer(dummy_input)
        slog.info("模型预热完成")

    def is_ready(self) -> bool:
        """Return True when the model is loaded and its backend is usable."""
        return self._is_ready and self.backend is not None and self.backend.is_ready()

    def _get_target_size(self) -> int:
        """Return the (square) input edge length the model expects.

        The height axis is read from ``input_shape``: index 1 for NHWC
        (1, H, W, 3) and index 2 for NCHW (1, 3, H, W). When that entry is
        missing or is not a positive integer (for example a symbolic or
        dynamic dimension), ``_DEFAULT_INPUT_SIZE`` is used instead of
        raising.
        """
        axis = 1 if self._is_nhwc_format() else 2
        if len(self.input_shape) > axis:
            dim = self.input_shape[axis]
            if isinstance(dim, (int, np.integer)) and dim > 0:
                return int(dim)
        return self._DEFAULT_INPUT_SIZE

    def _is_nhwc_format(self) -> bool:
        """Return whether the current backend takes NHWC input."""
        if isinstance(self.backend, TFLiteBackend):
            return self.backend.is_nhwc()
        return False

    def _is_ssd_format(self) -> bool:
        """Return whether the current backend holds an SSD-layout model."""
        if isinstance(self.backend, TFLiteBackend):
            return self.backend.is_ssd_format()
        return False

    def _preprocess(self, image: np.ndarray) -> np.ndarray:
        """Turn an image into the batched input tensor the model expects.

        SSD models get a plain resize (aspect ratio not preserved) and, for
        uint8 inputs, raw 0-255 pixel values. All other models get a
        letterbox resize onto a gray (114) canvas, scaling to [0, 1], and
        NCHW or NHWC layout depending on the backend. The channel order is
        reversed in both paths (the source comment calls this BGR -> RGB).
        """
        target_size = self._get_target_size()
        h, w = image.shape[:2]

        if self._is_ssd_format():
            resized = cv2.resize(
                image, (target_size, target_size), interpolation=cv2.INTER_LINEAR
            )
            rgb = resized[:, :, ::-1]  # BGR -> RGB

            # uint8 models take the pixel values unchanged.
            if isinstance(self.backend, TFLiteBackend):
                if self.backend.get_input_dtype() == np.uint8:
                    return np.expand_dims(rgb.astype(np.uint8), axis=0)

            rgb = rgb.astype(np.float32) / 255.0
            return np.expand_dims(rgb, axis=0)

        # YOLO letterbox: scale to fit, then centre on a padded canvas.
        scale = min(target_size / h, target_size / w)
        # Clamp to 1 px so extreme aspect ratios cannot request a 0-size resize.
        new_h = max(1, int(h * scale))
        new_w = max(1, int(w * scale))

        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        canvas = np.full((target_size, target_size, 3), 114, dtype=np.uint8)
        top = (target_size - new_h) // 2
        left = (target_size - new_w) // 2
        canvas[top : top + new_h, left : left + new_w] = resized

        rgb = canvas[:, :, ::-1].astype(np.float32) / 255.0

        if self._is_nhwc_format():
            return np.expand_dims(rgb, axis=0)
        return np.expand_dims(rgb.transpose(2, 0, 1), axis=0)

    def _postprocess(
        self,
        outputs: np.ndarray,
        original_shape: tuple[int, int],
        threshold: float,
        label_filter: list[str] | None = None,
    ) -> list[dict[str, Any]]:
        """Decode raw YOLO output into a list of detections.

        The output is expected as (1, 4 + num_classes, num_boxes), e.g.
        (1, 84, 8400) for YOLO11: four box values (x_center, y_center,
        width, height) followed by one score per class.

        Args:
            outputs: raw model output.
            original_shape: (height, width) of the image that was given to
                ``_preprocess``.
            threshold: minimum confidence for a detection to be kept.
            label_filter: if given and non-empty, only these labels are kept.

        Returns:
            One dict per detection with the keys "label", "confidence",
            "box" (pixel corners), "area" and "norm_box" (centre and size
            as fractions of the image).
        """
        predictions = outputs[0].T  # (num_boxes, 4 + num_classes)

        boxes = predictions[:, :4]
        scores = predictions[:, 4:]

        class_ids = np.argmax(scores, axis=1)
        confidences = np.max(scores, axis=1)

        # Drop low-confidence candidates.
        keep = confidences >= threshold
        boxes = boxes[keep]
        confidences = confidences[keep]
        class_ids = class_ids[keep]

        if len(boxes) == 0:
            return []

        # centre/size -> corner coordinates
        x_center, y_center, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        x1 = x_center - w / 2
        y1 = y_center - h / 2
        x2 = x_center + w / 2
        y2 = y_center + h / 2

        # Undo the letterbox transform to get original-image coordinates.
        orig_h, orig_w = original_shape
        target_size = self._get_target_size()
        scale = min(target_size / orig_h, target_size / orig_w)
        pad_h = (target_size - orig_h * scale) / 2
        pad_w = (target_size - orig_w * scale) / 2

        x1 = np.clip((x1 - pad_w) / scale, 0, orig_w)
        y1 = np.clip((y1 - pad_h) / scale, 0, orig_h)
        x2 = np.clip((x2 - pad_w) / scale, 0, orig_w)
        y2 = np.clip((y2 - pad_h) / scale, 0, orig_h)

        # cv2.dnn.NMSBoxes takes boxes as [x, y, width, height], not as
        # corner pairs, so convert before suppressing overlaps.
        boxes_for_nms = np.stack([x1, y1, x2 - x1, y2 - y1], axis=1)
        indices = cv2.dnn.NMSBoxes(
            boxes_for_nms.tolist(),
            confidences.tolist(),
            threshold,
            self._NMS_IOU_THRESHOLD,
        )

        detections: list[dict[str, Any]] = []

        # Depending on the OpenCV version the result may be a list, a tuple
        # or an ndarray (possibly nested); flatten it to plain indices.
        if indices is None or len(indices) == 0:
            return detections

        for idx in np.asarray(indices).reshape(-1):
            idx = int(idx)

            cls_id = int(class_ids[idx])
            label = self.names.get(cls_id, f"class_{cls_id}")

            if label_filter and label not in label_filter:
                continue

            x_min_val = int(x1[idx])
            y_min_val = int(y1[idx])
            x_max_val = int(x2[idx])
            y_max_val = int(y2[idx])
            box_w = x_max_val - x_min_val
            box_h = y_max_val - y_min_val

            detections.append(
                {
                    "label": label,
                    "confidence": float(confidences[idx]),
                    "box": {
                        "x_min": x_min_val,
                        "y_min": y_min_val,
                        "x_max": x_max_val,
                        "y_max": y_max_val,
                    },
                    "area": box_w * box_h,
                    "norm_box": {
                        "x": (x_min_val + x_max_val) / 2 / orig_w,
                        "y": (y_min_val + y_max_val) / 2 / orig_h,
                        "w": box_w / orig_w,
                        "h": box_h / orig_h,
                    },
                }
            )

        return detections

    def detect(
        self,
        image: np.ndarray,
        threshold: float = 0.5,
        label_filter: list[str] | None = None,
        regions: list[tuple[int, int, int, int]] | None = None,
    ) -> tuple[list[dict], float]:
        """Run object detection on *image*.

        Args:
            image: image array; the first two axes are height and width.
            threshold: minimum confidence for a detection to be kept.
            label_filter: if given and non-empty, only these labels are kept.
            regions: optional (x_min, y_min, x_max, y_max) pixel rectangles.
                When given, detection runs on each crop separately and the
                results are mapped back to full-image coordinates.

        Returns:
            (detections, elapsed time in milliseconds).

        Raises:
            RuntimeError: if the model is not loaded.
        """
        if not self.is_ready():
            raise RuntimeError("模型未加载")

        start_time = time.time()
        detections: list[dict] = []

        if regions:
            h, w = image.shape[:2]
            for region in regions:
                x_min, y_min, x_max, y_max = region
                # Clamp the region to the image bounds.
                x_min = max(0, x_min)
                y_min = max(0, y_min)
                x_max = min(w, x_max)
                y_max = min(h, y_max)

                if x_max <= x_min or y_max <= y_min:
                    continue

                cropped = image[y_min:y_max, x_min:x_max]
                if cropped.size == 0:
                    continue

                for det in self._detect_single(cropped, threshold, label_filter):
                    box = det["box"]
                    box["x_min"] += x_min
                    box["y_min"] += y_min
                    box["x_max"] += x_min
                    box["y_max"] += y_min
                    # norm_box was computed relative to the crop; recompute
                    # it against the full frame so it agrees with "box".
                    det["norm_box"] = {
                        "x": (box["x_min"] + box["x_max"]) / 2 / w,
                        "y": (box["y_min"] + box["y_max"]) / 2 / h,
                        "w": (box["x_max"] - box["x_min"]) / w,
                        "h": (box["y_max"] - box["y_min"]) / h,
                    }
                    detections.append(det)
        else:
            detections = self._detect_single(image, threshold, label_filter)

        inference_time_ms = (time.time() - start_time) * 1000
        return detections, inference_time_ms

    def _detect_single(
        self, image: np.ndarray, threshold: float, label_filter: list[str] | None = None
    ) -> list[dict[str, Any]]:
        """Preprocess, infer and postprocess one image.

        Returns an empty list when no usable backend is available.
        """
        if not self.backend or not self.backend.is_ready():
            return []

        input_tensor = self._preprocess(image)

        # NOTE(review): SSD-layout models are preprocessed as SSD but their
        # first output is still decoded by the YOLO postprocess below;
        # TFLiteBackend.infer_ssd is not used here. An SSD decoder needs a
        # confirmed class-id -> label mapping before it can be added.
        output = self.backend.infer(input_tensor)

        original_shape = image.shape[:2]
        return self._postprocess(output, original_shape, threshold, label_filter)
|
||
|
||
|
||
class MotionDetector:
|
||
"""
|
||
运动检测器 - 基于背景差分法
|
||
用于在目标检测前预筛选有运动的帧,减少不必要的 AI 推理
|
||
"""
|
||
|
||
def __init__(self):
|
||
self.backgrounds: dict[str, np.ndarray] = {}
|
||
self.motion_threshold = 25
|
||
self.min_contour_area = 500
|
||
|
||
def detect(
|
||
self,
|
||
image: np.ndarray,
|
||
camera_name: str,
|
||
roi_points: list[float] | None = None,
|
||
) -> tuple[list[dict[str, Any]], bool]:
|
||
h, w = image.shape[:2]
|
||
if len(image.shape) == 3:
|
||
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||
else:
|
||
gray = image.copy()
|
||
|
||
# 高斯模糊平滑噪点
|
||
gray = cv2.GaussianBlur(gray, (21, 21), 0)
|
||
|
||
if camera_name not in self.backgrounds:
|
||
self.backgrounds[camera_name] = gray.astype(np.float32)
|
||
return [], False
|
||
|
||
cv2.accumulateWeighted(gray, self.backgrounds[camera_name], 0.1)
|
||
|
||
frame_delta = cv2.absdiff(
|
||
gray, cv2.convertScaleAbs(self.backgrounds[camera_name])
|
||
)
|
||
thresh = cv2.threshold(
|
||
frame_delta, self.motion_threshold, 255, cv2.THRESH_BINARY
|
||
)[1]
|
||
|
||
# ROI 区域掩码
|
||
if roi_points and len(roi_points) > 0:
|
||
mask = np.zeros((h, w), dtype=np.uint8)
|
||
pts = []
|
||
for i in range(0, len(roi_points), 2):
|
||
pts.append((int(roi_points[i] * w), int(roi_points[i + 1] * h)))
|
||
pts_np = np.array([pts], dtype=np.int32)
|
||
cv2.fillPoly(mask, [pts_np], (255,)) # type: ignore
|
||
thresh = cv2.bitwise_and(thresh, thresh, mask=mask)
|
||
|
||
kernel = np.ones((3, 3), np.uint8)
|
||
thresh = cv2.dilate(thresh, kernel, iterations=2)
|
||
|
||
contours, _ = cv2.findContours(
|
||
thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
|
||
)
|
||
|
||
motion_boxes = []
|
||
for contour in contours:
|
||
if cv2.contourArea(contour) < self.min_contour_area:
|
||
continue
|
||
x, y, cw, ch = cv2.boundingRect(contour)
|
||
motion_boxes.append(
|
||
{"y_min": y, "x_min": x, "y_max": y + ch, "x_max": x + cw}
|
||
)
|
||
|
||
has_motion = len(motion_boxes) > 0
|
||
return motion_boxes, has_motion
|