feat: added yoloface

2026-04-22 23:17:27 +08:00 · 2024-07-03 13:13:00 +00:00
parent 5d6050f78e
commit 4e245c8fad
8 changed files with 328 additions and 0 deletions
@@ -3,6 +3,7 @@ module github.com/dev6699/face
 go 1.22.4
 require (
 	gocv.io/x/gocv v0.37.0
 	google.golang.org/grpc v1.64.0
 	google.golang.org/protobuf v1.34.2
 )
@@ -1,5 +1,7 @@
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 gocv.io/x/gocv v0.37.0 h1:sISHvnApErjoJodz1Dxb8UAkFdITOB3vXGslbVu6Knk=
 gocv.io/x/gocv v0.37.0/go.mod h1:lmS802zoQmnNvXETpmGriBqWrENPei2GxYx5KUxJsMA=
 golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
 golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
 golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
@@ -0,0 +1,41 @@
 ## YOLOFace face detection with 5-point landmarks
 <img src="output.jpg" alt="Example output with detected faces and landmarks">
 ---
 Model description 
 [Get model](https://github.com/facefusion/facefusion-assets/releases/download/models/yoloface_8n.onnx)
 ```
 {
  "name": "yoloface",
  "versions": [
    "1"
  ],
  "platform": "onnxruntime_onnx",
  "inputs": [
    {
      "name": "images",
      "datatype": "FP32",
      "shape": [
        1,
        3,
        640,
        640
      ]
    }
  ],
  "outputs": [
    {
      "name": "output0",
      "datatype": "FP32",
      "shape": [
        1,
        20,
        8400
      ]
    }
  ]
 }
 ```
@@ -0,0 +1,155 @@
 package yoloface
 import (
 	"fmt"
 	"math"
 	"sort"
 
 	"github.com/dev6699/face/model"
 	"gocv.io/x/gocv"
 )
 // Detection is a single face candidate produced by PostProcess.
 type Detection struct {
 	// BoundingBox is the face box as corner coordinates, multiplied by the
 	// width/height ratios recorded on the Model during PreProcess.
 	BoundingBox   model.BoundingBox
 	// FaceLandmark5 holds the landmark points extracted for this candidate,
 	// scaled with the same ratios as BoundingBox.
 	FaceLandmark5 []gocv.Point2f
 	// Confidence is the score value read from the model output.
 	Confidence    float32
 }
 // PostProcess decodes the raw "output0" tensor into face detections.
 //
 // The tensor is read as FP32 values for a [1, 20, 8400] output stored
 // channel by channel: for candidate i, channel c lives at index c*8400+i.
 // Channels 0-3 are used as box centre x, centre y, width and height,
 // channel 4 as the score, and channels 5-19 as five landmark triplets of
 // which only the first two values (x, y) are kept.
 //
 // Candidates scoring below m.faceDetectorScore are dropped. The remainder
 // are ordered by descending confidence *before* non-maximum suppression so
 // that, within a cluster of overlapping boxes, the highest-scoring one is
 // the one that survives. The returned detections are sorted by descending
 // confidence.
 //
 // Coordinates are multiplied by the ratios stored on the Model by the most
 // recent PreProcess call, so PostProcess must be paired with that call.
 //
 // An error is returned when no output tensor is supplied, when the bytes
 // cannot be decoded, or when the tensor holds fewer than 20*8400 values.
 func (m *Model) PostProcess(rawOutputContents [][]byte) (*Output, error) {
 	const (
 		outputCount   = 8400 // candidates per channel
 		channelCount  = 20   // 4 box + 1 score + 15 landmark values
 		landmarkCount = 5
 	)
 
 	if len(rawOutputContents) == 0 {
 		return nil, fmt.Errorf("yoloface: no output tensors to post-process")
 	}
 
 	rawDetections, err := model.BytesToFloat32Slice(rawOutputContents[0])
 	if err != nil {
 		return nil, err
 	}
 	if len(rawDetections) < channelCount*outputCount {
 		return nil, fmt.Errorf("yoloface: output0 holds %d values, want at least %d",
 			len(rawDetections), channelCount*outputCount)
 	}
 
 	ratioWidth := m.ratioWidth
 	ratioHeight := m.ratioHeight
 
 	boundingBoxRaw := rawDetections[:4*outputCount]
 	scoreRaw := rawDetections[4*outputCount : 5*outputCount]
 	faceLandmark5Raw := rawDetections[5*outputCount:]
 
 	var detections []Detection
 	for i := 0; i < outputCount; i++ {
 		score := scoreRaw[i]
 		if score < m.faceDetectorScore {
 			continue
 		}
 
 		// Box is stored as centre + size; convert to corners in float32 and
 		// only then widen, matching the precision of the raw values.
 		centerX := boundingBoxRaw[i]
 		centerY := boundingBoxRaw[i+outputCount]
 		halfWidth := boundingBoxRaw[i+2*outputCount] / 2
 		halfHeight := boundingBoxRaw[i+3*outputCount] / 2
 
 		// Each landmark occupies three consecutive channels; the third
 		// value of every triplet is not used.
 		landmarks := make([]gocv.Point2f, 0, landmarkCount)
 		for k := 0; k < landmarkCount; k++ {
 			base := 3*k*outputCount + i
 			landmarks = append(landmarks, gocv.Point2f{
 				X: faceLandmark5Raw[base] * ratioWidth,
 				Y: faceLandmark5Raw[base+outputCount] * ratioHeight,
 			})
 		}
 
 		detections = append(detections, Detection{
 			BoundingBox: model.BoundingBox{
 				X1: float64(centerX-halfWidth) * float64(ratioWidth),
 				Y1: float64(centerY-halfHeight) * float64(ratioHeight),
 				X2: float64(centerX+halfWidth) * float64(ratioWidth),
 				Y2: float64(centerY+halfHeight) * float64(ratioHeight),
 			},
 			FaceLandmark5: landmarks,
 			Confidence:    score,
 		})
 	}
 
 	// applyNMS keeps the first box of every overlapping cluster, so the
 	// candidates have to be in best-first order before it runs.
 	sort.SliceStable(detections, func(a, b int) bool {
 		return detections[a].Confidence > detections[b].Confidence
 	})
 
 	keepIndices := applyNMS(detections, m.iouThreshold)
 	keepDetections := make([]Detection, len(keepIndices))
 	for i, idx := range keepIndices {
 		keepDetections[i] = detections[idx]
 	}
 
 	// Guarantee the documented output order independently of the order in
 	// which applyNMS reports the surviving indices.
 	sort.SliceStable(keepDetections, func(a, b int) bool {
 		return keepDetections[a].Confidence > keepDetections[b].Confidence
 	})
 
 	return &Output{
 		Detections: keepDetections,
 	}, nil
 }
 // applyNMS runs greedy non-maximum suppression over detections and returns
 // the indices (into detections) of the boxes that survive.
 //
 // Boxes are visited in slice order: the first pending box is always kept,
 // and every later box whose IoU with it exceeds iouThreshold is discarded.
 // Areas and overlaps are computed with a +1 on each side length. A nil
 // slice is returned when detections is empty.
 func applyNMS(detections []Detection, iouThreshold float64) []int {
 	count := len(detections)
 
 	// One pass builds both the per-box area table and the initial work list.
 	areas := make([]float64, count)
 	pending := make([]int, count)
 	for i := range detections {
 		b := detections[i].BoundingBox
 		areas[i] = (b.X2 - b.X1 + 1) * (b.Y2 - b.Y1 + 1)
 		pending[i] = i
 	}
 
 	var keepIndices []int
 	for len(pending) > 0 {
 		ref := pending[0]
 		keepIndices = append(keepIndices, ref)
 		refBox := detections[ref].BoundingBox
 
 		var survivors []int
 		for _, cand := range pending[1:] {
 			candBox := detections[cand].BoundingBox
 
 			left := math.Max(refBox.X1, candBox.X1)
 			top := math.Max(refBox.Y1, candBox.Y1)
 			right := math.Min(refBox.X2, candBox.X2)
 			bottom := math.Min(refBox.Y2, candBox.Y2)
 
 			intersection := math.Max(0, right-left+1) * math.Max(0, bottom-top+1)
 			iou := intersection / (areas[ref] + areas[cand] - intersection)
 
 			// Only boxes at or below the threshold stay in play.
 			if iou <= iouThreshold {
 				survivors = append(survivors, cand)
 			}
 		}
 		pending = survivors
 	}
 	return keepIndices
 }
@@ -0,0 +1,81 @@
 package yoloface
 import (
 	"image"
 	"math"
 	"github.com/dev6699/face/protobuf"
 	"gocv.io/x/gocv"
 )
 // PreProcess turns the input image into the FP32 tensor contents sent to
 // the model.
 //
 // A clone of the image is capped to 640x640 by resizeFrameResolution and
 // then packed by prepareDetectFrame. The original-to-resized width and
 // height ratios are stored on the Model so that PostProcess can scale its
 // coordinates with them. The error result is always nil.
 func (m *Model) PreProcess(i *Input) ([]*protobuf.InferTensorContents, error) {
 	detectorSize := Resolution{Width: 640, Height: 640}
 
 	srcWidth := i.Img.Cols()
 	srcHeight := i.Img.Rows()
 
 	// Work on a clone so the caller's Mat is never resized in place.
 	resized, resizedWidth, resizedHeight := resizeFrameResolution(i.Img.Clone(), detectorSize)
 	defer resized.Close()
 
 	m.ratioHeight = float32(srcHeight) / float32(resizedHeight)
 	m.ratioWidth = float32(srcWidth) / float32(resizedWidth)
 
 	tensor := prepareDetectFrame(resized, detectorSize)
 	return []*protobuf.InferTensorContents{{Fp32Contents: tensor}}, nil
 }
 // Resolution is a width/height pair; in this file it describes the maximum
 // frame size and the detector input size.
 type Resolution struct {
 	Width  uint
 	Height uint
 }
 // resizeFrameResolution shrinks visionFrame so that it fits inside
 // maxResolution while keeping its aspect ratio, and returns the frame
 // together with its resulting width and height.
 //
 // A frame that already fits is returned untouched. Otherwise the frame is
 // resized in place (the same Mat is used as source and destination) and the
 // new dimensions are the scaled ones truncated to integers.
 func resizeFrameResolution(visionFrame gocv.Mat, maxResolution Resolution) (gocv.Mat, uint, uint) {
 	cols, rows := visionFrame.Cols(), visionFrame.Rows()
 	limitW, limitH := int(maxResolution.Width), int(maxResolution.Height)
 
 	// Nothing to do when both sides are within the limit.
 	if rows <= limitH && cols <= limitW {
 		return visionFrame, uint(cols), uint(rows)
 	}
 
 	// The tighter of the two constraints decides the scale factor.
 	factor := math.Min(float64(limitH)/float64(rows), float64(limitW)/float64(cols))
 	target := image.Point{
 		X: int(float64(cols) * factor),
 		Y: int(float64(rows) * factor),
 	}
 	gocv.Resize(visionFrame, &visionFrame, target, 0, 0, gocv.InterpolationDefault)
 	return visionFrame, uint(target.X), uint(target.Y)
 }
 // prepareDetectFrame packs visionFrame into a planar float32 buffer of
 // 3 * height * width values for a detector of size faceDetectorSize.
 //
 // The frame is copied into the top-left corner of a fresh 8-bit 3-channel
 // canvas of the detector size; pixels outside the copied area keep whatever
 // NewMatWithSize initialised them to (presumably zero; confirm against gocv).
 // Every channel value v is mapped to (v - 127.5) / 128 and written so that
 // all values of channel 0 come first, then channel 1, then channel 2, in the
 // channel order the Mat stores them.
 func prepareDetectFrame(visionFrame gocv.Mat, faceDetectorSize Resolution) []float32 {
 	cols := int(faceDetectorSize.Width)
 	rows := int(faceDetectorSize.Height)
 
 	canvas := gocv.NewMatWithSize(rows, cols, gocv.MatTypeCV8UC3)
 	defer canvas.Close()
 
 	// Region shares pixels with canvas, so copying into it fills the canvas.
 	target := canvas.Region(image.Rect(0, 0, visionFrame.Cols(), visionFrame.Rows()))
 	defer target.Close()
 	visionFrame.CopyTo(&target)
 
 	planeSize := rows * cols
 	output := make([]float32, 3*planeSize)
 	first := output[:planeSize]
 	second := output[planeSize : 2*planeSize]
 	third := output[2*planeSize:]
 
 	normalize := func(v uint8) float32 {
 		return (float32(v) - 127.5) / 128.0
 	}
 
 	for y := 0; y < rows; y++ {
 		rowStart := y * cols
 		for x := 0; x < cols; x++ {
 			px := canvas.GetVecbAt(y, x)
 			pos := rowStart + x
 			first[pos] = normalize(px[0])
 			second[pos] = normalize(px[1])
 			third[pos] = normalize(px[2])
 		}
 	}
 	return output
 }
@@ -0,0 +1,46 @@
 package yoloface
 import (
 	"github.com/dev6699/face/model"
 	"gocv.io/x/gocv"
 )
 // Model holds the yoloface detection settings plus the scaling state that
 // PreProcess records for PostProcess.
 type Model struct {
 	// faceDetectorScore is the minimum score a candidate needs to be kept.
 	faceDetectorScore float32
 	// iouThreshold is the IoU limit passed to non-maximum suppression.
 	iouThreshold      float64
 	// ratioHeight and ratioWidth are the original/resized size ratios written
 	// by PreProcess and read by PostProcess.
 	// NOTE(review): this per-call state lives on the Model, so one instance
 	// presumably must not process several images concurrently; confirm.
 	ratioHeight       float32
 	ratioWidth        float32
 }
 // Input is the argument to PreProcess.
 type Input struct {
 	// Img is the image to run detection on. PreProcess works on a clone of it.
 	Img gocv.Mat
 }
 // Output is the result returned by PostProcess.
 type Output struct {
 	// Detections are the faces that survived score filtering and
 	// non-maximum suppression.
 	Detections []Detection
 }
 // ModelT is the generic model.Model type instantiated with this package's
 // *Input and *Output.
 type ModelT = model.Model[*Input, *Output]
 // Compile-time check that *Model can be used as a ModelT.
 var _ ModelT = &Model{}
 // NewFactory returns a function that builds a fresh Model, configured with
 // the given score and IoU thresholds, on every call.
 func NewFactory(faceDetectorScore float32, iouThreshold float64) func() ModelT {
 	build := func() ModelT {
 		return New(faceDetectorScore, iouThreshold)
 	}
 	return build
 }
 // New creates a Model that keeps candidates scoring at least
 // faceDetectorScore and uses iouThreshold for non-maximum suppression.
 func New(faceDetectorScore float32, iouThreshold float64) *Model {
 	m := new(Model)
 	m.faceDetectorScore = faceDetectorScore
 	m.iouThreshold = iouThreshold
 	return m
 }
 // ModelName returns the fixed model name "yoloface".
 func (m *Model) ModelName() string {
 	const name = "yoloface"
 	return name
 }
 // ModelVersion returns the fixed model version "1".
 func (m *Model) ModelVersion() string {
 	const version = "1"
 	return version
 }
@@ -0,0 +1,2 @@
 name: "yoloface"
 platform: "onnxruntime_onnx"
		`@@ -0,0 +1,2 @@`
							`name: "yoloface"`
							`platform: "onnxruntime_onnx"`