From d8f14db1ef2cdf72ba54d533fbdbc95b291fa6c3 Mon Sep 17 00:00:00 2001 From: sean yu <55464069+hexbabe@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:40:45 +0900 Subject: [PATCH] (1/3) Add NV12 frame format support for windows (#682) * Add NV12 support * Fix camera resolution listing by supporting FORMAT_VideoInfo2 in addition to FORMAT_VideoInfo. This change allows for better compatibility with various video formats by correctly retrieving width, height, and compression details from both format types (NV12 and YUY2) * Add support for configuring video capture pin format in open method * Import wmcodecdsp and try to use their nv12 const * Remove ifndef --- pkg/driver/camera/camera_windows.cpp | 152 +++++++++++++++++++++------ pkg/driver/camera/camera_windows.go | 60 ++++++++--- pkg/driver/camera/camera_windows.hpp | 1 + 3 files changed, 163 insertions(+), 50 deletions(-) diff --git a/pkg/driver/camera/camera_windows.cpp b/pkg/driver/camera/camera_windows.cpp index b7346f3..5520cd5 100644 --- a/pkg/driver/camera/camera_windows.cpp +++ b/pkg/driver/camera/camera_windows.cpp @@ -2,12 +2,26 @@ #include #include +#include #include #include #include "camera_windows.hpp" #include "_cgo_export.h" +static const uint32_t FOURCC_NV12 = 0x3231564E; // 'NV12' +static const uint32_t FOURCC_YUY2 = 0x32595559; // 'YUY2' + +// freeMediaType frees an AM_MEDIA_TYPE* allocated by GetStreamCaps. +static void freeMediaType(AM_MEDIA_TYPE* mt) +{ + if (mt->cbFormat != 0) + CoTaskMemFree(mt->pbFormat); + if (mt->pUnk != nullptr) + mt->pUnk->Release(); + CoTaskMemFree(mt); +} + imageProp* getProp(camera* cam, int i) { @@ -232,14 +246,26 @@ int listResolution(camera* cam, const char** errstr) continue; if (mediaType->majortype != MEDIATYPE_Video || - mediaType->formattype != FORMAT_VideoInfo || mediaType->pbFormat == nullptr) continue; - VIDEOINFOHEADER* videoInfoHdr = (VIDEOINFOHEADER*)mediaType->pbFormat; - cam->props[iProp].width = videoInfoHdr->bmiHeader.biWidth; - cam->props[iProp].height = videoInfoHdr->bmiHeader.biHeight; - cam->props[iProp].fcc = videoInfoHdr->bmiHeader.biCompression; + BITMAPINFOHEADER* bmi = nullptr; + if (mediaType->formattype == FORMAT_VideoInfo) + { + bmi = &((VIDEOINFOHEADER*)mediaType->pbFormat)->bmiHeader; + } + else if (mediaType->formattype == FORMAT_VideoInfo2) + { + bmi = &((VIDEOINFOHEADER2*)mediaType->pbFormat)->bmiHeader; + } + else + { + continue; + } + + cam->props[iProp].width = bmi->biWidth; + cam->props[iProp].height = bmi->biHeight; + cam->props[iProp].fcc = bmi->biCompression; iProp++; } cam->numProps = iProp; @@ -307,6 +333,55 @@ int openCamera(camera* cam, const char** errstr) goto fail; } + // Configure the capture pin format via IAMStreamConfig so the pin + // negotiation succeeds for both FORMAT_VideoInfo and FORMAT_VideoInfo2. + { + IPin* capturePin = getPin(captureFilter, PINDIR_OUTPUT); + if (capturePin != nullptr) + { + IAMStreamConfig* streamConfig = nullptr; + if (SUCCEEDED(capturePin->QueryInterface(IID_IAMStreamConfig, (void**)&streamConfig))) + { + int count = 0, size = 0; + if (SUCCEEDED(streamConfig->GetNumberOfCapabilities(&count, &size))) + { + for (int i = 0; i < count; ++i) + { + VIDEO_STREAM_CONFIG_CAPS caps; + AM_MEDIA_TYPE* mt = nullptr; + if (FAILED(streamConfig->GetStreamCaps(i, &mt, (BYTE*)&caps))) + continue; + + if (mt->majortype != MEDIATYPE_Video || mt->pbFormat == nullptr) + { + freeMediaType(mt); + continue; + } + + BITMAPINFOHEADER* bmi = nullptr; + if (mt->formattype == FORMAT_VideoInfo) + bmi = &((VIDEOINFOHEADER*)mt->pbFormat)->bmiHeader; + else if (mt->formattype == FORMAT_VideoInfo2) + bmi = &((VIDEOINFOHEADER2*)mt->pbFormat)->bmiHeader; + + if (bmi != nullptr && + bmi->biWidth == cam->width && + bmi->biHeight == cam->height && + bmi->biCompression == cam->fcc) + { + streamConfig->SetFormat(mt); + freeMediaType(mt); + break; + } + freeMediaType(mt); + } + } + safeRelease(&streamConfig); + } + safeRelease(&capturePin); + } + } + if (FAILED(CoCreateInstance( CLSID_SampleGrabber, nullptr, CLSCTX_INPROC, IID_IBaseFilter, (void**)&grabberFilter))) @@ -325,20 +400,11 @@ int openCamera(camera* cam, const char** errstr) AM_MEDIA_TYPE mediaType; memset(&mediaType, 0, sizeof(mediaType)); mediaType.majortype = MEDIATYPE_Video; - mediaType.subtype = MEDIASUBTYPE_YUY2; - mediaType.formattype = FORMAT_VideoInfo; - mediaType.bFixedSizeSamples = 1; - mediaType.cbFormat = sizeof(VIDEOINFOHEADER); - - VIDEOINFOHEADER videoInfoHdr; - memset(&videoInfoHdr, 0, sizeof(VIDEOINFOHEADER)); - videoInfoHdr.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); - videoInfoHdr.bmiHeader.biWidth = cam->width; - videoInfoHdr.bmiHeader.biHeight = cam->height; - videoInfoHdr.bmiHeader.biPlanes = 1; - videoInfoHdr.bmiHeader.biBitCount = 16; - videoInfoHdr.bmiHeader.biCompression = MAKEFOURCC('Y', 'U', 'Y', '2'); - mediaType.pbFormat = (BYTE*)&videoInfoHdr; + if (cam->fcc == FOURCC_NV12) + mediaType.subtype = MEDIASUBTYPE_NV12; + else + mediaType.subtype = MEDIASUBTYPE_YUY2; + // formattype left as GUID_NULL (wildcard) - accepts both VideoInfo and VideoInfo2 if (FAILED(grabber->SetMediaType(&mediaType))) { *errstr = errGrabber; @@ -440,23 +506,41 @@ HRESULT SampleGrabberCallback::BufferCB(double sampleTime, BYTE* buf, LONG len) fprintf(stderr, "Wrong frame buffer size: %d > %d\n", len, nPix * 2); return S_OK; } - int yi = 0; - int cbi = cam_->width * cam_->height; - int cri = cbi + cbi / 2; - // Pack as I422 - for (int y = 0; y < cam_->height; ++y) + + if (cam_->fcc == FOURCC_NV12) { - int j = y * cam_->width * 2; - for (int x = 0; x < cam_->width / 2; ++x) + // NV12: Y plane (nPix bytes) + interleaved UV plane (nPix/2 bytes). + // Convert to I420 planar: Y + U + V separate planes. + memcpy(gobuf, buf, nPix); + BYTE* uv = buf + nPix; + int ui = nPix; + int vi = nPix + nPix / 4; + for (int i = 0; i < nPix / 2; i += 2) { - gobuf[yi] = buf[j]; - gobuf[cbi] = buf[j + 1]; - gobuf[yi + 1] = buf[j + 2]; - gobuf[cri] = buf[j + 3]; - j += 4; - yi += 2; - cbi++; - cri++; + gobuf[ui++] = uv[i]; + gobuf[vi++] = uv[i + 1]; + } + } + else + { + // YUY2: packed YUYV. Convert to I422 planar. + int yi = 0; + int cbi = nPix; + int cri = cbi + cbi / 2; + for (int y = 0; y < cam_->height; ++y) + { + int j = y * cam_->width * 2; + for (int x = 0; x < cam_->width / 2; ++x) + { + gobuf[yi] = buf[j]; + gobuf[cbi] = buf[j + 1]; + gobuf[yi + 1] = buf[j + 2]; + gobuf[cri] = buf[j + 3]; + j += 4; + yi += 2; + cbi++; + cri++; + } } } diff --git a/pkg/driver/camera/camera_windows.go b/pkg/driver/camera/camera_windows.go index 045a465..0038b9c 100644 --- a/pkg/driver/camera/camera_windows.go +++ b/pkg/driver/camera/camera_windows.go @@ -82,6 +82,7 @@ func (c *camera) Open() error { var errStr *C.char if C.listResolution(c.cam, &errStr) != 0 { + C.free(unsafe.Pointer(c.cam.name)) return fmt.Errorf("failed to open device: %s", C.GoString(errStr)) } @@ -120,10 +121,18 @@ func (c *camera) Close() error { func (c *camera) VideoRecord(p prop.Media) (video.Reader, error) { nPix := p.Width * p.Height - c.buf = make([]byte, nPix*2) // for YUY2 + c.buf = make([]byte, nPix*2) c.bufGo = make([]byte, nPix*2) c.cam.width = C.int(p.Width) c.cam.height = C.int(p.Height) + + switch p.FrameFormat { + case frame.FormatNV12: + c.cam.fcc = fourccNV12 + default: + c.cam.fcc = fourccYUY2 + } + c.cam.buf = C.size_t(uintptr(unsafe.Pointer(&c.buf[0]))) var errStr *C.char @@ -142,12 +151,24 @@ func (c *camera) VideoRecord(p prop.Media) (video.Reader, error) { if !ok { return nil, func() {}, io.EOF } - img.Y = b[:nPix] - img.Cb = b[nPix : nPix+nPix/2] - img.Cr = b[nPix+nPix/2 : nPix*2] - img.YStride = p.Width - img.CStride = p.Width / 2 - img.SubsampleRatio = image.YCbCrSubsampleRatio422 + + if p.FrameFormat == frame.FormatNV12 { + // I420: Y plane (nPix) + U plane (nPix/4) + V plane (nPix/4) + img.Y = b[:nPix] + img.Cb = b[nPix : nPix+nPix/4] + img.Cr = b[nPix+nPix/4 : nPix+nPix/2] + img.YStride = p.Width + img.CStride = p.Width / 2 + img.SubsampleRatio = image.YCbCrSubsampleRatio420 + } else { // YUY2 + // I422: Y plane (nPix) + Cb plane (nPix/2) + Cr plane (nPix/2) + img.Y = b[:nPix] + img.Cb = b[nPix : nPix+nPix/2] + img.Cr = b[nPix+nPix/2 : nPix*2] + img.YStride = p.Width + img.CStride = p.Width / 2 + img.SubsampleRatio = image.YCbCrSubsampleRatio422 + } img.Rect = image.Rect(0, 0, p.Width, p.Height) return img, func() {}, nil }) @@ -158,20 +179,27 @@ func (c *camera) Properties() []prop.Media { properties := []prop.Media{} for i := 0; i < int(c.cam.numProps); i++ { p := C.getProp(c.cam, C.int(i)) - // TODO: support other FOURCC - if p.fcc == fourccYUY2 { - properties = append(properties, prop.Media{ - Video: prop.Video{ - Width: int(p.width), - Height: int(p.height), - FrameFormat: frame.FormatYUY2, - }, - }) + var fmt frame.Format + switch p.fcc { + case fourccYUY2: + fmt = frame.FormatYUY2 + case fourccNV12: + fmt = frame.FormatNV12 + default: + continue } + properties = append(properties, prop.Media{ + Video: prop.Video{ + Width: int(p.width), + Height: int(p.height), + FrameFormat: fmt, + }, + }) } return properties } const ( fourccYUY2 = 0x32595559 + fourccNV12 = 0x3231564E ) diff --git a/pkg/driver/camera/camera_windows.hpp b/pkg/driver/camera/camera_windows.hpp index 55669e6..06a21f2 100644 --- a/pkg/driver/camera/camera_windows.hpp +++ b/pkg/driver/camera/camera_windows.hpp @@ -17,6 +17,7 @@ typedef struct { int width; int height; + uint32_t fcc; size_t buf; // uintptr char* name;