From 810921d4486e38dfd3054309a3f5b1905340cddf Mon Sep 17 00:00:00 2001 From: Atsushi Watanabe Date: Mon, 10 Nov 2025 09:26:21 +0900 Subject: [PATCH] Add SVT-AV1 codec (#660) --- .github/workflows/ci.yaml | 6 +- README.md | 8 ++ pkg/codec/svtav1/bridge.c | 121 +++++++++++++++++++++++ pkg/codec/svtav1/bridge.h | 28 ++++++ pkg/codec/svtav1/errors.go | 35 +++++++ pkg/codec/svtav1/params.go | 47 +++++++++ pkg/codec/svtav1/svtav1.go | 165 ++++++++++++++++++++++++++++++++ pkg/codec/svtav1/svtav1_test.go | 146 ++++++++++++++++++++++++++++ 8 files changed, 554 insertions(+), 2 deletions(-) create mode 100644 pkg/codec/svtav1/bridge.c create mode 100644 pkg/codec/svtav1/bridge.h create mode 100644 pkg/codec/svtav1/errors.go create mode 100644 pkg/codec/svtav1/params.go create mode 100644 pkg/codec/svtav1/svtav1.go create mode 100644 pkg/codec/svtav1/svtav1_test.go diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c49c0d7..5b246d9 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -27,6 +27,7 @@ jobs: sudo apt-get update -qq \ && sudo apt-get install --no-install-recommends -y \ libopus-dev \ + libsvtav1enc-dev \ libva-dev \ libvpx-dev \ libx11-dev \ @@ -55,9 +56,10 @@ jobs: run: | which brew brew install \ - pkg-config \ - opus \ libvpx \ + opus \ + pkg-config \ + svt-av1 \ x264 - name: Run Test Suite run: make test diff --git a/README.md b/README.md index 99f92ce..1f1daac 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,14 @@ A codec library which supports H.264 encoding and decoding. It is suitable for u * Package: [github.com/pion/mediadevices/pkg/codec/openh264](https://pkg.go.dev/github.com/pion/mediadevices/pkg/codec/openh264) * Installation: no installation needed, included as a static binary +##### svtav1 +A free software video codec library from the Alliance for Open Media that implements AV1 video coding formats. 
+
+* Package: [github.com/pion/mediadevices/pkg/codec/svtav1](https://pkg.go.dev/github.com/pion/mediadevices/pkg/codec/svtav1)
+* Installation:
+  * Mac: `brew install svt-av1`
+  * Ubuntu: `apt install libsvtav1enc-dev`
+
 ##### vpx
 A free software video codec library from Google and the Alliance for Open Media that implements VP8/VP9 video coding formats.
 
diff --git a/pkg/codec/svtav1/bridge.c b/pkg/codec/svtav1/bridge.c
new file mode 100644
index 0000000..f76ae82
--- /dev/null
+++ b/pkg/codec/svtav1/bridge.c
@@ -0,0 +1,168 @@
+// NOTE(review): the header names were missing from this copy of the patch and
+// are reconstructed from the symbols used below; confirm against the
+// committed file.
+#include <EbSvtAv1Enc.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "bridge.h"
+
+// enc_new allocates an Encoder with its configuration and input buffer header
+// and obtains an SVT-AV1 handle for it via svt_av1_enc_init_handle().
+// On success *e must later be released with enc_free(). On failure every
+// partial allocation is released, *e is set to NULL and ERR_INIT_ENC_HANDLER
+// is returned.
+int enc_new(Encoder **e) {
+  // calloc zeroes the struct, so handle starts NULL and force_keyframe starts
+  // false; enc_free() can then clean up a partially built encoder.
+  Encoder *enc = calloc(1, sizeof(Encoder));
+  *e = NULL;
+  if (enc == NULL) {
+    return ERR_INIT_ENC_HANDLER;
+  }
+
+  enc->param = calloc(1, sizeof(EbSvtAv1EncConfiguration));
+  enc->in_buf = calloc(1, sizeof(EbBufferHeaderType));
+  if (enc->param == NULL || enc->in_buf == NULL) {
+    enc_free(enc);
+    return ERR_INIT_ENC_HANDLER;
+  }
+
+  // p_buffer holds the plane pointers and strides of the frame being sent.
+  enc->in_buf->p_buffer = calloc(1, sizeof(EbSvtIOFormat));
+  if (enc->in_buf->p_buffer == NULL) {
+    enc_free(enc);
+    return ERR_INIT_ENC_HANDLER;
+  }
+  enc->in_buf->size = sizeof(EbBufferHeaderType);
+
+#if SVT_AV1_CHECK_VERSION(3, 0, 0)
+  const EbErrorType sret = svt_av1_enc_init_handle(&enc->handle, enc->param);
+#else
+  const EbErrorType sret = svt_av1_enc_init_handle(&enc->handle, NULL, enc->param);
+#endif
+  if (sret != EB_ErrorNone) {
+    enc_free(enc);
+    return ERR_INIT_ENC_HANDLER;
+  }
+
+  *e = enc;
+  return 0;
+}
+
+// enc_free shuts the encoder down and releases everything enc_new() allocated.
+// NULL and partially constructed encoders are accepted. Always returns 0.
+int enc_free(Encoder *e) {
+  if (e == NULL) {
+    return 0;
+  }
+  if (e->handle != NULL) {
+    svt_av1_enc_deinit(e->handle);
+    svt_av1_enc_deinit_handle(e->handle);
+  }
+  if (e->in_buf != NULL) {
+    free(e->in_buf->p_buffer);
+    free(e->in_buf);
+  }
+  free(e->param);
+  free(e);
+
+  return 0;
+}
+
+// enc_init fixes the input format to 8-bit YUV 4:2:0, applies e->param and
+// initializes the encoder. Returns 0, ERR_SET_ENC_PARAM or ERR_ENC_INIT.
+int enc_init(Encoder *e) {
+  EbErrorType sret;
+
+  e->param->encoder_bit_depth = 8;
+  e->param->encoder_color_format = EB_YUV420;
+
+  sret = svt_av1_enc_set_parameter(e->handle, e->param);
+  if (sret != EB_ErrorNone) {
+    return ERR_SET_ENC_PARAM;
+  }
+
+  sret = svt_av1_enc_init(e->handle);
+  if (sret != EB_ErrorNone) {
+    return ERR_ENC_INIT;
+  }
+
+  return 0;
+}
+
+// enc_apply_param passes the current contents of e->param to the encoder.
+// Returns 0 or ERR_SET_ENC_PARAM.
+int enc_apply_param(Encoder *e) {
+  const EbErrorType sret = svt_av1_enc_set_parameter(e->handle, e->param);
+  if (sret != EB_ErrorNone) {
+    return ERR_SET_ENC_PARAM;
+  }
+
+  return 0;
+}
+
+// enc_force_keyframe marks the next frame passed to enc_send_frame() as a key
+// picture. Always returns 0.
+int enc_force_keyframe(Encoder *e) {
+  e->force_keyframe = true;
+  return 0;
+}
+
+// enc_send_frame submits one planar frame to the encoder. y, cb and cr are
+// the plane pointers; ystride and cstride are the luma and chroma strides.
+// Returns 0 or ERR_SEND_PICTURE.
+int enc_send_frame(Encoder *e, uint8_t *y, uint8_t *cb, uint8_t *cr, int ystride, int cstride) {
+  EbSvtIOFormat *in_data = (EbSvtIOFormat *)e->in_buf->p_buffer;
+  in_data->luma = y;
+  in_data->cb = cb;
+  in_data->cr = cr;
+  in_data->y_stride = ystride;
+  in_data->cb_stride = cstride;
+  in_data->cr_stride = cstride;
+
+  e->in_buf->pic_type = EB_AV1_INVALID_PICTURE; // auto
+  if (e->force_keyframe) {
+    // A pending request applies to this frame only.
+    e->in_buf->pic_type = EB_AV1_KEY_PICTURE;
+    e->force_keyframe = false;
+  }
+  e->in_buf->flags = 0;
+  e->in_buf->pts++;
+  // Luma plane plus two chroma planes of half the height each.
+  e->in_buf->n_filled_len = ystride * e->param->source_height;
+  e->in_buf->n_filled_len += 2 * cstride * e->param->source_height / 2;
+
+  const EbErrorType sret = svt_av1_enc_send_picture(e->handle, e->in_buf);
+  if (sret != EB_ErrorNone) {
+    return ERR_SEND_PICTURE;
+  }
+  return 0;
+}
+
+// enc_get_packet asks the encoder for an output packet. *out is reset to NULL
+// before the call and again on error; an empty queue is not an error.
+// Returns 0 or ERR_GET_PACKET.
+int enc_get_packet(Encoder *e, EbBufferHeaderType **out) {
+  // Never leave a stale pointer behind for the caller to dereference.
+  *out = NULL;
+
+  const EbErrorType sret = svt_av1_enc_get_packet(e->handle, out, 0);
+  if (sret == EB_NoErrorEmptyQueue) {
+    return 0;
+  }
+  if (sret != EB_ErrorNone) {
+    *out = NULL;
+    return ERR_GET_PACKET;
+  }
+
+  return 0;
+}
+
+void memcpy_uint8(uint8_t *dst, const uint8_t *src, size_t n) {
+  // Just make CGO types compatible
+  memcpy(dst, src, n);
+}
diff --git a/pkg/codec/svtav1/bridge.h b/pkg/codec/svtav1/bridge.h
new file mode 100644
index 0000000..3be2441
--- /dev/null
+++ b/pkg/codec/svtav1/bridge.h
@@ -0,0 +1,35 @@
+#pragma once
+
+// NOTE(review): the header names were missing from this copy of the patch and
+// are reconstructed from the types used below; confirm against the committed
+// file.
+#include <EbSvtAv1Enc.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+// Status codes returned by the enc_* functions; 0 means success.
+#define ERR_INIT_ENC_HANDLER 1
+#define ERR_SET_ENC_PARAM 2
+#define ERR_ENC_INIT 3
+#define ERR_SEND_PICTURE 4
+#define ERR_GET_PACKET 5
+
+// Encoder bundles the SVT-AV1 handle with the buffers owned by the bridge.
+typedef struct Encoder {
+  EbSvtAv1EncConfiguration *param;
+  EbComponentType *handle;
+  EbBufferHeaderType *in_buf;
+
+  // Set by enc_force_keyframe(), consumed by the next enc_send_frame().
+  bool force_keyframe;
+} Encoder;
+
+int enc_free(Encoder *e);
+int enc_new(Encoder **e);
+int enc_init(Encoder *e);
+int enc_apply_param(Encoder *e);
+int enc_force_keyframe(Encoder *e);
+int enc_send_frame(Encoder *e, uint8_t *y, uint8_t *cb, uint8_t *cr, int ystride, int cstride);
+int enc_get_packet(Encoder *e, EbBufferHeaderType **out);
+void memcpy_uint8(uint8_t *dst, const uint8_t *src, size_t n);
diff --git a/pkg/codec/svtav1/errors.go b/pkg/codec/svtav1/errors.go
new file mode 100644
index 0000000..75a33a3
--- /dev/null
+++ b/pkg/codec/svtav1/errors.go
@@ -0,0 +1,45 @@
+package svtav1
+
+import "errors"
+
+// #cgo pkg-config: SvtAv1Enc
+// #include "bridge.h"
+import "C"
+
+// Errors reported by the encoder. All but ErrUnknownErrorCode correspond to an
+// ERR_* status code defined in bridge.h.
+var (
+	// ErrUnknownErrorCode is returned for a bridge code without a Go counterpart.
+	ErrUnknownErrorCode = errors.New("unknown error code")
+	// ErrInitEncHandler is returned when the encoder handle cannot be created.
+	ErrInitEncHandler = errors.New("failed to initialize encoder handler")
+	// ErrSetEncParam is returned when the encoder rejects its parameters.
+	ErrSetEncParam = errors.New("failed to set encoder parameters")
+	// ErrEncInit is returned when the encoder fails to initialize.
+	ErrEncInit = errors.New("failed to initialize encoder")
+	// ErrSendPicture is returned when a frame cannot be submitted for encoding.
+	ErrSendPicture = errors.New("failed to send picture")
+	// ErrGetPacket is returned when an encoded packet cannot be retrieved.
+	ErrGetPacket = errors.New("failed to get packet")
+)
+
+// errFromC converts a status code returned by the C bridge into a Go error.
+// Zero maps to nil; unrecognized codes map to ErrUnknownErrorCode.
+func errFromC(ret C.int) error {
+	switch ret {
+	case 0:
+		return nil
+	case C.ERR_INIT_ENC_HANDLER:
+		return ErrInitEncHandler
+	case C.ERR_SET_ENC_PARAM:
+		return ErrSetEncParam
+	case C.ERR_ENC_INIT:
+		return ErrEncInit
+	case C.ERR_SEND_PICTURE:
+		return ErrSendPicture
+	case C.ERR_GET_PACKET:
+		return ErrGetPacket
+	default:
+		return ErrUnknownErrorCode
+	}
+}
diff --git a/pkg/codec/svtav1/params.go b/pkg/codec/svtav1/params.go
new file mode 100644
index 0000000..9835e07
--- /dev/null
+++ b/pkg/codec/svtav1/params.go
@@ -0,0 +1,47 @@
+package svtav1
+
+import (
+	"time"
+
+	"github.com/pion/mediadevices/pkg/codec"
+	"github.com/pion/mediadevices/pkg/io/video"
+	"github.com/pion/mediadevices/pkg/prop"
+)
+
+// Params stores SVT-AV1 specific encoding parameters.
+type Params struct {
+	codec.BaseParams
+
+	// Preset is the SVT-AV1 preset number:
+	//   1-3:  extremely high efficiency but heavy
+	//   4-6:  a balance of efficiency and reasonable compute time
+	//   7-13: real-time encoding
+	Preset int
+
+	StartingBufferLevel time.Duration // NewParams default: 400ms
+	OptimalBufferLevel  time.Duration // NewParams default: 200ms
+	MaximumBufferSize   time.Duration // NewParams default: 500ms
+}
+
+// NewParams returns Params filled with the package defaults. The error is always nil.
+func NewParams() (Params, error) {
+	var p Params
+	p.KeyFrameInterval = 60
+	p.Preset = 9
+	p.StartingBufferLevel = 400 * time.Millisecond
+	p.OptimalBufferLevel = 200 * time.Millisecond
+	p.MaximumBufferSize = 500 * time.Millisecond
+
+	return p, nil
+}
+
+// RTPCodec returns the AV1 RTP codec description built with a 90000 Hz clock rate.
+func (p *Params) RTPCodec() *codec.RTPCodec {
+	const clockRate = 90000
+	return codec.NewRTPAV1Codec(clockRate)
+}
+
+// BuildVideoEncoder creates an SVT-AV1 encoder reading frames from r, using a copy of p.
+func (p *Params) BuildVideoEncoder(r video.Reader, property prop.Media) (codec.ReadCloser, error) {
+	return newEncoder(r, property, *p)
+}
diff --git a/pkg/codec/svtav1/svtav1.go b/pkg/codec/svtav1/svtav1.go
new file mode 100644
index 0000000..5e08ce6
--- /dev/null
+++ b/pkg/codec/svtav1/svtav1.go
@@ -0,0 +1,165 @@
+// Package svtav1 implements AV1 encoder.
+// This package requires libSvtAv1Enc headers and libraries to be built.
+package svtav1
+
+// #cgo pkg-config: SvtAv1Enc
+// #include "bridge.h"
+import "C"
+
+import (
+	"fmt"
+	"image"
+	"io"
+	"sync"
+
+	"github.com/pion/mediadevices/pkg/codec"
+	"github.com/pion/mediadevices/pkg/io/video"
+	"github.com/pion/mediadevices/pkg/prop"
+)
+
+// encoder drives the C bridge encoder. It is returned as a codec.ReadCloser
+// and also serves as its own controller (key frame and bit rate).
+type encoder struct {
+	// engine is the C-side encoder; it is freed by Close and nil afterwards.
+	engine *C.Encoder
+	// r yields the source frames, converted to I420.
+	r video.Reader
+	// mu serializes every method that touches engine or closed.
+	mu     sync.Mutex
+	closed bool
+
+	// outPool recycles the byte slices handed out by Read.
+	outPool sync.Pool
+}
+
+// newEncoder creates and initializes an SVT-AV1 encoder for frames read from r.
+// p supplies the frame size and frame rate (a zero rate falls back to 30);
+// params supplies the preset, bit rate, key frame interval and buffer levels.
+// Rate control is set to CBR and the prediction structure to low delay.
+func newEncoder(r video.Reader, p prop.Media, params Params) (codec.ReadCloser, error) {
+	var enc *C.Encoder
+
+	if p.FrameRate == 0 {
+		p.FrameRate = 30
+	}
+
+	if err := errFromC(C.enc_new(&enc)); err != nil {
+		return nil, err
+	}
+	enc.param.source_width = C.uint32_t(p.Width)
+	enc.param.source_height = C.uint32_t(p.Height)
+	enc.param.profile = C.MAIN_PROFILE
+	enc.param.enc_mode = C.int8_t(params.Preset)
+	enc.param.rate_control_mode = C.SVT_AV1_RC_MODE_CBR
+	enc.param.pred_structure = C.SVT_AV1_PRED_LOW_DELAY_B
+	enc.param.target_bit_rate = C.uint32_t(params.BitRate)
+	// The rate is expressed as a fraction over 1000 to keep three decimals.
+	enc.param.frame_rate_numerator = C.uint32_t(p.FrameRate * 1000)
+	enc.param.frame_rate_denominator = 1000
+	enc.param.intra_refresh_type = C.SVT_AV1_KF_REFRESH
+	enc.param.intra_period_length = C.int32_t(params.KeyFrameInterval)
+	enc.param.starting_buffer_level_ms = C.int64_t(params.StartingBufferLevel.Milliseconds())
+	enc.param.optimal_buffer_level_ms = C.int64_t(params.OptimalBufferLevel.Milliseconds())
+	enc.param.maximum_buffer_size_ms = C.int64_t(params.MaximumBufferSize.Milliseconds())
+
+	if err := errFromC(C.enc_init(enc)); err != nil {
+		_ = C.enc_free(enc) // enc_free always returns 0
+		return nil, err
+	}
+
+	return &encoder{
+		engine: enc,
+		r:      video.ToI420(r),
+		outPool: sync.Pool{
+			New: func() any {
+				return []byte(nil)
+			},
+		},
+	}, nil
+}
+
+// Read feeds source frames to the encoder until it emits a packet and returns
+// that packet. The returned function gives the buffer back for reuse and
+// should be called once the caller is done with the bytes. After Close, Read
+// returns io.EOF.
+func (e *encoder) Read() ([]byte, func(), error) {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	if e.closed {
+		return nil, func() {}, io.EOF
+	}
+
+	for {
+		if err := e.sendFrame(); err != nil {
+			return nil, func() {}, err
+		}
+
+		var buf *C.EbBufferHeaderType
+		if err := errFromC(C.enc_get_packet(e.engine, &buf)); err != nil {
+			return nil, func() {}, err
+		}
+		if buf == nil {
+			// Feed frames until receiving a packet
+			continue
+		}
+
+		n := int(buf.n_filled_len)
+		if n == 0 {
+			// Nothing to hand out, and &outBuf[0] below would panic.
+			C.svt_av1_enc_release_out_buffer(&buf)
+			continue
+		}
+
+		outBuf := e.outPool.Get().([]byte)
+		if cap(outBuf) < n {
+			outBuf = make([]byte, n)
+		} else {
+			outBuf = outBuf[:n]
+		}
+
+		C.memcpy_uint8((*C.uchar)(&outBuf[0]), buf.p_buffer, C.size_t(n))
+		C.svt_av1_enc_release_out_buffer(&buf)
+
+		return outBuf, func() {
+			e.outPool.Put(outBuf)
+		}, nil
+	}
+}
+
+// sendFrame reads one frame from the source and submits it to the encoder.
+// It is a separate function so that the frame is released after every
+// submission instead of piling up until Read returns.
+// NOTE(review): this relies on svt_av1_enc_send_picture copying the picture
+// before it returns - confirm against the SVT-AV1 API documentation.
+func (e *encoder) sendFrame() error {
+	img, release, err := e.r.Read()
+	if err != nil {
+		return err
+	}
+	defer release()
+
+	yuvImg, ok := img.(*image.YCbCr)
+	if !ok {
+		return fmt.Errorf("svtav1: unexpected image type %T", img)
+	}
+	if len(yuvImg.Y) == 0 || len(yuvImg.Cb) == 0 || len(yuvImg.Cr) == 0 {
+		// Taking the address of element 0 of an empty plane would panic.
+		return fmt.Errorf("svtav1: image %v has empty planes", yuvImg.Rect)
+	}
+
+	return errFromC(C.enc_send_frame(
+		e.engine,
+		(*C.uchar)(&yuvImg.Y[0]),
+		(*C.uchar)(&yuvImg.Cb[0]),
+		(*C.uchar)(&yuvImg.Cr[0]),
+		C.int(yuvImg.YStride),
+		C.int(yuvImg.CStride),
+	))
+}
+
+// ForceKeyFrame requests that the next encoded frame be a key frame.
+// It returns io.EOF once the encoder has been closed.
+func (e *encoder) ForceKeyFrame() error {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	if e.closed {
+		// engine has been freed; touching it would be a use-after-free.
+		return io.EOF
+	}
+
+	return errFromC(C.enc_force_keyframe(e.engine))
+}
+
+// SetBitRate stores bitrate as the target bit rate and re-applies the
+// parameters to the encoder. It returns io.EOF once the encoder has been
+// closed.
+func (e *encoder) SetBitRate(bitrate int) error {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	if e.closed {
+		// engine has been freed; touching it would be a use-after-free.
+		return io.EOF
+	}
+
+	e.engine.param.target_bit_rate = C.uint32_t(bitrate)
+
+	return errFromC(C.enc_apply_param(e.engine))
+}
+
+// Controller returns the encoder itself, which provides ForceKeyFrame and
+// SetBitRate.
+func (e *encoder) Controller() codec.EncoderController {
+	return e
+}
+
+// Close frees the C-side encoder. Calling it again is a no-op.
+func (e *encoder) Close() error {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	if e.closed {
+		return nil
+	}
+
+	if err := errFromC(C.enc_free(e.engine)); err != nil {
+		return err
+	}
+
+	// Drop the dangling pointer so a later misuse fails loudly.
+	e.engine = nil
+	e.closed = true
+	return nil
+}
diff --git a/pkg/codec/svtav1/svtav1_test.go b/pkg/codec/svtav1/svtav1_test.go
new file mode 100644
index 0000000..2254e1a
--- /dev/null
+++ b/pkg/codec/svtav1/svtav1_test.go
@@ -0,0 +1,146 @@
+package svtav1
+
+import (
+	"image"
+	"testing"
+
+	"github.com/pion/mediadevices/pkg/codec"
+	"github.com/pion/mediadevices/pkg/codec/internal/codectest"
+	"github.com/pion/mediadevices/pkg/frame"
+	"github.com/pion/mediadevices/pkg/io/video"
+	"github.com/pion/mediadevices/pkg/prop"
+)
+
+// getTestVideoEncoder builds a 256x144 encoder at 200 kbit/s whose source
+// yields a fresh blank I420 frame on every read.
+func getTestVideoEncoder() (codec.ReadCloser, error) {
+	p, err := NewParams()
+	if err != nil {
+		return nil, err
+	}
+	p.BitRate = 200000
+	enc, err := p.BuildVideoEncoder(video.ReaderFunc(func() (image.Image, func(), error) {
+		// Return a callable release func so that any consumer may invoke it.
+		return image.NewYCbCr(
+			image.Rect(0, 0, 256, 144),
+			image.YCbCrSubsampleRatio420,
+		), func() {}, nil
+	}), prop.Media{
+		Video: prop.Video{
+			Width:       256,
+			Height:      144,
+			FrameFormat: frame.FormatI420,
+		},
+	})
+	if err != nil {
+		return nil, err
+	}
+	return enc, nil
+}
+
+// closeOnCleanup closes enc when the test finishes so that the native encoder
+// is not leaked, and reports a failing Close.
+func closeOnCleanup(t *testing.T, enc codec.ReadCloser) {
+	t.Helper()
+	t.Cleanup(func() {
+		if err := enc.Close(); err != nil {
+			t.Error(err)
+		}
+	})
+}
+
+func TestEncoder(t *testing.T) {
+	t.Run("SimpleRead", func(t *testing.T) {
+		p, err := NewParams()
+		if err != nil {
+			t.Fatal(err)
+		}
+		p.BitRate = 200000
+		codectest.VideoEncoderSimpleReadTest(t, &p,
+			prop.Media{
+				Video: prop.Video{
+					Width:       256,
+					Height:      144,
+					FrameFormat: frame.FormatI420,
+				},
+			},
+			image.NewYCbCr(
+				image.Rect(0, 0, 256, 144),
+				image.YCbCrSubsampleRatio420,
+			),
+		)
+	})
+	t.Run("CloseTwice", func(t *testing.T) {
+		p, err := NewParams()
+		if err != nil {
+			t.Fatal(err)
+		}
+		p.BitRate = 200000
+		codectest.VideoEncoderCloseTwiceTest(t, &p, prop.Media{
+			Video: prop.Video{
+				Width:       640,
+				Height:      480,
+				FrameRate:   30,
+				FrameFormat: frame.FormatI420,
+			},
+		})
+	})
+	t.Run("ReadAfterClose", func(t *testing.T) {
+		p, err := NewParams()
+		if err != nil {
+			t.Fatal(err)
+		}
+		p.BitRate = 200000
+		codectest.VideoEncoderReadAfterCloseTest(t, &p,
+			prop.Media{
+				Video: prop.Video{
+					Width:       256,
+					Height:      144,
+					FrameFormat: frame.FormatI420,
+				},
+			},
+			image.NewYCbCr(
+				image.Rect(0, 0, 256, 144),
+				image.YCbCrSubsampleRatio420,
+			),
+		)
+	})
+}
+
+func TestShouldImplementKeyFrameControl(t *testing.T) {
+	e := &encoder{}
+	if _, ok := e.Controller().(codec.KeyFrameController); !ok {
+		t.Error()
+	}
+}
+
+func TestNoErrorOnForceKeyFrame(t *testing.T) {
+	enc, err := getTestVideoEncoder()
+	if err != nil {
+		t.Fatal(err)
+	}
+	closeOnCleanup(t, enc)
+
+	kfc, ok := enc.Controller().(codec.KeyFrameController)
+	if !ok {
+		t.Fatal("Failed to get KeyFrameController")
+	}
+	if err := kfc.ForceKeyFrame(); err != nil {
+		t.Error(err)
+	}
+	_, rel, err := enc.Read() // try to read the encoded frame
+	if err != nil {
+		t.Fatal(err)
+	}
+	rel()
+}
+
+func TestShouldImplementBitRateControl(t *testing.T) {
+	e := &encoder{}
+	if _, ok := e.Controller().(codec.BitRateController); !ok {
+		t.Error()
+	}
+}
+
+func TestNoErrorOnSetBitRate(t *testing.T) {
+	enc, err := getTestVideoEncoder()
+	if err != nil {
+		t.Fatal(err)
+	}
+	closeOnCleanup(t, enc)
+
+	brc, ok := enc.Controller().(codec.BitRateController)
+	if !ok {
+		t.Fatal("Failed to get BitRateController")
+	}
+	if err := brc.SetBitRate(1000); err != nil { // 1000 bit/second is ridiculously low, but this is a testcase.
+		t.Error(err)
+	}
+	_, rel, err := enc.Read() // try to read the encoded frame
+	if err != nil {
+		t.Fatal(err)
+	}
+	rel()
+}