Files
lpms/ffmpeg/ffmpeg.go
T
2026-03-10 01:18:36 +00:00

1142 lines
33 KiB
Go
Executable File

package ffmpeg
import (
"bytes"
"encoding/hex"
"errors"
"fmt"
"io"
"os"
"path"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
"unsafe"
"github.com/golang/glog"
"github.com/golang/protobuf/proto"
pb "github.com/livepeer/lpms/ffmpeg/proto"
)
// #cgo pkg-config: libavformat libavfilter libavcodec libavutil libswscale
// #include <stdlib.h>
// #include "transcoder.h"
// #include "extras.h"
// #include <libavutil/log.h>
import "C"
// Sentinel errors returned by the transcoder and by the signature / video
// comparison helpers in this package.
var (
	ErrTranscoderRes         = errors.New("TranscoderInvalidResolution")
	ErrTranscoderHw          = errors.New("TranscoderInvalidHardware")
	ErrTranscoderInp         = errors.New("TranscoderInvalidInput")
	ErrTranscoderClipConfig  = errors.New("TranscoderInvalidClipConfig")
	ErrTranscoderVid         = errors.New("TranscoderInvalidVideo")
	ErrTranscoderDuration    = errors.New("TranscoderInvalidDuration")
	ErrTranscoderStp         = errors.New("TranscoderStopped")
	ErrTranscoderFmt         = errors.New("TranscoderUnrecognizedFormat")
	ErrTranscoderPrf         = errors.New("TranscoderUnrecognizedProfile")
	ErrTranscoderGOP         = errors.New("TranscoderInvalidGOP")
	ErrTranscoderDev         = errors.New("TranscoderIncompatibleDevices")
	ErrEmptyData             = errors.New("EmptyData")
	ErrSignCompare           = errors.New("InvalidSignData")
	ErrTranscoderPixelformat = errors.New("TranscoderInvalidPixelformat")
	ErrVideoCompare          = errors.New("InvalidVideoData")
)
// LogTranscodeErrors controls whether a non-zero return code from the native
// transcoder is written to the log. Set it to false to silence those messages,
// e.g. when doing test transcoding where failures are expected.
var LogTranscodeErrors = true
// Acceleration selects the kind of device used for decoding or encoding.
type Acceleration int

// Known acceleration back ends.
const (
	Software Acceleration = 0
	Nvidia   Acceleration = 1
	Amd      Acceleration = 2
	Netint   Acceleration = 3
)
// AccelerationNameLookup maps every Acceleration value to a short
// human-readable name.
var AccelerationNameLookup = map[Acceleration]string{
	Software: "SW",
	Nvidia:   "Nvidia",
	Amd:      "Amd",
	Netint:   "Netint",
}
// FfEncoderLookup maps an acceleration type and a video codec to the name of
// the FFmpeg encoder used for that combination. Combinations that are not
// listed (there is no Amd entry, and only Software lists VP8 / VP9) yield the
// empty string on lookup.
var FfEncoderLookup = map[Acceleration]map[VideoCodec]string{
	Software: {
		H264: "libx264",
		H265: "libx265",
		VP8:  "libvpx",
		VP9:  "libvpx-vp9",
	},
	Nvidia: {
		H264: "h264_nvenc",
		H265: "hevc_nvenc",
	},
	Netint: {
		H264: "h264_ni_enc",
		H265: "h265_ni_enc",
	},
}
// ComponentOptions names a component (demuxer, muxer, video or audio encoder)
// and carries the key/value options handed to it as an AVDictionary.
type ComponentOptions struct {
	// Name of the component; an empty name lets the package pick a default.
	Name string
	// Opts holds the component options as string key/value pairs.
	Opts map[string]string
}
// Transcoder wraps a native transcode session. Create it with NewTranscoder
// and release it with StopTranscoder.
type Transcoder struct {
	// handle is the native session; it is set to nil once stopped.
	handle *C.struct_transcode_thread
	// stopped is set by StopTranscoder; started is set once a first input
	// has been accepted (or transmuxing was requested).
	stopped, started bool
	// lastacodec remembers the audio codec of the most recent input so a
	// change can trigger a demuxer reopen.
	lastacodec string
	// mu serializes every method that touches handle.
	mu *sync.Mutex
}
// TranscodeOptionsIn describes the input of a transcode.
type TranscodeOptionsIn struct {
	// Fname is the input path or URL handed to the native demuxer.
	Fname string
	// Accel selects the device type used for the input side.
	Accel Acceleration
	// Device optionally names the device to use; empty leaves it unset.
	Device string
	// Transmuxing requests transmuxing from the native transcoder.
	Transmuxing bool
	// Profile supplies the frame rate passed to the image2 demuxer when the
	// input has a ".png" extension.
	Profile VideoProfile
	// Demuxer optionally overrides the demuxer name and its options.
	Demuxer ComponentOptions
}
// TranscodeOptions describes a single output rendition of a transcode.
type TranscodeOptions struct {
	// Oname is the output file name.
	Oname string
	// Profile carries resolution, bitrate, frame rate, GOP and format.
	Profile VideoProfile
	// Accel selects the device type used for encoding this output.
	Accel Acceleration
	// Device optionally names the encoding device.
	Device string
	// CalcSign adds a signature filter that writes to Oname + ".bin".
	CalcSign bool
	// From and To bound the clip; both are passed on in milliseconds.
	From, To time.Duration
	// Muxer, VideoEncoder and AudioEncoder optionally override the
	// respective component and its options.
	Muxer, VideoEncoder, AudioEncoder ComponentOptions
	// Metadata is passed to the native side as a dictionary.
	Metadata map[string]string
}
// MediaInfo holds the frame and pixel counters reported by the native
// transcoder for one decoded input or encoded output.
type MediaInfo struct {
	Frames int
	Pixels int64
}
// TranscodeResults is the outcome of a transcode: counters for the decoded
// input plus one MediaInfo per requested output, in request order.
type TranscodeResults struct {
	Decoded MediaInfo
	Encoded []MediaInfo
}
// PixelFormat wraps a raw pixel format value as reported by the native codec
// probe (compare against the PixelFormat* constants).
type PixelFormat struct {
	RawValue int
}
// Go-side aliases for the FFmpeg AV_PIX_FMT_* values that this package knows
// about. They can be compared against PixelFormat.RawValue.
const (
	PixelFormatNone        int = C.AV_PIX_FMT_NONE
	PixelFormatYUV420P     int = C.AV_PIX_FMT_YUV420P
	PixelFormatYUYV422     int = C.AV_PIX_FMT_YUYV422
	PixelFormatYUV422P     int = C.AV_PIX_FMT_YUV422P
	PixelFormatYUV444P     int = C.AV_PIX_FMT_YUV444P
	PixelFormatUYVY422     int = C.AV_PIX_FMT_UYVY422
	PixelFormatNV12        int = C.AV_PIX_FMT_NV12
	PixelFormatNV21        int = C.AV_PIX_FMT_NV21
	PixelFormatYUV420P10BE int = C.AV_PIX_FMT_YUV420P10BE
	PixelFormatYUV420P10LE int = C.AV_PIX_FMT_YUV420P10LE
	PixelFormatYUV422P10BE int = C.AV_PIX_FMT_YUV422P10BE
	PixelFormatYUV422P10LE int = C.AV_PIX_FMT_YUV422P10LE
	PixelFormatYUV444P10BE int = C.AV_PIX_FMT_YUV444P10BE
	PixelFormatYUV444P10LE int = C.AV_PIX_FMT_YUV444P10LE
	PixelFormatYUV420P16LE int = C.AV_PIX_FMT_YUV420P16LE
	PixelFormatYUV420P16BE int = C.AV_PIX_FMT_YUV420P16BE
	PixelFormatYUV422P16LE int = C.AV_PIX_FMT_YUV422P16LE
	PixelFormatYUV422P16BE int = C.AV_PIX_FMT_YUV422P16BE
	PixelFormatYUV444P16LE int = C.AV_PIX_FMT_YUV444P16LE
	PixelFormatYUV444P16BE int = C.AV_PIX_FMT_YUV444P16BE
	PixelFormatYUV420P12BE int = C.AV_PIX_FMT_YUV420P12BE
	PixelFormatYUV420P12LE int = C.AV_PIX_FMT_YUV420P12LE
	PixelFormatYUV422P12BE int = C.AV_PIX_FMT_YUV422P12BE
	PixelFormatYUV422P12LE int = C.AV_PIX_FMT_YUV422P12LE
	PixelFormatYUV444P12BE int = C.AV_PIX_FMT_YUV444P12BE
	PixelFormatYUV444P12LE int = C.AV_PIX_FMT_YUV444P12LE
)
// ColorDepthBits stores the bit depth of a sample minus 8, so that
// ColorDepthBits + 8 is the actual number of bits.
type ColorDepthBits int

// Supported color depths, each written as "bits - 8".
const (
	ColorDepth8Bit  ColorDepthBits = 8 - 8
	ColorDepth10Bit ColorDepthBits = 10 - 8
	ColorDepth12Bit ColorDepthBits = 12 - 8
	ColorDepth16Bit ColorDepthBits = 16 - 8
)
// ChromaSubsampling identifies the chroma subsampling scheme of a pixel format.
type ChromaSubsampling int

// Supported chroma subsampling schemes.
const (
	ChromaSubsampling420 ChromaSubsampling = 0
	ChromaSubsampling422 ChromaSubsampling = 1
	ChromaSubsampling444 ChromaSubsampling = 2
)
// Properties reports the chroma subsampling and color depth of the pixel
// format. For a format that is not listed below it returns 4:2:0 / 8-bit
// together with ErrTranscoderPixelformat.
func (pixelFormat PixelFormat) Properties() (ChromaSubsampling, ColorDepthBits, error) {
	switch pixelFormat.RawValue {
	// 8-bit formats
	case C.AV_PIX_FMT_YUV420P, C.AV_PIX_FMT_NV12, C.AV_PIX_FMT_NV21:
		return ChromaSubsampling420, ColorDepth8Bit, nil
	case C.AV_PIX_FMT_YUYV422, C.AV_PIX_FMT_YUV422P, C.AV_PIX_FMT_UYVY422:
		return ChromaSubsampling422, ColorDepth8Bit, nil
	case C.AV_PIX_FMT_YUV444P:
		return ChromaSubsampling444, ColorDepth8Bit, nil

	// 10-bit formats
	case C.AV_PIX_FMT_YUV420P10BE, C.AV_PIX_FMT_YUV420P10LE:
		return ChromaSubsampling420, ColorDepth10Bit, nil
	case C.AV_PIX_FMT_YUV422P10BE, C.AV_PIX_FMT_YUV422P10LE:
		return ChromaSubsampling422, ColorDepth10Bit, nil
	case C.AV_PIX_FMT_YUV444P10BE, C.AV_PIX_FMT_YUV444P10LE:
		return ChromaSubsampling444, ColorDepth10Bit, nil

	// 12-bit formats
	case C.AV_PIX_FMT_YUV420P12BE, C.AV_PIX_FMT_YUV420P12LE:
		return ChromaSubsampling420, ColorDepth12Bit, nil
	case C.AV_PIX_FMT_YUV422P12BE, C.AV_PIX_FMT_YUV422P12LE:
		return ChromaSubsampling422, ColorDepth12Bit, nil
	case C.AV_PIX_FMT_YUV444P12BE, C.AV_PIX_FMT_YUV444P12LE:
		return ChromaSubsampling444, ColorDepth12Bit, nil

	// 16-bit formats
	case C.AV_PIX_FMT_YUV420P16LE, C.AV_PIX_FMT_YUV420P16BE:
		return ChromaSubsampling420, ColorDepth16Bit, nil
	case C.AV_PIX_FMT_YUV422P16LE, C.AV_PIX_FMT_YUV422P16BE:
		return ChromaSubsampling422, ColorDepth16Bit, nil
	case C.AV_PIX_FMT_YUV444P16LE, C.AV_PIX_FMT_YUV444P16BE:
		return ChromaSubsampling444, ColorDepth16Bit, nil
	}
	// Unknown format: same fallback values as before, flagged with an error.
	return ChromaSubsampling420, ColorDepth8Bit, ErrTranscoderPixelformat
}
// CodecStatus is the status code returned by the native codec probe.
type CodecStatus int

const (
	// CodecStatusInternalError is also the initial status used by
	// GetCodecInfoBytes before the probe has run.
	CodecStatusInternalError CodecStatus = -1
	CodecStatusOk            CodecStatus = 0
	// CodecStatusNeedsBypass: video is present in the container but without
	// any frames.
	CodecStatusNeedsBypass CodecStatus = 1
	// CodecStatusMissing: presumably no usable codec was found — TODO confirm
	// against the native implementation.
	CodecStatusMissing CodecStatus = 2
)
// MediaFormatInfo is the Go view of what the native codec probe reports about
// an input.
type MediaFormatInfo struct {
	// Format is the container format name (e.g. "mpegts", "mp3").
	Format string
	// Acodec and Vcodec are the audio / video codec names; empty when the
	// probe did not report one.
	Acodec, Vcodec string
	PixFormat      PixelFormat
	Width, Height  int
	FPS            float32
	// DurSecs is the duration in seconds; 0 when unknown.
	DurSecs int64
	// AudioBitrate is used to estimate DurSecs for some audio-only formats
	// (presumably bits per second — verify against the native side).
	AudioBitrate int
}
// ScaledHeight returns the height that preserves the aspect ratio of f when
// the picture is resized to the given width. The result is truncated towards
// zero.
//
// It returns 0 when f.Width is zero (for example when no video stream was
// detected): dividing by zero would yield Inf/NaN, and converting that to int
// is implementation-dependent.
func (f *MediaFormatInfo) ScaledHeight(width int) int {
	if f.Width == 0 {
		return 0
	}
	return int(float32(width) * float32(f.Height) / float32(f.Width))
}
// ScaledWidth returns the width that preserves the aspect ratio of f when the
// picture is resized to the given height. The result is truncated towards
// zero.
//
// It returns 0 when f.Height is zero (for example when no video stream was
// detected): dividing by zero would yield Inf/NaN, and converting that to int
// is implementation-dependent.
func (f *MediaFormatInfo) ScaledWidth(height int) int {
	if f.Height == 0 {
		return 0
	}
	return int(float32(height) * float32(f.Width) / float32(f.Height))
}
// GetCodecInfo probes fname with the native lpms_get_codec_info call and
// returns its status together with the reported format details. The error
// result is always nil; failures are conveyed through the status.
func GetCodecInfo(fname string) (CodecStatus, MediaFormatInfo, error) {
	const nameBufLen = 255

	cfname := C.CString(fname)
	defer C.free(unsafe.Pointer(cfname))

	// Output buffers for the names, pre-filled with nameBufLen '0' characters.
	filler := strings.Repeat("0", nameBufLen)
	fmtname := C.CString(filler)
	defer C.free(unsafe.Pointer(fmtname))
	acodecC := C.CString(filler)
	defer C.free(unsafe.Pointer(acodecC))
	vcodecC := C.CString(filler)
	defer C.free(unsafe.Pointer(vcodecC))

	paramsC := C.codec_info{
		format_name:  fmtname,
		video_codec:  vcodecC,
		audio_codec:  acodecC,
		pixel_format: C.AV_PIX_FMT_NONE,
	}
	status := CodecStatus(C.lpms_get_codec_info(cfname, &paramsC))

	// A buffer that still measures the full filler length is treated as
	// "no name reported" and maps to the empty string.
	nameOf := func(buf *C.char) string {
		if C.strlen(buf) < nameBufLen {
			return C.GoString(buf)
		}
		return ""
	}

	format := MediaFormatInfo{
		Format:       nameOf(fmtname),
		Acodec:       nameOf(acodecC),
		Vcodec:       nameOf(vcodecC),
		PixFormat:    PixelFormat{int(paramsC.pixel_format)},
		Width:        int(paramsC.width),
		Height:       int(paramsC.height),
		FPS:          float32(paramsC.fps),
		DurSecs:      int64(paramsC.dur),
		AudioBitrate: int(paramsC.audio_bit_rate),
	}
	return status, format, nil
}
// GetCodecInfoBytes feeds data to GetCodecInfo through an OS pipe and returns
// the probe status and format details. When the pipe cannot be created it
// returns CodecStatusInternalError and ErrEmptyData.
//
// For mp3 / wav / aac inputs with no reported duration the duration is
// estimated from the audio bitrate and the payload size.
func GetCodecInfoBytes(data []byte) (CodecStatus, MediaFormatInfo, error) {
	or, ow, err := os.Pipe()
	if err != nil {
		// Check before starting the writer so it never touches a nil pipe.
		return CodecStatusInternalError, MediaFormatInfo{}, ErrEmptyData
	}
	// Release the read end once probing is done. This also unblocks the
	// writer below if the probe stopped reading before all data was consumed.
	defer or.Close()
	go func() {
		defer ow.Close()
		// Best effort: a write error only means the reader went away.
		io.Copy(ow, bytes.NewReader(data))
	}()
	fname := fmt.Sprintf("pipe:%d", or.Fd())
	status, format, err := GetCodecInfo(fname)
	// estimate duration from bitrate and filesize for audio
	// some formats do not have built-in track duration metadata,
	// and pipes do not have a filesize on their own which breaks ffmpeg's own
	// duration estimates. So do the estimation calculation ourselves
	// NB : mpegts has the same problem but may contain video so let's not handle that
	// some other formats, eg ogg, show zero bitrate
	//
	// ffmpeg estimation of duration from bitrate:
	// https://github.com/FFmpeg/FFmpeg/blob/8280ec7a3213c9b7bad88aac3695be2dedd2c00b/libavformat/demux.c#L1798
	if format.DurSecs == 0 && format.AudioBitrate > 0 && (format.Format == "mp3" || format.Format == "wav" || format.Format == "aac") {
		format.DurSecs = int64(len(data) * 8 / format.AudioBitrate)
	}
	return status, format, err
}
// HasZeroVideoFrameBytes probes data through an OS pipe and reports whether
// the probe status is CodecStatusNeedsBypass, i.e. the input has a video
// stream with zero frames. Empty data yields ErrEmptyData.
func HasZeroVideoFrameBytes(data []byte) (bool, error) {
	if len(data) == 0 {
		return false, ErrEmptyData
	}
	or, ow, err := os.Pipe()
	if err != nil {
		return false, err
	}
	// The read end was previously never closed, leaking one descriptor per call.
	defer or.Close()
	fname := fmt.Sprintf("pipe:%d", or.Fd())
	go func() {
		br := bytes.NewReader(data)
		io.Copy(ow, br)
		ow.Close()
	}()
	status, _, err := GetCodecInfo(fname)
	// Closing the write end here unblocks the writer goroutine if the probe
	// finished before consuming everything; a second Close is harmless.
	ow.Close()
	return status == CodecStatusNeedsBypass, err
}
// CompareSignatureByPath compares two signature files and reports whether
// they match. An empty file name yields (false, nil); a negative native
// result yields ErrSignCompare.
func CompareSignatureByPath(fname1 string, fname2 string) (bool, error) {
	if fname1 == "" || fname2 == "" {
		return false, nil
	}
	cfpath1 := C.CString(fname1)
	defer C.free(unsafe.Pointer(cfpath1))
	cfpath2 := C.CString(fname2)
	defer C.free(unsafe.Pointer(cfpath2))

	switch res := int(C.lpms_compare_sign_bypath(cfpath1, cfpath2)); {
	case res > 0:
		return true, nil
	case res == 0:
		return false, nil
	default:
		return false, ErrSignCompare
	}
}
// compare two signature buffers whether those matches or not
func CompareSignatureByBuffer(data1 []byte, data2 []byte) (bool, error) {
pdata1 := unsafe.Pointer(&data1[0])
pdata2 := unsafe.Pointer(&data2[0])
res := int(C.lpms_compare_sign_bybuffer(pdata1, C.int(len(data1)), pdata2, C.int(len(data2))))
if res > 0 {
return true, nil
} else if res == 0 {
return false, nil
} else {
return false, ErrSignCompare
}
}
// CompareVideoByPath compares two video files and reports whether they match.
// An empty file name yields (false, nil). The native call returns 0 for a
// match and 1 for a mismatch; any other value yields ErrVideoCompare.
func CompareVideoByPath(fname1 string, fname2 string) (bool, error) {
	if fname1 == "" || fname2 == "" {
		return false, nil
	}
	cfpath1 := C.CString(fname1)
	defer C.free(unsafe.Pointer(cfpath1))
	cfpath2 := C.CString(fname2)
	defer C.free(unsafe.Pointer(cfpath2))

	switch int(C.lpms_compare_video_bypath(cfpath1, cfpath2)) {
	case 0:
		return true, nil
	case 1:
		return false, nil
	default:
		return false, ErrVideoCompare
	}
}
// compare two video buffers whether those matches or not
func CompareVideoByBuffer(data1 []byte, data2 []byte) (bool, error) {
pdata1 := unsafe.Pointer(&data1[0])
pdata2 := unsafe.Pointer(&data2[0])
res := int(C.lpms_compare_video_bybuffer(pdata1, C.int(len(data1)), pdata2, C.int(len(data2))))
if res == 0 {
return true, nil
} else if res == 1 {
return false, nil
} else {
return false, ErrVideoCompare
}
}
// RTMPToHLS transmuxes the RTMP stream at localRTMPUrl into an HLS playlist
// at outM3U8 via the native lpms_rtmp2hls call. tmpl is the segment file name
// template, seglen_secs the segment length and seg_start the first segment
// number. A non-zero native return code is logged and mapped through ErrorMap.
func RTMPToHLS(localRTMPUrl string, outM3U8 string, tmpl string, seglen_secs string, seg_start int) error {
	inp := C.CString(localRTMPUrl)
	defer C.free(unsafe.Pointer(inp))
	outp := C.CString(outM3U8)
	defer C.free(unsafe.Pointer(outp))
	tsTmpl := C.CString(tmpl)
	defer C.free(unsafe.Pointer(tsTmpl))
	seglen := C.CString(seglen_secs)
	defer C.free(unsafe.Pointer(seglen))
	segstart := C.CString(strconv.Itoa(seg_start))
	defer C.free(unsafe.Pointer(segstart))

	ret := int(C.lpms_rtmp2hls(inp, outp, tsTmpl, seglen, segstart))
	if ret == 0 {
		return nil
	}
	glog.Infof("RTMP2HLS Transmux Return : %v\n", Strerror(ret))
	return ErrorMap[ret]
}
// Transcode transcodes input in software into one output per profile in ps.
// Output i is written to workDir as "out<i><base name of input>".
func Transcode(input string, workDir string, ps []VideoProfile) error {
	base := filepath.Base(input)
	outputs := make([]TranscodeOptions, 0, len(ps))
	for idx := range ps {
		outputs = append(outputs, TranscodeOptions{
			Oname:   path.Join(workDir, fmt.Sprintf("out%v%v", idx, base)),
			Profile: ps[idx],
			Accel:   Software,
		})
	}
	source := &TranscodeOptionsIn{
		Fname: input,
		Accel: Software,
	}
	return Transcode2(source, outputs)
}
// newAVOpts converts opts into a newly allocated AVDictionary. It returns nil
// for an empty or nil map. The caller (or the native code it hands the
// dictionary to) owns the result and must free it with av_dict_free.
func newAVOpts(opts map[string]string) *C.AVDictionary {
	var dict *C.AVDictionary
	for key, value := range opts {
		k := C.CString(key)
		v := C.CString(value)
		// With flags == 0 av_dict_set stores its own copies of key and value,
		// so the temporaries can be released right away instead of piling up
		// deferred frees until the function returns.
		C.av_dict_set(&dict, k, v, 0)
		C.free(unsafe.Pointer(k))
		C.free(unsafe.Pointer(v))
	}
	return dict
}
// configEncoder picks the encoder name, the scale filter and the scale
// interpolation algorithm for the given input / output acceleration pair.
//
// It returns ErrTranscoderDev for device combinations that are explicitly
// disallowed and ErrTranscoderHw for any pairing that is not handled.
func configEncoder(inOpts *TranscodeOptionsIn, outOpts TranscodeOptions) (string, string, string, error) {
	from, to := inOpts.Accel, outOpts.Accel
	encoder := FfEncoderLookup[to][outOpts.Profile.Encoder]

	switch {
	case from == Software && to == Software:
		return encoder, "scale", "", nil

	case from == Software && to == Nvidia:
		// Frames must be uploaded to the GPU before the hardware scaler.
		upload := "hwupload_cuda"
		if outOpts.Device != "" {
			upload += "=device=" + outOpts.Device
		}
		return encoder, upload + "," + hwScale(), hwScaleAlgo(), nil

	case from == Nvidia && to == Software:
		return encoder, hwScale(), hwScaleAlgo(), nil

	case from == Nvidia && to == Nvidia:
		// If we encode on a different device from decode then need to transfer
		if outOpts.Device != "" && outOpts.Device != inOpts.Device {
			return "", "", "", ErrTranscoderDev // XXX not allowed
		}
		return encoder, hwScale(), hwScaleAlgo(), nil

	case from == Netint && (to == Software || to == Nvidia):
		// XXX don't allow mix-match between NETINT and sw/nv
		return "", "", "", ErrTranscoderDev

	case from == Netint && to == Netint:
		// Use software scale filter
		return encoder, "scale", "", nil
	}
	return "", "", "", ErrTranscoderHw
}
// accelDeviceType maps an Acceleration to the FFmpeg hardware device type
// passed to the native transcoder. Unsupported values yield
// AV_HWDEVICE_TYPE_NONE together with ErrTranscoderHw.
func accelDeviceType(accel Acceleration) (C.enum_AVHWDeviceType, error) {
	switch accel {
	case Nvidia:
		return C.AV_HWDEVICE_TYPE_CUDA, nil
	case Netint:
		return C.AV_HWDEVICE_TYPE_MEDIACODEC, nil
	case Software:
		return C.AV_HWDEVICE_TYPE_NONE, nil
	default:
		return C.AV_HWDEVICE_TYPE_NONE, ErrTranscoderHw
	}
}
// Transcode2 behaves like Transcode3 but discards the transcode results and
// returns only the error.
func Transcode2(input *TranscodeOptionsIn, ps []TranscodeOptions) error {
	if _, err := Transcode3(input, ps); err != nil {
		return err
	}
	return nil
}
// Transcode3 runs a one-shot transcode: it creates a fresh Transcoder, runs
// the job on it and stops the transcoder again before returning.
func Transcode3(input *TranscodeOptionsIn, ps []TranscodeOptions) (*TranscodeResults, error) {
	transcoder := NewTranscoder()
	defer transcoder.StopTranscoder()

	results, err := transcoder.Transcode(input, ps)
	return results, err
}
// CodingSizeLimit holds the smallest and largest frame dimensions an encoder
// accepts. Field order matters: the type is initialised positionally.
type CodingSizeLimit struct {
	WidthMin, HeightMin, WidthMax, HeightMax int
}
// clamp limits val to the range [min, max]. The lower bound is checked first,
// so min wins when the bounds are inverted and val is at or below min.
func clamp(val, min, max int) int {
	switch {
	case val <= min:
		return min
	case val >= max:
		return max
	default:
		return val
	}
}
// nvidiaCodecSizeLimts lists the frame size limits of 7th Gen NVENC per codec.
// Codecs without an entry yield the zero CodingSizeLimit on lookup.
var nvidiaCodecSizeLimts = map[VideoCodec]CodingSizeLimit{
	H264: {WidthMin: 146, HeightMin: 50, WidthMax: 4096, HeightMax: 4096},
	H265: {WidthMin: 132, HeightMin: 40, WidthMax: 8192, HeightMax: 8192},
}
// isAudioAllDrop reports whether every output in ps uses the "drop" audio
// encoder. It is vacuously true for an empty list.
func isAudioAllDrop(ps []TranscodeOptions) bool {
	for i := range ps {
		if ps[i].AudioEncoder.Name != "drop" {
			return false
		}
	}
	return true
}
// createCOutputParams builds the C output_params array for the outputs in ps
// and returns it together with a finalizer that releases all C memory
// referenced by the array.
//
// The finalizer must be called even when an error is returned, because the
// array may be partially filled at that point. Errors come from resolution or
// bitrate parsing (unless the video encoder is "drop" or "copy"), from
// configEncoder, and from unrecognised profile / GOP / format settings.
func createCOutputParams(input *TranscodeOptionsIn, ps []TranscodeOptions) ([]C.output_params, func(), error) {
	params := make([]C.output_params, len(ps))
	finalizer := func() { destroyCOutputParams(params) }
	for i, p := range ps {
		param := p.Profile
		w, h, err := VideoProfileResolution(param)
		if err != nil {
			if p.VideoEncoder.Name != "drop" && p.VideoEncoder.Name != "copy" {
				return params, finalizer, err
			}
		}
		br := strings.Replace(param.Bitrate, "k", "000", 1)
		bitrate, err := strconv.Atoi(br)
		if err != nil {
			if p.VideoEncoder.Name != "drop" && p.VideoEncoder.Name != "copy" {
				return params, finalizer, err
			}
		}
		encoder, scale_filter := p.VideoEncoder.Name, "scale"
		var interpAlgo string
		if encoder == "" {
			encoder, scale_filter, interpAlgo, err = configEncoder(input, p)
			if err != nil {
				return params, finalizer, err
			}
		}
		// use these as common limits even for non-nvidia
		limits := nvidiaCodecSizeLimts[param.Encoder]
		w = clamp(w, limits.WidthMin, limits.WidthMax)
		h = clamp(h, limits.HeightMin, limits.HeightMax)
		/*
			use the larger dimension requested from the transcode resolution
			and proportionally rescale the smaller side of the input down
			this does imply transposing the dimensions if necessary
			if this causes the smaller side to rescale below the minimum
			then set the smaller side to the minimum
			and rescale the larger side instead
			NB possibility that could also blow past the maximum
			but the aspect ratio would have to be super skewed
			TODO check for unsupportable aspect ratios in the C
		*/
		maxD := w
		if h > w {
			maxD = h
		}
		wExpr := fmt.Sprintf(`trunc(
if(gte(iw,ih),
if(gte(%d*ih/iw,%d),%d,-2),
if(gte(%d,%d*iw/ih),%d,-2)
)/2)*2`, maxD, limits.HeightMin, maxD, limits.WidthMin, maxD, limits.WidthMin)
		hExpr := fmt.Sprintf(`trunc(
if(gt(ih,iw),
if(gte(%d*iw/ih,%d),%d,-2),
if(gte(%d,%d*ih/iw),%d,-2)
)/2)*2`, maxD, limits.WidthMin, maxD, limits.HeightMin, maxD, limits.HeightMin)
		filters := fmt.Sprintf("%s='w=%s:h=%s'", scale_filter, wExpr, hExpr)
		if interpAlgo != "" {
			filters = fmt.Sprintf("%s:interp_algo=%s", filters, interpAlgo)
		}
		if input.Accel == Nvidia && p.Accel == Software {
			// needed for hw dec -> hw rescale -> sw enc
			filters = filters + ",hwdownload,format=nv12"
		}
		if p.Accel == Nvidia && filepath.Ext(input.Fname) == ".png" {
			// If the input is PNG image(s) and we are scaling on a Nvidia device
			// we need to first convert to a pixel format that the scale_npp filter supports
			filters = "format=nv12," + filters
		}
		// set FPS denominator to 1 if unset by user
		if param.FramerateDen == 0 {
			param.FramerateDen = 1
		}
		// Add fps filter *after* scale filter because otherwise we could
		// be scaling duplicate frames unnecessarily. This becomes a DoS vector
		// when a user submits two frames that are "far apart" in pts and
		// the fps filter duplicates frames to fill out the difference to maintain
		// a consistent frame rate.
		// Once we allow for alternating segments, this issue should be mitigated
		// and the fps filter can come *before* the scale filter to minimize work
		// when going from high fps to low fps (much more common when transcoding
		// than going from low fps to high fps)
		var fps C.AVRational
		if param.Framerate > 0 {
			filters += fmt.Sprintf(",fps=%d/%d", param.Framerate, param.FramerateDen)
			fps = C.AVRational{num: C.int(param.Framerate), den: C.int(param.FramerateDen)}
		}
		// Set video encoder options
		// TODO understand how h264 profiles and GOP setting works for
		// NETINT encoder, and make sure we change relevant things here
		// Any other options for the encoder can also be added here
		xcoderOutParamsStr := ""
		if len(p.VideoEncoder.Name) <= 0 && len(p.VideoEncoder.Opts) <= 0 {
			p.VideoEncoder.Opts = map[string]string{
				"forced-idr": "1",
				"preset":     "medium",
				"tier":       "high",
			}
			if p.Profile.Quality != 0 {
				if p.Profile.Quality <= 63 {
					p.VideoEncoder.Opts["crf"] = strconv.Itoa(int(p.Profile.Quality))
				} else {
					glog.Warning("Cannot use CRF param, value out of range (0-63)")
				}
				// There's no direct numerical correspondence between CQ and CRF.
				// From some experiments, it seems that setting CQ = CRF + 7 gives similar visual effects.
				cq := p.Profile.Quality + 7
				if cq <= 51 {
					p.VideoEncoder.Opts["cq"] = strconv.Itoa(int(cq))
				} else {
					glog.Warning("Cannot use CQ param, value out of range (0-51)")
				}
			}
			switch p.Profile.Profile {
			case ProfileH264Baseline, ProfileH264ConstrainedHigh:
				if p.Accel != Netint {
					p.VideoEncoder.Opts["profile"] = ProfileParameters[p.Profile.Profile]
					p.VideoEncoder.Opts["bf"] = "0"
				} else {
					xcoderOutParamsStr = "profile=high:gopPresetIdx=2"
				}
			case ProfileH264Main, ProfileH264High:
				if p.Accel != Netint {
					p.VideoEncoder.Opts["profile"] = ProfileParameters[p.Profile.Profile]
					p.VideoEncoder.Opts["bf"] = "3"
				} else {
					xcoderOutParamsStr = "profile=high"
				}
			case ProfileNone:
				if p.Accel == Nvidia {
					p.VideoEncoder.Opts["bf"] = "0"
				} else {
					p.VideoEncoder.Opts["bf"] = "3"
				}
			default:
				return params, finalizer, ErrTranscoderPrf
			}
			if p.Profile.Framerate == 0 && p.Accel == Nvidia {
				// When the decoded video contains non-monotonic increases in PTS (common with OBS)
				// & when B-frames are enabled nvenc struggles at calculating correct DTS
				// XXX so we disable B-frames altogether to avoid PTS < DTS errors
				if p.VideoEncoder.Opts["bf"] != "0" {
					p.VideoEncoder.Opts["bf"] = "0"
					glog.Warning("Forcing max_b_frames=0 for nvenc, as it can't handle those well with timestamp passthrough")
				}
			}
		}
		gopMs := 0
		if param.GOP != 0 {
			if param.GOP <= GOPInvalid {
				return params, finalizer, ErrTranscoderGOP
			}
			// The defaults above are only installed when no explicit encoder
			// was requested. With an explicit encoder name and nil Opts the
			// "g" assignments below would panic on a nil map, so make sure a
			// map exists first. p is a per-iteration copy, so the caller's
			// TranscodeOptions is not modified by this.
			if p.VideoEncoder.Opts == nil {
				p.VideoEncoder.Opts = map[string]string{}
			}
			// Check for intra-only
			if param.GOP == GOPIntraOnly {
				p.VideoEncoder.Opts["g"] = "0"
			} else {
				if param.Framerate > 0 {
					gop := param.GOP.Seconds()
					interval := strconv.Itoa(int(gop * float64(param.Framerate)))
					p.VideoEncoder.Opts["g"] = interval
				} else {
					gopMs = int(param.GOP.Milliseconds())
				}
			}
		}
		var muxOpts C.component_opts
		var muxName string
		switch p.Profile.Format {
		case FormatNone:
			muxOpts = C.component_opts{
				// don't free this bc of avformat_write_header API
				opts: newAVOpts(p.Muxer.Opts),
			}
			muxName = p.Muxer.Name
		case FormatMPEGTS:
			muxName = "mpegts"
		case FormatMP4:
			muxName = "mp4"
			muxOpts = C.component_opts{
				opts: newAVOpts(map[string]string{"movflags": "faststart"}),
			}
		default:
			return params, finalizer, ErrTranscoderFmt
		}
		if muxName != "" {
			muxOpts.name = C.CString(muxName)
		}
		vidOpts := C.component_opts{
			name: C.CString(encoder),
			opts: newAVOpts(p.VideoEncoder.Opts),
		}
		audioEncoder := p.AudioEncoder.Name
		if audioEncoder == "" {
			audioEncoder = "aac"
		}
		audioOpts := C.component_opts{
			name: C.CString(audioEncoder),
			opts: newAVOpts(p.AudioEncoder.Opts),
		}
		metadata := newAVOpts(p.Metadata)
		fromMs := int(p.From.Milliseconds())
		toMs := int(p.To.Milliseconds())
		vfilt := C.CString(filters)
		oname := C.CString(p.Oname)
		xcoderOutParams := C.CString(xcoderOutParamsStr)
		params[i] = C.output_params{fname: oname, fps: fps,
			w: C.int(w), h: C.int(h), bitrate: C.int(bitrate),
			gop_time: C.int(gopMs), from: C.int(fromMs), to: C.int(toMs),
			muxer: muxOpts, audio: audioOpts, video: vidOpts, metadata: metadata,
			vfilters: vfilt, sfilters: nil, xcoderParams: xcoderOutParams}
		if p.CalcSign {
			//signfilter string
			escapedOname := ffmpegStrEscape(p.Oname)
			signfilter := fmt.Sprintf("signature=filename='%s.bin'", escapedOname)
			if p.Accel == Nvidia {
				//hw frame -> cuda signature -> sign.bin
				signfilter = fmt.Sprintf("signature_cuda=filename='%s.bin'", escapedOname)
			}
			sfilt := C.CString(signfilter)
			params[i].sfilters = sfilt
		}
	}
	return params, finalizer, nil
}
// destroyCOutputParams releases every C allocation referenced by params.
//
// All memory is released conditionally because the creation process may fail
// at any point, leaving the array partially filled. Each pointer is reset to
// nil after it is freed, so calling the function again on the same slice is a
// no-op rather than a double free.
func destroyCOutputParams(params []C.output_params) {
	// freeStr releases a C string and clears the slot that held it.
	freeStr := func(s **C.char) {
		if *s != nil {
			C.free(unsafe.Pointer(*s))
			*s = nil
		}
	}
	// freeDict releases a dictionary; av_dict_free also NULLs the slot.
	freeDict := func(d **C.AVDictionary) {
		if *d != nil {
			C.av_dict_free(d)
		}
	}
	for i := range params {
		// Work on the element itself, not a copy, so the cleared pointers
		// are visible to later calls.
		p := &params[i]
		freeStr(&p.fname)
		freeStr(&p.xcoderParams)
		freeStr(&p.audio.name)
		freeStr(&p.video.name)
		freeStr(&p.vfilters)
		freeStr(&p.muxer.name)
		freeStr(&p.sfilters)
		// dictionaries are freed with special function
		freeDict(&p.audio.opts)
		freeDict(&p.muxer.opts)
		freeDict(&p.video.opts)
		freeDict(&p.metadata)
	}
}
// hasVideoMetadata reports whether fname can be probed up front: it must not
// be a "pipe:" URL (matched case-insensitively), it must exist, and it must
// not be a directory.
func hasVideoMetadata(fname string) bool {
	if lowered := strings.ToLower(fname); strings.HasPrefix(lowered, "pipe:") {
		return false
	}
	if info, err := os.Stat(fname); err == nil {
		return !info.IsDir()
	}
	return false
}
// Transcode runs one transcode of input into the outputs described by ps on
// this transcoder's native session and returns the decoded / encoded
// counters. The call holds the transcoder mutex for its whole duration.
//
// It returns ErrTranscoderStp when the transcoder has been stopped,
// ErrTranscoderInp for a nil input, ErrTranscoderDuration for probed inputs
// longer than 300 seconds, ErrTranscoderVid when the first probed input has no
// video, ErrTranscoderClipConfig for invalid From / To settings, and otherwise
// whatever ErrorMap holds for a non-zero native return code. It panics when
// the native transcoder reports lpms_ERR_UNRECOVERABLE.
//
// Note that input may be modified: Fname can be replaced by a fixed-up copy
// of the file, and Profile.FramerateDen / Demuxer.Opts are filled in for
// ".png" inputs.
func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions) (*TranscodeResults, error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.stopped || t.handle == nil {
		return nil, ErrTranscoderStp
	}
	if input == nil {
		return nil, ErrTranscoderInp
	}
	var reopendemux bool
	reopendemux = false
	// don't read metadata for inputs without video metadata, because it can't seek back and av_find_input_format in the decoder will fail
	if hasVideoMetadata(input.Fname) {
		status, format, err := GetCodecInfo(input.Fname)
		if err != nil {
			return nil, err
		}
		if format.DurSecs > 300 {
			glog.Errorf("Input file %s has duration of %d seconds, which is more than 5 minutes. This is not supported by the transcoder.", input.Fname, format.DurSecs)
			return nil, ErrTranscoderDuration
		}
		// TODO hoist the rest of this into C so we don't have to invoke GetCodecInfo
		if !t.started {
			// NeedsBypass is state where video is present in container & without any frames
			videoMissing := status == CodecStatusNeedsBypass || format.Vcodec == ""
			if videoMissing {
				// Audio-only segment, fail fast right here as we cannot handle them nicely
				return nil, ErrTranscoderVid
			}
			// keep last audio codec
			t.lastacodec = format.Acodec
			// Stream is either OK or completely broken, let the transcoder handle it
			t.started = true
		} else {
			// check if we need to reopen demuxer because added audio in video
			// TODO: fixes like that are needed because handling of cfg change in
			// LPMS is a joke. We need to decide whether LPMS should support full
			// dynamic config one day and either implement it there, or implement
			// some generic workaround for the problem in Go code, such as marking
			// config changes as significant/insignificant and re-creating the instance
			// if the former type change happens
			if format.Acodec != "" && !isAudioAllDrop(ps) {
				if (t.lastacodec == "") || (t.lastacodec != "" && t.lastacodec != format.Acodec) {
					reopendemux = true
					t.lastacodec = format.Acodec
				}
			}
		}
		// For H.264 in MPEG-TS, run the SEI fix-up; when it hands back a
		// different path, transcode that file and remove it on return.
		if format.Format == "mpegts" && format.Vcodec == "h264" {
			if fixedPath, fixErr := FixMisplacedSEI(input.Fname); fixErr != nil {
				glog.Warningf("SEI fix-up check failed for %s: %v", input.Fname, fixErr)
			} else if fixedPath != input.Fname {
				defer os.Remove(fixedPath)
				input.Fname = fixedPath
			}
		}
	}
	hw_type, err := accelDeviceType(input.Accel)
	if err != nil {
		return nil, err
	}
	// Validate clipping settings before any C memory is allocated.
	for _, p := range ps {
		if p.From != 0 || p.To != 0 {
			if p.VideoEncoder.Name == "drop" || p.VideoEncoder.Name == "copy" {
				glog.Warning("Could clip only when transcoding video")
				return nil, ErrTranscoderClipConfig
			}
			// && binds tighter than ||: rejected when From is negative, or
			// when both bounds are positive and To precedes From.
			if p.From < 0 || p.To > 0 && p.From > 0 && p.To < p.From {
				glog.Warning("'To' should be after 'From'")
				return nil, ErrTranscoderClipConfig
			}
		}
	}
	if input.Transmuxing {
		t.started = true
	}
	// Output configuration
	params, finalizer, err := createCOutputParams(input, ps)
	// This prevents C memory leaks
	defer finalizer()
	// Only now can we do this
	if err != nil {
		return nil, err
	}
	// Input configuration
	var device *C.char
	if input.Device != "" {
		device = C.CString(input.Device)
		defer C.free(unsafe.Pointer(device))
	}
	fname := C.CString(input.Fname)
	defer C.free(unsafe.Pointer(fname))
	xcoderParams := C.CString("")
	defer C.free(unsafe.Pointer(xcoderParams))
	var demuxerOpts C.component_opts
	if input.Demuxer.Name != "" {
		demuxerName := C.CString(input.Demuxer.Name)
		defer C.free(unsafe.Pointer(demuxerName))
		demuxerOpts.name = demuxerName
	}
	ext := filepath.Ext(input.Fname)
	// If the input has an image file extension setup the image2 demuxer
	if ext == ".png" {
		image2 := C.CString("image2")
		defer C.free(unsafe.Pointer(image2))
		// This replaces any demuxer name configured above.
		demuxerOpts = C.component_opts{
			name: image2,
		}
		if input.Profile.Framerate > 0 {
			if input.Profile.FramerateDen == 0 {
				input.Profile.FramerateDen = 1
			}
			// changing the input map here is maybe not great
			if input.Demuxer.Opts == nil {
				input.Demuxer.Opts = map[string]string{}
			}
			input.Demuxer.Opts["framerate"] = fmt.Sprintf("%d/%d", input.Profile.Framerate, input.Profile.FramerateDen)
		}
	}
	if len(input.Demuxer.Opts) > 0 {
		// Do not free in this function because avformat_open_input()
		// in the C code will destroy this
		demuxerOpts.opts = newAVOpts(input.Demuxer.Opts)
	}
	inp := &C.input_params{fname: fname, hw_type: hw_type, device: device, xcoderParams: xcoderParams,
		handle: t.handle, demuxer: demuxerOpts}
	if input.Transmuxing {
		inp.transmuxing = 1
	}
	results := make([]C.output_results, len(ps))
	decoded := &C.output_results{}
	// The pointers stay nil when there are no outputs, since &params[0]
	// would be out of range for an empty slice.
	var (
		paramsPointer  *C.output_params
		resultsPointer *C.output_results
	)
	if len(params) > 0 {
		paramsPointer = (*C.output_params)(&params[0])
		resultsPointer = (*C.output_results)(&results[0])
	}
	if reopendemux {
		// forcefully close and open demuxer
		ret := int(C.lpms_transcode_reopen_demux(inp))
		if ret != 0 {
			if LogTranscodeErrors {
				glog.Error("Reopen demux returned : ", ErrorMap[ret])
			}
			return nil, ErrorMap[ret]
		}
	}
	ret := int(C.lpms_transcode(inp, paramsPointer, resultsPointer, C.int(len(params)), decoded))
	if ret != 0 {
		if LogTranscodeErrors {
			glog.Error("Transcoder Return : ", ErrorMap[ret])
		}
		if ret == int(C.lpms_ERR_UNRECOVERABLE) {
			panic(ErrorMap[ret])
		}
		return nil, ErrorMap[ret]
	}
	// Copy the native counters into Go values, one entry per output.
	tr := make([]MediaInfo, len(ps))
	for i, r := range results {
		tr[i] = MediaInfo{
			Frames: int(r.frames),
			Pixels: int64(r.pixels),
		}
	}
	dec := MediaInfo{
		Frames: int(decoded.frames),
		Pixels: int64(decoded.pixels),
	}
	return &TranscodeResults{Encoded: tr, Decoded: dec}, nil
}
// Discontinuity signals a discontinuity in the input to the native
// transcoder. It is a no-op once the transcoder has been stopped, matching
// the guard in Transcode, so the native call never receives a nil handle.
func (t *Transcoder) Discontinuity() {
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.stopped || t.handle == nil {
		return
	}
	C.lpms_transcode_discontinuity(t.handle)
}
// NewTranscoder allocates a native transcode session and returns a Transcoder
// wrapping it. Release it with StopTranscoder.
func NewTranscoder() *Transcoder {
	t := new(Transcoder)
	t.handle = C.lpms_transcode_new()
	t.mu = &sync.Mutex{}
	return t
}
// StopTranscoder stops the native session and marks the transcoder as
// stopped. Calling it again has no effect.
func (t *Transcoder) StopTranscoder() {
	t.mu.Lock()
	defer t.mu.Unlock()
	if !t.stopped {
		C.lpms_transcode_stop(t.handle)
		// prevent accidental reuse
		t.handle = nil
		t.stopped = true
	}
}
// LogLevel is the Go counterpart of the native LPMSLogLevel enum and is the
// argument type of InitFFmpegWithLogLevel.
type LogLevel C.enum_LPMSLogLevel

// Log levels accepted by InitFFmpegWithLogLevel, mirroring the native
// LPMS_LOG_* values.
const (
	FFLogTrace   = C.LPMS_LOG_TRACE
	FFLogDebug   = C.LPMS_LOG_DEBUG
	FFLogVerbose = C.LPMS_LOG_VERBOSE
	FFLogInfo    = C.LPMS_LOG_INFO
	FFLogWarning = C.LPMS_LOG_WARNING
	FFLogError   = C.LPMS_LOG_ERROR
	FFLogFatal   = C.LPMS_LOG_FATAL
	FFLogPanic   = C.LPMS_LOG_PANIC
	FFLogQuiet   = C.LPMS_LOG_QUIET
)
// InitFFmpegWithLogLevel initialises the native library via lpms_init with
// the given log level.
func InitFFmpegWithLogLevel(level LogLevel) {
	nativeLevel := C.enum_LPMSLogLevel(level)
	C.lpms_init(nativeLevel)
}
func InitFFmpeg() {
InitFFmpegWithLogLevel(FFLogWarning)
}
// createBackendConfig builds the backend option string
// "device_id=<deviceid>&sess_config=0x<hex>", where <hex> is a serialized
// Tensorflow ConfigProto (GPU memory growth enabled) written as hex with the
// bytes in reverse order. It returns "" when the proto cannot be serialized.
func createBackendConfig(deviceid string) string {
	configProto := &pb.ConfigProto{GpuOptions: &pb.GPUOptions{AllowGrowth: true}}
	serialized, err := proto.Marshal(configProto)
	if err != nil {
		// Report both the device id and the cause; the message used to print
		// the error in the device id slot.
		glog.Errorf("Unable to convert deviceid %v to Tensorflow config protobuf: %v\n", deviceid, err)
		return ""
	}
	var sb strings.Builder
	sb.WriteString(fmt.Sprintf("device_id=%s&sess_config=0x", deviceid))
	// serialize TF config proto as hex, last byte first
	for i := len(serialized) - 1; i >= 0; i-- {
		sb.WriteString(hex.EncodeToString(serialized[i : i+1]))
	}
	return sb.String()
}
// ffmpegStrEscape escapes origStr for use inside an FFmpeg filter argument:
// every backslash becomes two backslashes and every colon is prefixed with a
// backslash.
func ffmpegStrEscape(origStr string) string {
	var sb strings.Builder
	sb.Grow(len(origStr))
	for i := 0; i < len(origStr); i++ {
		switch c := origStr[i]; c {
		case '\\':
			sb.WriteString("\\\\")
		case ':':
			sb.WriteString("\\:")
		default:
			sb.WriteByte(c)
		}
	}
	return sb.String()
}
// hwScale returns the name of the hardware scale filter for this platform.
func hwScale() string {
	// we don't build windows binaries with CUDA SDK, so need to use scale_cuda instead of scale_npp
	if runtime.GOOS == "windows" {
		return "scale_cuda"
	}
	return "scale_npp"
}
// hwScaleAlgo returns the interpolation algorithm passed to the hardware
// scale filter, or "" to leave the filter's default in place.
func hwScaleAlgo() string {
	// we don't build windows binaries with CUDA SDK, so need to use the default scale algorithm
	if runtime.GOOS == "windows" {
		return ""
	}
	return "super"
}
// FfmpegSetLogLevel sets the FFmpeg library log level via av_log_set_level.
func FfmpegSetLogLevel(level int) {
	nativeLevel := C.int(level)
	C.av_log_set_level(nativeLevel)
}
// FfmpegGetLogLevel returns the current FFmpeg library log level as reported
// by av_log_get_level.
func FfmpegGetLogLevel() int {
	current := C.av_log_get_level()
	return int(current)
}