lpms/ffmpeg/ffmpeg.go

package ffmpeg

import (
	"bytes"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"
	"unsafe"

	"github.com/golang/glog"
	"github.com/golang/protobuf/proto"
	pb "github.com/livepeer/lpms/ffmpeg/proto"
)

// #cgo pkg-config: libavformat libavfilter libavcodec libavutil libswscale
// #include <stdlib.h>
// #include "transcoder.h"
// #include "extras.h"
// #include <libavutil/log.h>
import "C"

// Sentinel errors returned by the functions in this package. Compare against
// them with == or errors.Is.
var (
	ErrTranscoderRes         = errors.New("TranscoderInvalidResolution")
	ErrTranscoderHw          = errors.New("TranscoderInvalidHardware")
	ErrTranscoderInp         = errors.New("TranscoderInvalidInput")
	ErrTranscoderClipConfig  = errors.New("TranscoderInvalidClipConfig")
	ErrTranscoderVid         = errors.New("TranscoderInvalidVideo")
	ErrTranscoderDuration    = errors.New("TranscoderInvalidDuration")
	ErrTranscoderStp         = errors.New("TranscoderStopped")
	ErrTranscoderFmt         = errors.New("TranscoderUnrecognizedFormat")
	ErrTranscoderPrf         = errors.New("TranscoderUnrecognizedProfile")
	ErrTranscoderGOP         = errors.New("TranscoderInvalidGOP")
	ErrTranscoderDev         = errors.New("TranscoderIncompatibleDevices")
	ErrEmptyData             = errors.New("EmptyData")
	ErrSignCompare           = errors.New("InvalidSignData")
	ErrTranscoderPixelformat = errors.New("TranscoderInvalidPixelformat")
	ErrVideoCompare          = errors.New("InvalidVideoData")
)

// LogTranscodeErrors controls whether non-zero return codes from the C
// transcoder (reopen-demux and transcode calls) are logged through glog.
// Set it to false to silence those logs, e.g. when doing test transcoding.
var LogTranscodeErrors = true

// Acceleration identifies the kind of device used to decode or encode.
type Acceleration int

const (
	// Software means no hardware device is used.
	Software Acceleration = iota
	// Nvidia selects Nvidia (CUDA / NVENC) hardware.
	Nvidia
	// Amd is declared but has no encoder or device-type mapping in this file.
	Amd
	// Netint selects NETINT hardware.
	Netint
)

// AccelerationNameLookup maps each Acceleration value to a short
// human-readable name.
var AccelerationNameLookup = map[Acceleration]string{
	Software: "SW",
	Nvidia:   "Nvidia",
	Amd:      "Amd",
	Netint:   "Netint",
}

// FfEncoderLookup maps an acceleration type and a video codec to the name of
// the FFmpeg encoder to use. Combinations that are absent (for example any
// codec with Amd, or VP8/VP9 with Nvidia or Netint) yield the empty string
// when looked up.
var FfEncoderLookup = map[Acceleration]map[VideoCodec]string{
	Software: {
		H264: "libx264",
		H265: "libx265",
		VP8:  "libvpx",
		VP9:  "libvpx-vp9",
	},
	Nvidia: {
		H264: "h264_nvenc",
		H265: "hevc_nvenc",
	},
	Netint: {
		H264: "h264_ni_enc",
		H265: "h265_ni_enc",
	},
}

// ComponentOptions names a component (demuxer, muxer or encoder) and carries
// the key/value options handed to it. The options are converted into an
// AVDictionary before being passed to the C layer.
type ComponentOptions struct {
	Name string
	Opts map[string]string
}

// Transcoder wraps a C transcode thread handle together with the Go-side
// state needed to reuse it across successive Transcode calls. Create one with
// NewTranscoder and release it with StopTranscoder.
type Transcoder struct {
	// handle is the C-side transcoder; set to nil by StopTranscoder.
	handle *C.struct_transcode_thread
	// stopped is set once StopTranscoder has run; Transcode then fails.
	stopped bool
	// started is set after the first input with video has been accepted
	// (or when transmuxing is requested).
	started bool
	// lastacodec is the audio codec name seen on the most recent probed
	// input; a change triggers a demuxer reopen.
	lastacodec string
	// mu serializes Transcode, Discontinuity and StopTranscoder.
	mu *sync.Mutex
}

// TranscodeOptionsIn describes the input of a transcode call.
type TranscodeOptionsIn struct {
	// Fname is the input file name or URL (e.g. "pipe:<fd>").
	Fname string
	// Accel selects the device type used on the input side.
	Accel Acceleration
	// Device is passed to the C layer as the device identifier when non-empty.
	Device string
	// Transmuxing sets the transmuxing flag on the C input parameters.
	Transmuxing bool
	// Profile is consulted for the frame rate when the input is a ".png" file.
	Profile VideoProfile
	// Demuxer optionally names the demuxer and supplies its options.
	Demuxer ComponentOptions
}

// TranscodeOptions describes a single output (rendition) of a transcode call.
type TranscodeOptions struct {
	// Oname is the output file name.
	Oname string
	// Profile carries resolution, bitrate, frame rate, GOP, codec and format.
	Profile VideoProfile
	// Accel selects the device type used on the output side.
	Accel Acceleration
	// Device is the output device identifier; for Nvidia-to-Nvidia transcodes
	// it must be empty or equal to the input device.
	Device string
	// CalcSign adds a signature filter that writes "<Oname>.bin".
	CalcSign bool
	// From and To delimit an optional clip; they are passed to the C layer in
	// milliseconds. Zero means unset.
	From time.Duration
	To   time.Duration

	// Muxer is used when Profile.Format is FormatNone.
	Muxer ComponentOptions
	// VideoEncoder overrides the automatically chosen encoder when Name is
	// set; the names "drop" and "copy" are treated specially.
	VideoEncoder ComponentOptions
	// AudioEncoder defaults to "aac" when Name is empty.
	AudioEncoder ComponentOptions
	// Metadata is passed to the C layer as an AVDictionary.
	Metadata map[string]string
}

// MediaInfo holds the frame and pixel counters reported by the C transcoder
// for one decoded input or one encoded output.
type MediaInfo struct {
	Frames int
	Pixels int64
}

// TranscodeResults is the outcome of a transcode call: counters for the
// decoded input and one entry per requested output, in request order.
type TranscodeResults struct {
	Decoded MediaInfo
	Encoded []MediaInfo
}

// PixelFormat wraps a raw FFmpeg pixel format value (one of the
// AV_PIX_FMT_* constants), as reported by the C codec probe.
type PixelFormat struct {
	RawValue int
}

// Go-side aliases for the FFmpeg AV_PIX_FMT_* values that can appear in
// PixelFormat.RawValue.
const (
	PixelFormatNone        int = C.AV_PIX_FMT_NONE
	PixelFormatYUV420P     int = C.AV_PIX_FMT_YUV420P
	PixelFormatYUYV422     int = C.AV_PIX_FMT_YUYV422
	PixelFormatYUV422P     int = C.AV_PIX_FMT_YUV422P
	PixelFormatYUV444P     int = C.AV_PIX_FMT_YUV444P
	PixelFormatUYVY422     int = C.AV_PIX_FMT_UYVY422
	PixelFormatNV12        int = C.AV_PIX_FMT_NV12
	PixelFormatNV21        int = C.AV_PIX_FMT_NV21
	PixelFormatYUV420P10BE int = C.AV_PIX_FMT_YUV420P10BE
	PixelFormatYUV420P10LE int = C.AV_PIX_FMT_YUV420P10LE
	PixelFormatYUV422P10BE int = C.AV_PIX_FMT_YUV422P10BE
	PixelFormatYUV422P10LE int = C.AV_PIX_FMT_YUV422P10LE
	PixelFormatYUV444P10BE int = C.AV_PIX_FMT_YUV444P10BE
	PixelFormatYUV444P10LE int = C.AV_PIX_FMT_YUV444P10LE
	PixelFormatYUV420P16LE int = C.AV_PIX_FMT_YUV420P16LE
	PixelFormatYUV420P16BE int = C.AV_PIX_FMT_YUV420P16BE
	PixelFormatYUV422P16LE int = C.AV_PIX_FMT_YUV422P16LE
	PixelFormatYUV422P16BE int = C.AV_PIX_FMT_YUV422P16BE
	PixelFormatYUV444P16LE int = C.AV_PIX_FMT_YUV444P16LE
	PixelFormatYUV444P16BE int = C.AV_PIX_FMT_YUV444P16BE
	PixelFormatYUV420P12BE int = C.AV_PIX_FMT_YUV420P12BE
	PixelFormatYUV420P12LE int = C.AV_PIX_FMT_YUV420P12LE
	PixelFormatYUV422P12BE int = C.AV_PIX_FMT_YUV422P12BE
	PixelFormatYUV422P12LE int = C.AV_PIX_FMT_YUV422P12LE
	PixelFormatYUV444P12BE int = C.AV_PIX_FMT_YUV444P12BE
	PixelFormatYUV444P12LE int = C.AV_PIX_FMT_YUV444P12LE
)

// ColorDepthBits encodes the per-component bit depth as an offset from 8:
// the actual bit depth is ColorDepthBits + 8.
type ColorDepthBits int

const (
	ColorDepth8Bit  ColorDepthBits = 0
	ColorDepth10Bit ColorDepthBits = 2
	ColorDepth12Bit ColorDepthBits = 4
	ColorDepth16Bit ColorDepthBits = 8
)

// ChromaSubsampling enumerates the chroma subsampling schemes reported by
// PixelFormat.Properties.
type ChromaSubsampling int

const (
	ChromaSubsampling420 ChromaSubsampling = iota
	ChromaSubsampling422
	ChromaSubsampling444
)

// Properties reports the chroma subsampling and colour depth of the pixel
// format. For any format not listed below it returns 4:2:0 / 8-bit together
// with ErrTranscoderPixelformat.
func (pixelFormat PixelFormat) Properties() (ChromaSubsampling, ColorDepthBits, error) {
	switch pixelFormat.RawValue {
	// 8-bit formats
	case C.AV_PIX_FMT_YUV420P, C.AV_PIX_FMT_NV12, C.AV_PIX_FMT_NV21:
		return ChromaSubsampling420, ColorDepth8Bit, nil
	case C.AV_PIX_FMT_YUYV422, C.AV_PIX_FMT_YUV422P, C.AV_PIX_FMT_UYVY422:
		return ChromaSubsampling422, ColorDepth8Bit, nil
	case C.AV_PIX_FMT_YUV444P:
		return ChromaSubsampling444, ColorDepth8Bit, nil

	// 10-bit formats
	case C.AV_PIX_FMT_YUV420P10BE, C.AV_PIX_FMT_YUV420P10LE:
		return ChromaSubsampling420, ColorDepth10Bit, nil
	case C.AV_PIX_FMT_YUV422P10BE, C.AV_PIX_FMT_YUV422P10LE:
		return ChromaSubsampling422, ColorDepth10Bit, nil
	case C.AV_PIX_FMT_YUV444P10BE, C.AV_PIX_FMT_YUV444P10LE:
		return ChromaSubsampling444, ColorDepth10Bit, nil

	// 12-bit formats
	case C.AV_PIX_FMT_YUV420P12BE, C.AV_PIX_FMT_YUV420P12LE:
		return ChromaSubsampling420, ColorDepth12Bit, nil
	case C.AV_PIX_FMT_YUV422P12BE, C.AV_PIX_FMT_YUV422P12LE:
		return ChromaSubsampling422, ColorDepth12Bit, nil
	case C.AV_PIX_FMT_YUV444P12BE, C.AV_PIX_FMT_YUV444P12LE:
		return ChromaSubsampling444, ColorDepth12Bit, nil

	// 16-bit formats
	case C.AV_PIX_FMT_YUV420P16LE, C.AV_PIX_FMT_YUV420P16BE:
		return ChromaSubsampling420, ColorDepth16Bit, nil
	case C.AV_PIX_FMT_YUV422P16LE, C.AV_PIX_FMT_YUV422P16BE:
		return ChromaSubsampling422, ColorDepth16Bit, nil
	case C.AV_PIX_FMT_YUV444P16LE, C.AV_PIX_FMT_YUV444P16BE:
		return ChromaSubsampling444, ColorDepth16Bit, nil
	}
	return ChromaSubsampling420, ColorDepth8Bit, ErrTranscoderPixelformat
}

// CodecStatus is the status code returned by the C codec probe
// (lpms_get_codec_info).
type CodecStatus int

const (
	// CodecStatusInternalError is the value used before/without a
	// successful probe.
	CodecStatusInternalError CodecStatus = -1
	CodecStatusOk            CodecStatus = 0
	// CodecStatusNeedsBypass: a video stream is present in the container
	// but has no frames.
	CodecStatusNeedsBypass CodecStatus = 1
	// CodecStatusMissing: presumably no usable codec was found — TODO
	// confirm against the C implementation.
	CodecStatusMissing CodecStatus = 2
)

// MediaFormatInfo carries what the codec probe reports about an input:
// container format name, audio/video codec names, pixel format, frame size,
// frame rate, duration in seconds and audio bitrate. String fields are empty
// when the probe did not fill them in.
type MediaFormatInfo struct {
	Format         string
	Acodec, Vcodec string
	PixFormat      PixelFormat
	Width, Height  int
	FPS            float32
	DurSecs        int64
	AudioBitrate   int
}

// ScaledHeight returns the height that keeps the source aspect ratio when the
// picture is scaled to the given width. The result is truncated towards zero.
// It returns 0 when the source width is unknown (zero): dividing by zero
// would otherwise produce Inf/NaN, whose conversion to int is
// implementation-dependent.
func (f *MediaFormatInfo) ScaledHeight(width int) int {
	if f.Width == 0 {
		return 0
	}
	return int(float32(width) * float32(f.Height) / float32(f.Width))
}

// ScaledWidth returns the width that keeps the source aspect ratio when the
// picture is scaled to the given height. The result is truncated towards
// zero. It returns 0 when the source height is unknown (zero): dividing by
// zero would otherwise produce Inf/NaN, whose conversion to int is
// implementation-dependent.
func (f *MediaFormatInfo) ScaledWidth(height int) int {
	if f.Height == 0 {
		return 0
	}
	return int(float32(height) * float32(f.Width) / float32(f.Height))
}

// GetCodecInfo probes fname through the C function lpms_get_codec_info and
// returns its status code together with the format details it reported.
//
// The C side writes the format and codec names into caller-supplied buffers.
// Each buffer starts out as 255 '0' characters; a name is only copied out
// when the buffer now holds a shorter string, otherwise the corresponding
// field stays empty. The returned error is always nil.
func GetCodecInfo(fname string) (CodecStatus, MediaFormatInfo, error) {
	const nameBufLen = 255

	cfname := C.CString(fname)
	defer C.free(unsafe.Pointer(cfname))

	placeholder := strings.Repeat("0", nameBufLen)
	fmtname := C.CString(placeholder)
	defer C.free(unsafe.Pointer(fmtname))
	acodec_c := C.CString(placeholder)
	defer C.free(unsafe.Pointer(acodec_c))
	vcodec_c := C.CString(placeholder)
	defer C.free(unsafe.Pointer(vcodec_c))

	var params_c C.codec_info
	params_c.format_name = fmtname
	params_c.video_codec = vcodec_c
	params_c.audio_codec = acodec_c
	params_c.pixel_format = C.AV_PIX_FMT_NONE

	status := CodecStatus(C.lpms_get_codec_info(cfname, &params_c))

	// readName returns the buffer contents, or "" if it still has the full
	// placeholder length.
	readName := func(buf *C.char) string {
		if C.strlen(buf) < nameBufLen {
			return C.GoString(buf)
		}
		return ""
	}

	info := MediaFormatInfo{
		Format:       readName(fmtname),
		Acodec:       readName(acodec_c),
		Vcodec:       readName(vcodec_c),
		PixFormat:    PixelFormat{int(params_c.pixel_format)},
		Width:        int(params_c.width),
		Height:       int(params_c.height),
		FPS:          float32(params_c.fps),
		DurSecs:      int64(params_c.dur),
		AudioBitrate: int(params_c.audio_bit_rate),
	}
	return status, info, nil
}

// GetCodecInfoBytes probes an in-memory segment. The data is streamed through
// an OS pipe whose read end is handed to GetCodecInfo as "pipe:<fd>", and the
// resulting status and format details are returned.
//
// If the pipe cannot be created, CodecStatusInternalError and ErrEmptyData
// are returned (the error value is kept for backward compatibility).
//
// For mp3, wav and aac inputs that report an audio bitrate but no duration,
// the duration is estimated from the data size and the bitrate.
func GetCodecInfoBytes(data []byte) (CodecStatus, MediaFormatInfo, error) {
	format := MediaFormatInfo{}
	status := CodecStatusInternalError
	or, ow, err := os.Pipe()
	if err != nil {
		// Must be checked before the writer goroutine is started: on failure
		// both pipe ends are nil.
		return status, format, ErrEmptyData
	}
	// Closing the read end on return releases the descriptor, unblocks the
	// writer goroutine if the probe stopped reading early, and keeps the
	// *os.File reachable so a finalizer cannot close the descriptor while
	// the C code is still reading from it.
	defer or.Close()
	go func() {
		br := bytes.NewReader(data)
		// Best effort: a write error only means the reader went away.
		io.Copy(ow, br)
		ow.Close()
	}()
	fname := fmt.Sprintf("pipe:%d", or.Fd())
	status, format, err = GetCodecInfo(fname)

	// estimate duration from bitrate and filesize for audio
	// some formats do not have built-in track duration metadata,
	// and pipes do not have a filesize on their own which breaks ffmpeg's own
	// duration estimates. So do the estimation calculation ourselves
	// NB : mpegts has the same problem but may contain video so let's not handle that
	//      some other formats, eg ogg, show zero bitrate
	//
	// ffmpeg estimation of duration from bitrate:
	// https://github.com/FFmpeg/FFmpeg/blob/8280ec7a3213c9b7bad88aac3695be2dedd2c00b/libavformat/demux.c#L1798
	if format.DurSecs == 0 && format.AudioBitrate > 0 && (format.Format == "mp3" || format.Format == "wav" || format.Format == "aac") {
		format.DurSecs = int64(len(data) * 8 / format.AudioBitrate)
	}
	return status, format, err
}

// HasZeroVideoFrameBytes probes an in-memory segment and reports whether the
// probe returned CodecStatusNeedsBypass, i.e. a video stream is present but
// carries no frames. Empty data yields ErrEmptyData; a failure to create the
// pipe used to feed the probe is returned as is.
func HasZeroVideoFrameBytes(data []byte) (bool, error) {
	if len(data) == 0 {
		return false, ErrEmptyData
	}
	or, ow, err := os.Pipe()
	if err != nil {
		return false, err
	}
	// Release the read end on return. This also keeps the *os.File reachable
	// so a finalizer cannot close the descriptor while the C code reads it.
	defer or.Close()
	fname := fmt.Sprintf("pipe:%d", or.Fd())
	go func() {
		br := bytes.NewReader(data)
		// Best effort: a write error only means the reader went away.
		io.Copy(ow, br)
		ow.Close()
	}()
	status, _, err := GetCodecInfo(fname)
	// Unblock the writer goroutine in case the probe did not consume
	// everything; a second Close on the same file is harmless.
	ow.Close()
	return status == CodecStatusNeedsBypass, err
}

// CompareSignatureByPath compares two signature files and reports whether
// they match. An empty file name yields (false, nil) without calling into C.
// A negative result from the C comparison is reported as ErrSignCompare.
func CompareSignatureByPath(fname1 string, fname2 string) (bool, error) {
	if fname1 == "" || fname2 == "" {
		return false, nil
	}
	first := C.CString(fname1)
	defer C.free(unsafe.Pointer(first))
	second := C.CString(fname2)
	defer C.free(unsafe.Pointer(second))

	switch rc := int(C.lpms_compare_sign_bypath(first, second)); {
	case rc > 0:
		return true, nil
	case rc == 0:
		return false, nil
	default:
		return false, ErrSignCompare
	}
}

// compare two signature buffers whether those matches or not
func CompareSignatureByBuffer(data1 []byte, data2 []byte) (bool, error) {

	pdata1 := unsafe.Pointer(&data1[0])
	pdata2 := unsafe.Pointer(&data2[0])

	res := int(C.lpms_compare_sign_bybuffer(pdata1, C.int(len(data1)), pdata2, C.int(len(data2))))

	if res > 0 {
		return true, nil
	} else if res == 0 {
		return false, nil
	} else {
		return false, ErrSignCompare
	}
}

// CompareVideoByPath compares two video files and reports whether they
// match. An empty file name yields (false, nil) without calling into C.
// The C comparison returns 0 for a match and 1 for a mismatch; any other
// value is reported as ErrVideoCompare.
func CompareVideoByPath(fname1 string, fname2 string) (bool, error) {
	if fname1 == "" || fname2 == "" {
		return false, nil
	}
	first := C.CString(fname1)
	defer C.free(unsafe.Pointer(first))
	second := C.CString(fname2)
	defer C.free(unsafe.Pointer(second))

	switch int(C.lpms_compare_video_bypath(first, second)) {
	case 0:
		return true, nil
	case 1:
		return false, nil
	default:
		return false, ErrVideoCompare
	}
}

// compare two video buffers whether those matches or not
func CompareVideoByBuffer(data1 []byte, data2 []byte) (bool, error) {

	pdata1 := unsafe.Pointer(&data1[0])
	pdata2 := unsafe.Pointer(&data2[0])

	res := int(C.lpms_compare_video_bybuffer(pdata1, C.int(len(data1)), pdata2, C.int(len(data2))))

	if res == 0 {
		return true, nil
	} else if res == 1 {
		return false, nil
	} else {
		return false, ErrVideoCompare
	}
}

// RTMPToHLS hands the RTMP URL, the output playlist name, the segment file
// template, the segment length and the starting segment number to the C
// function lpms_rtmp2hls. On a non-zero return code the code is logged and
// the matching ErrorMap entry is returned; otherwise nil.
func RTMPToHLS(localRTMPUrl string, outM3U8 string, tmpl string, seglen_secs string, seg_start int) error {
	cInput := C.CString(localRTMPUrl)
	defer C.free(unsafe.Pointer(cInput))
	cPlaylist := C.CString(outM3U8)
	defer C.free(unsafe.Pointer(cPlaylist))
	cTemplate := C.CString(tmpl)
	defer C.free(unsafe.Pointer(cTemplate))
	cSegLen := C.CString(seglen_secs)
	defer C.free(unsafe.Pointer(cSegLen))
	cSegStart := C.CString(fmt.Sprintf("%v", seg_start))
	defer C.free(unsafe.Pointer(cSegStart))

	rc := int(C.lpms_rtmp2hls(cInput, cPlaylist, cTemplate, cSegLen, cSegStart))
	if rc == 0 {
		return nil
	}
	glog.Infof("RTMP2HLS Transmux Return : %v\n", Strerror(rc))
	return ErrorMap[rc]
}

// Transcode transcodes input in software into one output per profile. Output
// i is written to workDir as "out<i><base name of input>".
func Transcode(input string, workDir string, ps []VideoProfile) error {
	base := filepath.Base(input)
	outputs := make([]TranscodeOptions, 0, len(ps))
	for idx, profile := range ps {
		outputs = append(outputs, TranscodeOptions{
			Oname:   path.Join(workDir, fmt.Sprintf("out%v%v", idx, base)),
			Profile: profile,
			Accel:   Software,
		})
	}
	return Transcode2(&TranscodeOptionsIn{Fname: input, Accel: Software}, outputs)
}

// newAVOpts converts a Go string map into a newly allocated AVDictionary.
// It returns nil for a nil or empty map. The caller owns the dictionary and
// must release it (or pass it to an API that takes ownership).
func newAVOpts(opts map[string]string) *C.AVDictionary {
	var dict *C.AVDictionary
	for key, value := range opts {
		k := C.CString(key)
		v := C.CString(value)
		// With flags == 0 av_dict_set duplicates key and value, so the
		// temporary C strings can be released right away instead of piling
		// up deferred frees until the function returns.
		C.av_dict_set(&dict, k, v, 0)
		C.free(unsafe.Pointer(k))
		C.free(unsafe.Pointer(v))
	}
	return dict
}

// configEncoder picks the encoder name, the scale filter chain and the
// scaler interpolation algorithm for the given combination of input and
// output acceleration.
//
// It returns ErrTranscoderDev for device combinations that are explicitly
// rejected (Nvidia decode and encode on different devices, NETINT input with
// software or Nvidia output) and ErrTranscoderHw for every other unsupported
// combination.
func configEncoder(inOpts *TranscodeOptionsIn, outOpts TranscodeOptions) (string, string, string, error) {
	encoder := FfEncoderLookup[outOpts.Accel][outOpts.Profile.Encoder]
	from, to := inOpts.Accel, outOpts.Accel

	switch {
	case from == Software && to == Software:
		return encoder, "scale", "", nil

	case from == Software && to == Nvidia:
		// Frames are decoded in software and must be uploaded to the GPU first.
		uploadFilter := "hwupload_cuda"
		if outOpts.Device != "" {
			uploadFilter += "=device=" + outOpts.Device
		}
		return encoder, uploadFilter + "," + hwScale(), hwScaleAlgo(), nil

	case from == Nvidia && to == Software:
		return encoder, hwScale(), hwScaleAlgo(), nil

	case from == Nvidia && to == Nvidia:
		// Encoding on a different device than decoding would need a transfer,
		// which is not allowed.
		if outOpts.Device != "" && outOpts.Device != inOpts.Device {
			return "", "", "", ErrTranscoderDev
		}
		return encoder, hwScale(), hwScaleAlgo(), nil

	case from == Netint && (to == Software || to == Nvidia):
		// No mix-and-match between NETINT and software/Nvidia.
		return "", "", "", ErrTranscoderDev

	case from == Netint && to == Netint:
		// NETINT uses the software scale filter.
		return encoder, "scale", "", nil
	}
	return "", "", "", ErrTranscoderHw
}
// accelDeviceType maps an Acceleration value to the FFmpeg hardware device
// type passed to the C transcoder. Unsupported values yield
// AV_HWDEVICE_TYPE_NONE together with ErrTranscoderHw.
func accelDeviceType(accel Acceleration) (C.enum_AVHWDeviceType, error) {
	if accel == Software {
		return C.AV_HWDEVICE_TYPE_NONE, nil
	}
	if accel == Nvidia {
		return C.AV_HWDEVICE_TYPE_CUDA, nil
	}
	if accel == Netint {
		return C.AV_HWDEVICE_TYPE_MEDIACODEC, nil
	}
	return C.AV_HWDEVICE_TYPE_NONE, ErrTranscoderHw
}

// Transcode2 runs Transcode3 and discards the results, returning only the
// error.
func Transcode2(input *TranscodeOptionsIn, ps []TranscodeOptions) error {
	if _, err := Transcode3(input, ps); err != nil {
		return err
	}
	return nil
}

// Transcode3 performs a one-shot transcode: it creates a fresh Transcoder,
// runs a single Transcode call on it and stops the transcoder before
// returning.
func Transcode3(input *TranscodeOptionsIn, ps []TranscodeOptions) (*TranscodeResults, error) {
	transcoder := NewTranscoder()
	defer transcoder.StopTranscoder()
	results, err := transcoder.Transcode(input, ps)
	return results, err
}

// CodingSizeLimit holds the smallest and largest frame dimensions, in
// pixels, that an encoder accepts. The field order is relied upon by
// positional literals.
type CodingSizeLimit struct {
	WidthMin  int
	HeightMin int
	WidthMax  int
	HeightMax int
}

// clamp limits val to the range [min, max]. The lower bound is checked
// first, so min wins whenever val <= min, even if min > max.
func clamp(val, min, max int) int {
	switch {
	case val <= min:
		return min
	case val >= max:
		return max
	default:
		return val
	}
}

// nvidiaCodecSizeLimts lists the frame size limits of 7th generation NVENC
// per codec. Codecs without an entry yield the zero CodingSizeLimit.
var nvidiaCodecSizeLimts = map[VideoCodec]CodingSizeLimit{
	H264: {WidthMin: 146, HeightMin: 50, WidthMax: 4096, HeightMax: 4096},
	H265: {WidthMin: 132, HeightMin: 40, WidthMax: 8192, HeightMax: 8192},
}

// isAudioAllDrop reports whether every output uses the "drop" audio encoder.
// It returns true for an empty list.
func isAudioAllDrop(ps []TranscodeOptions) bool {
	for i := range ps {
		if ps[i].AudioEncoder.Name == "drop" {
			continue
		}
		return false
	}
	return true
}

// createCOutputParams builds the C output_params array for the requested
// outputs and returns it together with a finalizer that releases every C
// allocation made here, so that there are no C memory leaks.
//
// The finalizer is always returned and must be called even when an error is
// reported, because the array may be partially filled at that point.
//
// Errors: resolution or bitrate parse errors (unless the video encoder is
// "drop" or "copy"), errors from configEncoder, ErrTranscoderPrf for an
// unknown H.264 profile, ErrTranscoderGOP for an invalid GOP and
// ErrTranscoderFmt for an unknown container format.
func createCOutputParams(input *TranscodeOptionsIn, ps []TranscodeOptions) ([]C.output_params, func(), error) {
	params := make([]C.output_params, len(ps))
	finalizer := func() { destroyCOutputParams(params) }
	for i, p := range ps {
		param := p.Profile
		w, h, err := VideoProfileResolution(param)
		if err != nil {
			if p.VideoEncoder.Name != "drop" && p.VideoEncoder.Name != "copy" {
				return params, finalizer, err
			}
		}
		br := strings.Replace(param.Bitrate, "k", "000", 1)
		bitrate, err := strconv.Atoi(br)
		if err != nil {
			if p.VideoEncoder.Name != "drop" && p.VideoEncoder.Name != "copy" {
				return params, finalizer, err
			}
		}
		encoder, scale_filter := p.VideoEncoder.Name, "scale"
		var interpAlgo string
		if encoder == "" {
			encoder, scale_filter, interpAlgo, err = configEncoder(input, p)
			if err != nil {
				return params, finalizer, err
			}
		}

		// use these as common limits even for non-nvidia
		limits := nvidiaCodecSizeLimts[param.Encoder]
		w = clamp(w, limits.WidthMin, limits.WidthMax)
		h = clamp(h, limits.HeightMin, limits.HeightMax)

		/*
			use the larger dimension requested from the transcode resolution
			and proportionally rescale the smaller side of the input down
			this does imply transposing the dimensions if necessary

			if this causes the smaller side to rescale below the minimum
			then set the smaller side to the minimum
			and rescale the larger side instead

			NB possibility that could also blow past the maximum
			but the aspect ratio would have to be super skewed

			TODO check for unsupportable aspect ratios in the C
		*/

		maxD := w
		if h > w {
			maxD = h
		}

		wExpr := fmt.Sprintf(`trunc(
				if(gte(iw,ih),
					if(gte(%d*ih/iw,%d),%d,-2),
					if(gte(%d,%d*iw/ih),%d,-2)
				)/2)*2`, maxD, limits.HeightMin, maxD, limits.WidthMin, maxD, limits.WidthMin)

		hExpr := fmt.Sprintf(`trunc(
			if(gt(ih,iw),
				if(gte(%d*iw/ih,%d),%d,-2),
				if(gte(%d,%d*ih/iw),%d,-2)
			)/2)*2`, maxD, limits.WidthMin, maxD, limits.HeightMin, maxD, limits.HeightMin)

		filters := fmt.Sprintf("%s='w=%s:h=%s'", scale_filter, wExpr, hExpr)

		if interpAlgo != "" {
			filters = fmt.Sprintf("%s:interp_algo=%s", filters, interpAlgo)
		}
		if input.Accel == Nvidia && p.Accel == Software {
			// needed for hw dec -> hw rescale -> sw enc
			filters = filters + ",hwdownload,format=nv12"
		}
		if p.Accel == Nvidia && filepath.Ext(input.Fname) == ".png" {
			// If the input is PNG image(s) and we are scaling on a Nvidia device
			// we need to first convert to a pixel format that the scale_npp filter supports
			filters = "format=nv12," + filters
		}
		// set FPS denominator to 1 if unset by user
		if param.FramerateDen == 0 {
			param.FramerateDen = 1
		}
		// Add fps filter *after* scale filter because otherwise we could
		// be scaling duplicate frames unnecessarily. This becomes a DoS vector
		// when a user submits two frames that are "far apart" in pts and
		// the fps filter duplicates frames to fill out the difference to maintain
		// a consistent frame rate.
		// Once we allow for alternating segments, this issue should be mitigated
		// and the fps filter can come *before* the scale filter to minimize work
		// when going from high fps to low fps (much more common when transcoding
		// than going from low fps to high fps)
		var fps C.AVRational
		if param.Framerate > 0 {
			filters += fmt.Sprintf(",fps=%d/%d", param.Framerate, param.FramerateDen)
			fps = C.AVRational{num: C.int(param.Framerate), den: C.int(param.FramerateDen)}
		}

		// Set video encoder options
		// TODO understand how h264 profiles and GOP setting works for
		// NETINT encoder, and make sure we change relevant things here
		// Any other options for the encoder can also be added here
		xcoderOutParamsStr := ""
		if len(p.VideoEncoder.Name) <= 0 && len(p.VideoEncoder.Opts) <= 0 {
			p.VideoEncoder.Opts = map[string]string{
				"forced-idr": "1",
				"preset":     "medium",
				"tier":       "high",
			}
			if p.Profile.Quality != 0 {
				if p.Profile.Quality <= 63 {
					p.VideoEncoder.Opts["crf"] = strconv.Itoa(int(p.Profile.Quality))
				} else {
					glog.Warning("Cannot use CRF param, value out of range (0-63)")
				}

				// There's no direct numerical correspondence between CQ and CRF.
				// From some experiments, it seems that setting CQ = CRF + 7 gives similar visual effects.
				cq := p.Profile.Quality + 7
				if cq <= 51 {
					p.VideoEncoder.Opts["cq"] = strconv.Itoa(int(cq))
				} else {
					glog.Warning("Cannot use CQ param, value out of range (0-51)")
				}
			}
			switch p.Profile.Profile {
			case ProfileH264Baseline, ProfileH264ConstrainedHigh:
				if p.Accel != Netint {
					p.VideoEncoder.Opts["profile"] = ProfileParameters[p.Profile.Profile]
					p.VideoEncoder.Opts["bf"] = "0"
				} else {
					xcoderOutParamsStr = "profile=high:gopPresetIdx=2"
				}
			case ProfileH264Main, ProfileH264High:
				if p.Accel != Netint {
					p.VideoEncoder.Opts["profile"] = ProfileParameters[p.Profile.Profile]
					p.VideoEncoder.Opts["bf"] = "3"
				} else {
					xcoderOutParamsStr = "profile=high"
				}
			case ProfileNone:
				if p.Accel == Nvidia {
					p.VideoEncoder.Opts["bf"] = "0"
				} else {
					p.VideoEncoder.Opts["bf"] = "3"
				}
			default:
				return params, finalizer, ErrTranscoderPrf
			}
			if p.Profile.Framerate == 0 && p.Accel == Nvidia {
				// When the decoded video contains non-monotonic increases in PTS (common with OBS)
				// & when B-frames are enabled nvenc struggles at calculating correct DTS
				// XXX so we disable B-frames altogether to avoid PTS < DTS errors
				if p.VideoEncoder.Opts["bf"] != "0" {
					p.VideoEncoder.Opts["bf"] = "0"
					glog.Warning("Forcing max_b_frames=0 for nvenc, as it can't handle those well with timestamp passthrough")
				}
			}
		}

		gopMs := 0
		if param.GOP != 0 {
			if param.GOP <= GOPInvalid {
				return params, finalizer, ErrTranscoderGOP
			}
			// A caller-named encoder may come without any options; the map
			// must exist before the "g" option is written, otherwise the
			// assignment below panics on a nil map.
			if p.VideoEncoder.Opts == nil {
				p.VideoEncoder.Opts = map[string]string{}
			}
			// Check for intra-only
			if param.GOP == GOPIntraOnly {
				p.VideoEncoder.Opts["g"] = "0"
			} else {
				if param.Framerate > 0 {
					gop := param.GOP.Seconds()
					interval := strconv.Itoa(int(gop * float64(param.Framerate)))
					p.VideoEncoder.Opts["g"] = interval
				} else {
					gopMs = int(param.GOP.Milliseconds())
				}
			}
		}

		var muxOpts C.component_opts
		var muxName string
		switch p.Profile.Format {
		case FormatNone:
			muxOpts = C.component_opts{
				// don't free this bc of avformat_write_header API
				opts: newAVOpts(p.Muxer.Opts),
			}
			muxName = p.Muxer.Name
		case FormatMPEGTS:
			muxName = "mpegts"
		case FormatMP4:
			muxName = "mp4"
			muxOpts = C.component_opts{
				opts: newAVOpts(map[string]string{"movflags": "faststart"}),
			}
		default:
			return params, finalizer, ErrTranscoderFmt
		}

		if muxName != "" {
			muxOpts.name = C.CString(muxName)
		}
		vidOpts := C.component_opts{
			name: C.CString(encoder),
			opts: newAVOpts(p.VideoEncoder.Opts),
		}
		audioEncoder := p.AudioEncoder.Name
		if audioEncoder == "" {
			audioEncoder = "aac"
		}
		audioOpts := C.component_opts{
			name: C.CString(audioEncoder),
			opts: newAVOpts(p.AudioEncoder.Opts),
		}
		metadata := newAVOpts(p.Metadata)
		fromMs := int(p.From.Milliseconds())
		toMs := int(p.To.Milliseconds())
		vfilt := C.CString(filters)
		oname := C.CString(p.Oname)
		xcoderOutParams := C.CString(xcoderOutParamsStr)
		params[i] = C.output_params{fname: oname, fps: fps,
			w: C.int(w), h: C.int(h), bitrate: C.int(bitrate),
			gop_time: C.int(gopMs), from: C.int(fromMs), to: C.int(toMs),
			muxer: muxOpts, audio: audioOpts, video: vidOpts, metadata: metadata,
			vfilters: vfilt, sfilters: nil, xcoderParams: xcoderOutParams}
		if p.CalcSign {
			//signfilter string
			escapedOname := ffmpegStrEscape(p.Oname)
			signfilter := fmt.Sprintf("signature=filename='%s.bin'", escapedOname)
			if p.Accel == Nvidia {
				//hw frame -> cuda signature -> sign.bin
				signfilter = fmt.Sprintf("signature_cuda=filename='%s.bin'", escapedOname)
			}
			sfilt := C.CString(signfilter)
			params[i].sfilters = sfilt
		}
	}

	return params, finalizer, nil
}

// destroyCOutputParams releases every C allocation referenced by params.
//
// All memory is released conditionally because the creation process may fail
// at any point, so the array may be only partially filled. Entries are
// modified in place and every released pointer is reset to nil, which leaves
// no dangling pointers behind and makes a repeated call harmless.
func destroyCOutputParams(params []C.output_params) {
	for i := range params {
		p := &params[i]

		if p.fname != nil {
			C.free(unsafe.Pointer(p.fname))
			p.fname = nil
		}
		if p.xcoderParams != nil {
			C.free(unsafe.Pointer(p.xcoderParams))
			p.xcoderParams = nil
		}
		if p.audio.name != nil {
			C.free(unsafe.Pointer(p.audio.name))
			p.audio.name = nil
		}
		if p.video.name != nil {
			C.free(unsafe.Pointer(p.video.name))
			p.video.name = nil
		}
		if p.vfilters != nil {
			C.free(unsafe.Pointer(p.vfilters))
			p.vfilters = nil
		}
		if p.muxer.name != nil {
			C.free(unsafe.Pointer(p.muxer.name))
			p.muxer.name = nil
		}
		if p.sfilters != nil {
			C.free(unsafe.Pointer(p.sfilters))
			p.sfilters = nil
		}

		// dictionaries are freed with a special function, which also resets
		// the pointer it is given
		if p.audio.opts != nil {
			C.av_dict_free(&p.audio.opts)
		}
		if p.muxer.opts != nil {
			C.av_dict_free(&p.muxer.opts)
		}
		if p.video.opts != nil {
			C.av_dict_free(&p.video.opts)
		}
		if p.metadata != nil {
			C.av_dict_free(&p.metadata)
		}
	}
}

// hasVideoMetadata reports whether fname can be probed before transcoding:
// it must not be a "pipe:" URL (case-insensitive) and must name an existing
// filesystem entry that is not a directory.
func hasVideoMetadata(fname string) bool {
	if isPipe := strings.HasPrefix(strings.ToLower(fname), "pipe:"); isPipe {
		return false
	}
	info, statErr := os.Stat(fname)
	return statErr == nil && !info.IsDir()
}

// Transcode decodes input and produces one output per entry of ps, reusing
// the transcoder's C handle across calls.
//
// Inputs that exist on disk are probed first: inputs longer than 300 seconds
// are rejected with ErrTranscoderDuration, the first input must contain video
// (ErrTranscoderVid otherwise), and on later inputs a change of audio codec
// forces the demuxer to be reopened. MPEG-TS/H.264 inputs are additionally
// passed through FixMisplacedSEI; if that yields a different file it is used
// for this call only and removed before returning.
//
// It returns ErrTranscoderStp when the transcoder has been stopped,
// ErrTranscoderInp for a nil input, ErrTranscoderClipConfig for invalid
// From/To settings, any error from output parameter creation, and the
// ErrorMap entry for a non-zero C return code. It panics when the C layer
// reports lpms_ERR_UNRECOVERABLE.
func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions) (*TranscodeResults, error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.stopped || t.handle == nil {
		return nil, ErrTranscoderStp
	}
	if input == nil {
		return nil, ErrTranscoderInp
	}
	reopendemux := false
	// don't read metadata for inputs without video metadata, because it can't seek back and av_find_input_format in the decoder will fail
	if hasVideoMetadata(input.Fname) {
		status, format, err := GetCodecInfo(input.Fname)
		if err != nil {
			return nil, err
		}
		if format.DurSecs > 300 {
			glog.Errorf("Input file %s has duration of %d seconds, which is more than 5 minutes. This is not supported by the transcoder.", input.Fname, format.DurSecs)
			return nil, ErrTranscoderDuration
		}
		// TODO hoist the rest of this into C so we don't have to invoke GetCodecInfo
		if !t.started {
			// NeedsBypass is state where video is present in container & without any frames
			videoMissing := status == CodecStatusNeedsBypass || format.Vcodec == ""
			if videoMissing {
				// Audio-only segment, fail fast right here as we cannot handle them nicely
				return nil, ErrTranscoderVid
			}
			// keep last audio codec
			t.lastacodec = format.Acodec
			// Stream is either OK or completely broken, let the transcoder handle it
			t.started = true
		} else {
			// check if we need to reopen demuxer because added audio in video
			// TODO: fixes like that are needed because handling of cfg change in
			// LPMS is a joke. We need to decide whether LPMS should support full
			// dynamic config one day and either implement it there, or implement
			// some generic workaround for the problem in Go code, such as marking
			// config changes as significant/insignificant and re-creating the instance
			// if the former type change happens
			if format.Acodec != "" && !isAudioAllDrop(ps) {
				if (t.lastacodec == "") || (t.lastacodec != "" && t.lastacodec != format.Acodec) {
					reopendemux = true
					t.lastacodec = format.Acodec
				}
			}
		}
		if format.Format == "mpegts" && format.Vcodec == "h264" {
			if fixedPath, fixErr := FixMisplacedSEI(input.Fname); fixErr != nil {
				glog.Warningf("SEI fix-up check failed for %s: %v", input.Fname, fixErr)
			} else if fixedPath != input.Fname {
				// Use the fixed copy for this call only: remove it on return
				// and restore the caller's file name, so the caller's input
				// never ends up pointing at a deleted temporary file.
				origFname := input.Fname
				defer func() {
					os.Remove(fixedPath)
					input.Fname = origFname
				}()
				input.Fname = fixedPath
			}
		}
	}
	hw_type, err := accelDeviceType(input.Accel)
	if err != nil {
		return nil, err
	}
	for _, p := range ps {
		if p.From != 0 || p.To != 0 {
			if p.VideoEncoder.Name == "drop" || p.VideoEncoder.Name == "copy" {
				glog.Warning("Could clip only when transcoding video")
				return nil, ErrTranscoderClipConfig
			}
			if p.From < 0 || p.To > 0 && p.From > 0 && p.To < p.From {
				glog.Warning("'To' should be after 'From'")
				return nil, ErrTranscoderClipConfig
			}
		}
	}
	if input.Transmuxing {
		t.started = true
	}
	// Output configuration
	params, finalizer, err := createCOutputParams(input, ps)
	// This prevents C memory leaks
	defer finalizer()
	// Only now can we do this
	if err != nil {
		return nil, err
	}

	// Input configuration
	var device *C.char
	if input.Device != "" {
		device = C.CString(input.Device)
		defer C.free(unsafe.Pointer(device))
	}
	fname := C.CString(input.Fname)
	defer C.free(unsafe.Pointer(fname))
	xcoderParams := C.CString("")
	defer C.free(unsafe.Pointer(xcoderParams))

	var demuxerOpts C.component_opts

	if input.Demuxer.Name != "" {
		demuxerName := C.CString(input.Demuxer.Name)
		defer C.free(unsafe.Pointer(demuxerName))
		demuxerOpts.name = demuxerName
	}

	ext := filepath.Ext(input.Fname)
	// If the input has an image file extension setup the image2 demuxer
	if ext == ".png" {
		image2 := C.CString("image2")
		defer C.free(unsafe.Pointer(image2))

		demuxerOpts = C.component_opts{
			name: image2,
		}

		if input.Profile.Framerate > 0 {
			if input.Profile.FramerateDen == 0 {
				input.Profile.FramerateDen = 1
			}

			// changing the input map here is maybe not great
			if input.Demuxer.Opts == nil {
				input.Demuxer.Opts = map[string]string{}
			}
			input.Demuxer.Opts["framerate"] = fmt.Sprintf("%d/%d", input.Profile.Framerate, input.Profile.FramerateDen)
		}
	}

	if len(input.Demuxer.Opts) > 0 {
		// Do not free in this function because avformat_open_input()
		// in the C code will destroy this
		demuxerOpts.opts = newAVOpts(input.Demuxer.Opts)
	}

	inp := &C.input_params{fname: fname, hw_type: hw_type, device: device, xcoderParams: xcoderParams,
		handle: t.handle, demuxer: demuxerOpts}
	if input.Transmuxing {
		inp.transmuxing = 1
	}
	results := make([]C.output_results, len(ps))
	decoded := &C.output_results{}
	var (
		paramsPointer  *C.output_params
		resultsPointer *C.output_results
	)
	// With no outputs the pointers stay nil; indexing element 0 would panic.
	if len(params) > 0 {
		paramsPointer = (*C.output_params)(&params[0])
		resultsPointer = (*C.output_results)(&results[0])
	}
	if reopendemux {
		// forcefully close and open demuxer
		ret := int(C.lpms_transcode_reopen_demux(inp))
		if ret != 0 {
			if LogTranscodeErrors {
				glog.Error("Reopen demux returned : ", ErrorMap[ret])
			}
			return nil, ErrorMap[ret]
		}
	}

	ret := int(C.lpms_transcode(inp, paramsPointer, resultsPointer, C.int(len(params)), decoded))
	if ret != 0 {
		if LogTranscodeErrors {
			glog.Error("Transcoder Return : ", ErrorMap[ret])
		}
		if ret == int(C.lpms_ERR_UNRECOVERABLE) {
			panic(ErrorMap[ret])
		}
		return nil, ErrorMap[ret]
	}
	tr := make([]MediaInfo, len(ps))
	for i, r := range results {
		tr[i] = MediaInfo{
			Frames: int(r.frames),
			Pixels: int64(r.pixels),
		}
	}
	dec := MediaInfo{
		Frames: int(decoded.frames),
		Pixels: int64(decoded.pixels),
	}
	return &TranscodeResults{Encoded: tr, Decoded: dec}, nil
}

// Discontinuity forwards a discontinuity notification for this transcoder to
// the C layer (lpms_transcode_discontinuity). It is a no-op once the
// transcoder has been stopped, because the handle is nil from then on; this
// mirrors the guard at the top of Transcode.
func (t *Transcoder) Discontinuity() {
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.stopped || t.handle == nil {
		return
	}
	C.lpms_transcode_discontinuity(t.handle)
}

// NewTranscoder allocates a C transcoder handle and returns a Transcoder
// wrapping it. Release it with StopTranscoder when done.
func NewTranscoder() *Transcoder {
	t := new(Transcoder)
	t.handle = C.lpms_transcode_new()
	t.mu = new(sync.Mutex)
	return t
}

// StopTranscoder stops the C transcoder and marks this Transcoder as
// stopped. Calling it more than once is harmless; only the first call
// reaches the C layer.
func (t *Transcoder) StopTranscoder() {
	t.mu.Lock()
	defer t.mu.Unlock()
	if !t.stopped {
		C.lpms_transcode_stop(t.handle)
		// prevent accidental reuse
		t.handle = nil
		t.stopped = true
	}
}

// LogLevel is the Go counterpart of the C enum LPMSLogLevel, accepted by
// InitFFmpegWithLogLevel.
type LogLevel C.enum_LPMSLogLevel

// Log levels mirroring the LPMS_LOG_* values of the C layer.
const (
	FFLogTrace   = C.LPMS_LOG_TRACE
	FFLogDebug   = C.LPMS_LOG_DEBUG
	FFLogVerbose = C.LPMS_LOG_VERBOSE
	FFLogInfo    = C.LPMS_LOG_INFO
	FFLogWarning = C.LPMS_LOG_WARNING
	FFLogError   = C.LPMS_LOG_ERROR
	FFLogFatal   = C.LPMS_LOG_FATAL
	FFLogPanic   = C.LPMS_LOG_PANIC
	FFLogQuiet   = C.LPMS_LOG_QUIET
)

// InitFFmpegWithLogLevel initializes the C layer (lpms_init) with the given
// log level.
func InitFFmpegWithLogLevel(level LogLevel) {
	C.lpms_init(C.enum_LPMSLogLevel(level))
}

// InitFFmpeg initializes the C layer with the warning log level.
func InitFFmpeg() {
	InitFFmpegWithLogLevel(FFLogWarning)
}

// createBackendConfig builds a backend option string of the form
// "device_id=<deviceid>&sess_config=0x<hex>", where <hex> is a serialized
// ConfigProto (GPU memory growth enabled) written as hex with the bytes in
// reverse order. It returns "" if the proto cannot be marshalled.
func createBackendConfig(deviceid string) string {
	configProto := &pb.ConfigProto{GpuOptions: &pb.GPUOptions{AllowGrowth: true}}
	serialized, err := proto.Marshal(configProto)
	if err != nil {
		glog.Errorf("Unable to convert deviceid %v to Tensorflow config protobuf\n", err)
		return ""
	}
	var sb strings.Builder
	fmt.Fprintf(&sb, "device_id=%s&sess_config=0x", deviceid)
	// serialize TF config proto as hex, last byte first
	for idx := len(serialized) - 1; idx >= 0; idx-- {
		sb.WriteString(hex.EncodeToString(serialized[idx : idx+1]))
	}
	return sb.String()
}

// ffmpegStrEscape escapes a string for use inside an FFmpeg filter argument:
// every backslash is doubled and every colon is prefixed with a backslash.
func ffmpegStrEscape(origStr string) string {
	escaper := strings.NewReplacer(
		"\\", "\\\\",
		":", "\\:",
	)
	return escaper.Replace(origStr)
}

// hwScale returns the name of the GPU scale filter for the current platform.
func hwScale() string {
	if runtime.GOOS != "windows" {
		return "scale_npp"
	}
	// we don't build windows binaries with CUDA SDK, so need to use scale_cuda instead of scale_npp
	return "scale_cuda"
}

// hwScaleAlgo returns the interpolation algorithm to request from the GPU
// scale filter, or "" to keep the filter's default.
func hwScaleAlgo() string {
	if runtime.GOOS != "windows" {
		return "super"
	}
	// we don't build windows binaries with CUDA SDK, so need to use the default scale algorithm
	return ""
}

// FfmpegSetLogLevel sets FFmpeg's global log level via av_log_set_level.
func FfmpegSetLogLevel(level int) {
	C.av_log_set_level(C.int(level))
}

// FfmpegGetLogLevel returns FFmpeg's current global log level as reported by
// av_log_get_level.
func FfmpegGetLogLevel() int {
	return int(C.av_log_get_level())
}