diff --git a/ffmpeg/ffmpeg.go b/ffmpeg/ffmpeg.go
index 04c6fb93bf..38410203c0 100755
--- a/ffmpeg/ffmpeg.go
+++ b/ffmpeg/ffmpeg.go
@@ -912,6 +912,21 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions)
 				}
 			}
 		}
+		if format.Format == "mpegts" && format.Vcodec == "h264" {
+			if fixedPath, fixErr := FixMisplacedSEI(input.Fname); fixErr != nil {
+				glog.Warningf("SEI fix-up check failed for %s: %v", input.Fname, fixErr)
+			} else if fixedPath != input.Fname {
+				// Feed the repaired copy to the decoder for this call only: put the
+				// caller's path back on return so the shared options struct never
+				// ends up pointing at the temp file we are about to delete.
+				origFname := input.Fname
+				defer func() {
+					input.Fname = origFname
+					os.Remove(fixedPath)
+				}()
+				input.Fname = fixedPath
+			}
+		}
 	}
 	hw_type, err := accelDeviceType(input.Accel)
 	if err != nil {
diff --git a/ffmpeg/sei_fixup.go b/ffmpeg/sei_fixup.go
new file mode 100644
index 0000000000..1587b04106
--- /dev/null
+++ b/ffmpeg/sei_fixup.go
@@ -0,0 +1,413 @@
+package ffmpeg
+
+import (
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	"github.com/livepeer/joy4/format/ts/tsio"
+)
+
+// MPEG-TS framing constants.
+const (
+	tsPacketSize        = 188    // every TS packet is exactly 188 bytes
+	invalidPID   uint16 = 0x1fff // sentinel for "no video PID found" (the TS null-packet PID)
+)
+
+// H.264 nal_unit_type values (low five bits of the NAL header byte).
+const (
+	nalTypeSliceFirst = 1 // first VCL type: non-IDR slice
+	nalTypeSliceLast  = 5 // last VCL type: IDR slice
+	nalTypeSEI        = 6 // supplemental enhancement information
+	nalTypeAUD        = 9 // access unit delimiter
+)
+
+// byteRange is a half-open [start, end) span of offsets into the segment buffer.
+type byteRange struct {
+	start int
+	end   int
+}
+
+// nalInfo locates one NAL unit inside the concatenated elementary stream:
+// [start, end) covers the start code and the NAL bytes, typ is nal_unit_type.
+type nalInfo struct {
+	start int
+	end   int
+	typ   uint8
+}
+
+// FixMisplacedSEI rewrites a TS segment into a temp file when SEI NAL units are
+// found after VCL NAL units within an access unit. If no fix is needed, it
+// returns the original input path.
+//
+// The temp file is created next to inputPath and has the same size as the
+// input; the caller owns it and must remove it once done. On error the
+// returned path is empty and no temp file is left behind.
+func FixMisplacedSEI(inputPath string) (fixedPath string, err error) {
+	data, err := ioutil.ReadFile(inputPath)
+	if err != nil {
+		return "", err
+	}
+	fixedData, changed := fixSEIOrder(data)
+	if !changed {
+		return inputPath, nil
+	}
+
+	dir := filepath.Dir(inputPath)
+	tmp, err := ioutil.TempFile(dir, "sei-fixup-*.ts")
+	if err != nil {
+		return "", err
+	}
+	tmpPath := tmp.Name()
+	if _, err := tmp.Write(fixedData); err != nil {
+		tmp.Close()
+		os.Remove(tmpPath)
+		return "", err
+	}
+	if err := tmp.Close(); err != nil {
+		os.Remove(tmpPath)
+		return "", err
+	}
+	return tmpPath, nil
+}
+
+// fixSEIOrder returns a copy of the TS segment in data with misplaced SEI NAL
+// units moved ahead of the slices of their access unit, and true. When nothing
+// needs fixing (or the video stream cannot be located) it returns data itself
+// and false. TS and PES headers are left untouched: only the video payload
+// bytes are rewritten in place, so packet boundaries and file size are kept.
+func fixSEIOrder(data []byte) ([]byte, bool) {
+	if len(data) < tsPacketSize {
+		return data, false
+	}
+	videoPID := findVideoPID(data)
+	if videoPID == invalidPID {
+		return data, false
+	}
+
+	// Collect the byte ranges holding video elementary-stream payload.
+	var allPayload []byteRange
+	inVideoPES := false
+	for off := 0; off+tsPacketSize <= len(data); off += tsPacketSize {
+		pkt := data[off : off+tsPacketSize]
+		pid, start, _, hdrlen, err := tsio.ParseTSHeader(pkt)
+		if err != nil || hdrlen >= tsPacketSize {
+			continue
+		}
+		if pid != videoPID {
+			continue
+		}
+		payloadStart := off + hdrlen
+		payloadEnd := off + tsPacketSize
+
+		if start {
+			// A new PES packet begins here; validate its header before
+			// trusting any payload that follows.
+			inVideoPES = false
+			payload := pkt[hdrlen:]
+			if len(payload) < 9 || payload[0] != 0 || payload[1] != 0 || payload[2] != 1 {
+				continue
+			}
+			pesHdrLen, streamid, _, _, _, err := tsio.ParsePESHeader(payload)
+			if err != nil || streamid < 0xe0 || streamid > 0xef {
+				continue
+			}
+			payloadStart += pesHdrLen
+			if payloadStart > payloadEnd {
+				// Header spans this packet; skip payload bytes from this packet.
+				continue
+			}
+			inVideoPES = true
+		}
+
+		if !inVideoPES {
+			continue
+		}
+		if payloadStart < payloadEnd {
+			allPayload = append(allPayload, byteRange{start: payloadStart, end: payloadEnd})
+		}
+	}
+	if len(allPayload) == 0 {
+		return data, false
+	}
+
+	// Copy only once there is something to inspect; the input is never modified.
+	result := make([]byte, len(data))
+	copy(result, data)
+	if !fixPES(data, result, allPayload) {
+		return data, false
+	}
+	return result, true
+}
+
+// findVideoPID returns the PID of the H.264 elementary stream in data, or
+// invalidPID if none is found. It follows the first PAT entry with a non-zero
+// program map PID to that program's PMT and picks the first H.264 stream
+// listed there; failing that, it falls back to the first PID that starts a
+// PES packet with a video stream id (0xe0-0xef).
+func findVideoPID(data []byte) uint16 {
+	pmtPID := invalidPID
+	for off := 0; off+tsPacketSize <= len(data); off += tsPacketSize {
+		pkt := data[off : off+tsPacketSize]
+		pid, start, _, hdrlen, err := tsio.ParseTSHeader(pkt)
+		if err != nil || !start || hdrlen >= tsPacketSize {
+			continue
+		}
+		if pid != tsio.PAT_PID {
+			continue
+		}
+		payload := pkt[hdrlen:]
+		tableid, _, psihdrlen, datalen, err := tsio.ParsePSI(payload)
+		if err != nil || tableid != tsio.TableIdPAT {
+			continue
+		}
+		end := psihdrlen + datalen
+		if end > len(payload) || datalen <= 0 {
+			continue
+		}
+		var pat tsio.PAT
+		if _, err := pat.Unmarshal(payload[psihdrlen:end]); err != nil {
+			continue
+		}
+		for _, e := range pat.Entries {
+			if e.ProgramMapPID != 0 {
+				pmtPID = e.ProgramMapPID
+				break
+			}
+		}
+		if pmtPID != invalidPID {
+			break
+		}
+	}
+
+	if pmtPID != invalidPID {
+		for off := 0; off+tsPacketSize <= len(data); off += tsPacketSize {
+			pkt := data[off : off+tsPacketSize]
+			pid, start, _, hdrlen, err := tsio.ParseTSHeader(pkt)
+			if err != nil || !start || hdrlen >= tsPacketSize || pid != pmtPID {
+				continue
+			}
+			payload := pkt[hdrlen:]
+			tableid, _, psihdrlen, datalen, err := tsio.ParsePSI(payload)
+			if err != nil || tableid != tsio.TableIdPMT {
+				continue
+			}
+			end := psihdrlen + datalen
+			if end > len(payload) || datalen <= 0 {
+				continue
+			}
+			var pmt tsio.PMT
+			if _, err := pmt.Unmarshal(payload[psihdrlen:end]); err != nil {
+				continue
+			}
+			for _, es := range pmt.ElementaryStreamInfos {
+				if es.StreamType == tsio.ElementaryStreamTypeH264 {
+					return es.ElementaryPID
+				}
+			}
+		}
+	}
+
+	// Fallback for truncated segments that may not include PAT/PMT.
+	for off := 0; off+tsPacketSize <= len(data); off += tsPacketSize {
+		pkt := data[off : off+tsPacketSize]
+		pid, start, _, hdrlen, err := tsio.ParseTSHeader(pkt)
+		if err != nil || !start || hdrlen >= tsPacketSize {
+			continue
+		}
+		payload := pkt[hdrlen:]
+		if len(payload) >= 4 && payload[0] == 0 && payload[1] == 0 && payload[2] == 1 {
+			if payload[3] >= 0xe0 && payload[3] <= 0xef {
+				return pid
+			}
+		}
+	}
+	return invalidPID
+}
+
+// fixPES reorders misplaced SEI NAL units in the video elementary stream.
+//
+// ranges lists, in stream order, the spans of orig that hold elementary-stream
+// payload. Their concatenation is split into NAL units and grouped into
+// segments that each start at an access unit delimiter (AUD). Within a segment
+// every SEI found after the first slice is moved directly in front of that
+// slice; all other NAL units keep their relative order. The reordered stream
+// has the same length and is written back over the same ranges of result.
+//
+// It reports whether result was modified. Streams without any AUD are left
+// alone: there an SEI following a slice normally opens the next access unit,
+// so moving it would pile later pictures' SEI onto the first one.
+func fixPES(orig, result []byte, ranges []byteRange) bool {
+	total := 0
+	for _, r := range ranges {
+		if r.end > r.start {
+			total += r.end - r.start
+		}
+	}
+	if total == 0 {
+		return false
+	}
+
+	es := make([]byte, 0, total)
+	for _, r := range ranges {
+		if r.end <= r.start || r.start < 0 || r.end > len(orig) {
+			return false
+		}
+		es = append(es, orig[r.start:r.end]...)
+	}
+	nals := scanNALs(es)
+	if len(nals) == 0 {
+		return false
+	}
+
+	// Access unit boundaries are only known when the stream carries AUDs.
+	hasAUD := false
+	for _, n := range nals {
+		if n.typ == nalTypeAUD {
+			hasAUD = true
+			break
+		}
+	}
+	if !hasAUD {
+		return false
+	}
+
+	// Bytes ahead of the first start code are carried over verbatim.
+	leading := es[:nals[0].start]
+	reordered := make([]byte, 0, len(es))
+	reordered = append(reordered, leading...)
+
+	var changed bool
+	// appendSegment emits one AUD-delimited run of NAL units, hoisting any SEI
+	// that trails the first slice so that it precedes that slice.
+	appendSegment := func(seg []nalInfo) {
+		if len(seg) == 0 {
+			return
+		}
+		firstVCL := -1
+		for i, n := range seg {
+			if n.typ >= nalTypeSliceFirst && n.typ <= nalTypeSliceLast {
+				firstVCL = i
+				break
+			}
+		}
+		if firstVCL < 0 {
+			for _, n := range seg {
+				reordered = append(reordered, es[n.start:n.end]...)
+			}
+			return
+		}
+		misplacedSEI := false
+		for i := firstVCL + 1; i < len(seg); i++ {
+			if seg[i].typ == nalTypeSEI {
+				misplacedSEI = true
+				break
+			}
+		}
+		if !misplacedSEI {
+			for _, n := range seg {
+				reordered = append(reordered, es[n.start:n.end]...)
+			}
+			return
+		}
+
+		changed = true
+		// Everything ahead of the first slice stays put ...
+		for i := 0; i < firstVCL; i++ {
+			n := seg[i]
+			reordered = append(reordered, es[n.start:n.end]...)
+		}
+		// ... then the trailing SEI ...
+		for i := firstVCL; i < len(seg); i++ {
+			n := seg[i]
+			if n.typ == nalTypeSEI {
+				reordered = append(reordered, es[n.start:n.end]...)
+			}
+		}
+		// ... then the slices and whatever else followed them.
+		for i := firstVCL; i < len(seg); i++ {
+			n := seg[i]
+			if n.typ != nalTypeSEI {
+				reordered = append(reordered, es[n.start:n.end]...)
+			}
+		}
+	}
+
+	// Cut a new segment at every AUD (the first NAL opens the first segment).
+	segStart := 0
+	for i := 0; i <= len(nals); i++ {
+		segmentBoundary := i == len(nals) || (i > segStart && nals[i].typ == nalTypeAUD)
+		if !segmentBoundary {
+			continue
+		}
+		appendSegment(nals[segStart:i])
+		segStart = i
+	}
+	if !changed {
+		return false
+	}
+	if len(reordered) != len(es) {
+		// Some bytes were not covered by a NAL entry; refuse to write.
+		return false
+	}
+
+	// Scatter the reordered stream back over the original payload ranges.
+	pos := 0
+	for _, r := range ranges {
+		n := r.end - r.start
+		if n <= 0 {
+			continue
+		}
+		copy(result[r.start:r.end], reordered[pos:pos+n])
+		pos += n
+	}
+	return pos == len(reordered)
+}
+
+// scanNALs splits an Annex B byte stream into NAL units. Each entry spans from
+// its start code up to the next start code (or the end of es). A start code
+// that is directly followed by another start code yields no entry.
+func scanNALs(es []byte) []nalInfo {
+	var nals []nalInfo
+	for pos := 0; pos < len(es); {
+		start, scLen := findStartCode(es, pos)
+		if start < 0 {
+			break
+		}
+		nextStart, _ := findStartCode(es, start+scLen)
+		end := len(es)
+		if nextStart >= 0 {
+			end = nextStart
+		}
+		if start+scLen < end {
+			nals = append(nals, nalInfo{
+				start: start,
+				end:   end,
+				typ:   es[start+scLen] & 0x1f,
+			})
+		}
+		if nextStart < 0 {
+			break
+		}
+		pos = nextStart
+	}
+	return nals
+}
+
+// findStartCode returns the offset and length (3 or 4) of the first Annex B
+// start code (00 00 01 or 00 00 00 01) at or after from, or (-1, 0) if there
+// is none. A start code is only reported when at least one byte follows it.
+func findStartCode(b []byte, from int) (int, int) {
+	for i := from; i+3 < len(b); i++ {
+		if b[i] != 0 || b[i+1] != 0 {
+			continue
+		}
+		if b[i+2] == 1 {
+			return i, 3
+		}
+		if i+4 < len(b) && b[i+2] == 0 && b[i+3] == 1 {
+			return i, 4
+		}
+	}
+	return -1, 0
+}
diff --git a/ffmpeg/sei_fixup_test.go b/ffmpeg/sei_fixup_test.go
new file mode 100644
index 0000000000..4a3a2f532e
--- /dev/null
+++ b/ffmpeg/sei_fixup_test.go
@@ -0,0 +1,164 @@
+package ffmpeg
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestFixMisplacedSEI_BrokenFiles checks that samples with an SEI trailing the
+// slice get rewritten with the SEI ahead of it, at an unchanged file size.
+func TestFixMisplacedSEI_BrokenFiles(t *testing.T) {
+	run, dir := setupTest(t)
+	defer os.RemoveAll(dir)
+
+	run(`
+		cat <<- 'EOF1' > input-sei-order.out
+			Access Unit Delimiter
+			Slice Header
+			Supplemental Enhancement Information
+			Access Unit Delimiter
+		EOF1
+
+		cat <<- 'EOF2' > fixed-sei-order.out
+			Access Unit Delimiter
+			Supplemental Enhancement Information
+			Slice Header
+			Access Unit Delimiter
+		EOF2
+
+		# missing-dts.ts has a SEI pre-prended, check that's preserved
+		cat <<- 'EOF3' > leading-sei.out
+			Supplemental Enhancement Information
+			Access Unit Delimiter
+			Slice Header
+			Supplemental Enhancement Information
+		EOF3
+
+		# missing-dts.ts has a SEI pre-prended, check that's preserved
+		cat <<- 'EOF3' > fixed-leading-sei.out
+			Supplemental Enhancement Information
+			Access Unit Delimiter
+			Supplemental Enhancement Information
+			Slice Header
+		EOF3
+
+	`)
+
+	for _, name := range []string{"skip_1.ts", "skip_3.ts", "missing-dts.ts"} {
+		t.Run(name, func(t *testing.T) {
+			input := dataFilePath(t, name)
+			if "missing-dts.ts" == name {
+				checkNALSequence(t, run, input, "leading-sei.out")
+			} else {
+				checkNALSequence(t, run, input, "input-sei-order.out")
+			}
+
+			inputData, err := ioutil.ReadFile(input)
+			require.NoError(t, err)
+
+			fixedPath, err := FixMisplacedSEI(input)
+			require.NoError(t, err)
+			require.NotEqual(t, input, fixedPath, "expected fix-up to trigger")
+			defer os.Remove(fixedPath)
+
+			fixedData, err := ioutil.ReadFile(fixedPath)
+			require.NoError(t, err)
+			require.Equal(t, len(inputData), len(fixedData), "fix-up must preserve byte size")
+			if "missing-dts.ts" == name {
+				checkNALSequence(t, run, fixedPath, "fixed-leading-sei.out")
+			} else {
+				checkNALSequence(t, run, fixedPath, "fixed-sei-order.out")
+			}
+		})
+	}
+}
+
+// TestFixMisplacedSEI_NoChanges checks that known-good samples are passed
+// through untouched (the original path is returned).
+func TestFixMisplacedSEI_NoChanges(t *testing.T) {
+	run, dir := setupTest(t)
+	defer os.RemoveAll(dir)
+
+	run(`
+		# normally SEI comes before any picture data
+		cat <<- 'EOF1' > vertical-sei-order.out
+			Access Unit Delimiter
+			Supplemental Enhancement Information
+			Slice Header
+			Access Unit Delimiter
+		EOF1
+
+		# this sample should NOT have any SEI
+		! ffmpeg -hide_banner -i "$1/../data/portrait.ts" -c copy -bsf:v trace_headers -f null - 2>&1 | grep "Supplemental Enhancement Information"
+	`)
+
+	checkNALSequence(t, run, dataFilePath(t, "vertical-sample.ts"), "vertical-sei-order.out")
+
+	for _, name := range []string{"vertical-sample.ts", "portrait.ts", "broken-h264-parser.ts"} {
+		t.Run(name, func(t *testing.T) {
+			input := dataFilePath(t, name)
+			fixedPath, err := FixMisplacedSEI(input)
+			require.NoError(t, err)
+			require.Equal(t, input, fixedPath, "known-good sample should pass through unchanged")
+		})
+	}
+}
+
+// TestFixPES_Synthetic exercises the reordering on hand-built Annex B streams.
+func TestFixPES_Synthetic(t *testing.T) {
+	aud := []byte{0, 0, 1, 0x09, 0xf0}
+	sei := []byte{0, 0, 1, 0x06, 0xbb}
+	idr := []byte{0, 0, 1, 0x65, 0xaa}
+	slice := []byte{0, 0, 1, 0x41, 0xcc}
+	join := func(nals ...[]byte) []byte {
+		var es []byte
+		for _, n := range nals {
+			es = append(es, n...)
+		}
+		return es
+	}
+	fix := func(es []byte) ([]byte, bool) {
+		out := append([]byte(nil), es...)
+		changed := fixPES(es, out, []byteRange{{start: 0, end: len(es)}})
+		return out, changed
+	}
+
+	// AUD-delimited: the SEI trailing the slice is hoisted in front of it.
+	out, changed := fix(join(aud, slice, sei, aud, slice))
+	require.True(t, changed)
+	require.Equal(t, join(aud, sei, slice, aud, slice), out)
+
+	// No AUDs: an SEI after a slice opens the next access unit, so the stream
+	// must be left exactly as it was.
+	in := join(idr, sei, slice, sei, slice)
+	out, changed = fix(in)
+	require.False(t, changed)
+	require.Equal(t, in, out)
+}
+
+// dataFilePath resolves name inside the ../data directory relative to the
+// current working directory.
+func dataFilePath(t *testing.T, name string) string {
+	t.Helper()
+	wd, err := os.Getwd()
+	require.NoError(t, err)
+	return path.Join(wd, "..", "data", name)
+}
+
+// checkNALSequence dumps the first four AUD / slice header / SEI lines that
+// ffmpeg's trace_headers filter prints for inputPath and diffs them against
+// expectedPath. run is expected to execute the shell snippet and report success.
+func checkNALSequence(t *testing.T, run func(cmd string) bool, inputPath, expectedPath string) {
+	t.Helper()
+	cmd := fmt.Sprintf(`
+		ffmpeg -hide_banner -i "%s" -c copy -bsf:v trace_headers -f null - 2>&1 | grep -e 'Access Unit\|Slice Header\|Supplement' | head -4 > pre.raw
+		sed -E 's/^\[[^]]+\] //' pre.raw > pre.out
+		diff -u %s pre.out
+	`, inputPath, expectedPath)
+	require.True(t, run(cmd), "NAL ordering check failed for %s", inputPath)
+}