Files
monibuca/plugin/mp4/pkg/track.go
T
2024-09-14 13:52:24 +08:00

507 lines
14 KiB
Go

package mp4
import (
"io"
. "m7s.live/m7s/v5/plugin/mp4/pkg/box"
)
// Track holds the state of a single MP4 track: its codec parameters, the
// samples collected so far and the bookkeeping needed to serialise both the
// regular (moov/stbl) and the fragmented (moof/traf) box layouts.
type Track struct {
	// Cid is the codec of the track; the box builders branch on it.
	Cid MP4_CODEC_TYPE
	// TrackId is written to the tkhd, tfhd, tfra and sidx boxes.
	TrackId uint32
	// SampleTable is embedded; its STTS, CTTS, STSC, STSZ and STCO members
	// are filled by makeStblTable and serialised by makeStblBox.
	SampleTable
	// Duration is accumulated by AddSampleEntry from DTS deltas and written
	// to the tkhd, mdhd and elst boxes.
	Duration uint32

	// Height and Width are copied to the tkhd box (shifted left by 16) and
	// to the visual sample entry.
	Height uint32
	Width  uint32

	// SampleRate, SampleSize and ChannelCount are copied to the audio
	// sample entry.
	SampleRate uint32
	SampleSize uint16
	// SampleCount is not referenced by the builders in this file.
	SampleCount  uint32
	ChannelCount uint8

	// Timescale is the number of DTS/PTS units per second; Seek uses it to
	// convert sample timestamps to milliseconds.
	Timescale uint32

	// StartPts becomes the earliest presentation time of the sidx box and
	// StartDts is subtracted when computing its subsegment duration.
	// NOTE(review): EndDts/EndPts are maintained outside this file.
	StartDts uint64
	EndDts   uint64
	StartPts uint64
	EndPts   uint64

	// Samplelist holds the samples appended through AddSampleEntry.
	Samplelist []Sample
	// ELST is not referenced by the builders in this file.
	ELST *EditListBox
	// ExtraData is the codec configuration handed to the avcC, hvcC, esds
	// and Opus box builders.
	ExtraData []byte
	writer    io.WriteSeeker
	// fragments feeds the tfra box and the single-sample duration
	// estimate in makeTfhdBox.
	fragments []Fragment

	// Defaults chosen by makeTfhdBox and compared against by makeTrunBox.
	defaultSize        uint32
	defaultDuration    uint32
	defaultSampleFlags uint32
	baseDataOffset     uint64

	// State for sub-sample handling.
	// NOTE(review): presumably used for encrypted content; these fields
	// are not touched by the builders in this file - confirm with the
	// demuxer.
	defaultIsProtected     uint8
	defaultPerSampleIVSize uint8
	defaultCryptByteBlock  uint8
	defaultSkipByteBlock   uint8
	defaultConstantIV      []byte
	defaultKID             [16]byte
	lastSeig               *SeigSampleGroupEntry
	lastSaiz               *SaizBox
	subSamples             []SencEntry
}

// Fragment records the position and time range of one movie fragment.
// makeTfraBox reads Offset and FirstPts; makeTfhdBox reads LastDts.
type Fragment struct {
	Offset   uint64
	Duration uint32
	FirstDts uint64
	FirstPts uint64
	LastPts  uint64
	LastDts  uint64
}
// makeElstBox encodes the edit list ('elst') box of the track.
//
// Exactly one edit is written: it spans track.Duration, starts at media
// time 0 and uses rate 1.0. The presentation time of the first sample,
// converted to milliseconds, is only used to choose between the version 0
// (12-byte entries) and the version 1 (20-byte entries) layout.
//
// A track without samples or with a zero Timescale is treated as having a
// zero delay instead of panicking on the index or on the division.
func (track *Track) makeElstBox() []byte {
	var delay uint64
	if len(track.Samplelist) > 0 && track.Timescale != 0 {
		delay = track.Samplelist[0].PTS * 1000 / uint64(track.Timescale)
	}

	version := byte(0)
	entrySize := 12
	if delay > 0xFFFFFFFF {
		version = 1
		entrySize = 20
	}

	// For simplicity no leading empty edit is emitted for the delay: the
	// media time of the only edit is fixed at 0, i.e. playback is not delayed.
	const entryCount = 1

	// 12 fixed bytes (presumably the full-box header), 4 bytes for the entry
	// count, then the entries themselves.
	boxSize := 12 + 4 + entrySize*entryCount

	elst := NewEditListBox(version)
	elst.Entrys = make([]ELSTEntry, entryCount)
	edit := &elst.Entrys[entryCount-1]
	edit.SegmentDuration = uint64(track.Duration)
	edit.MediaTime = 0
	edit.MediaRateInteger = 0x0001
	edit.MediaRateFraction = 0

	_, boxdata := elst.Encode(boxSize)
	return boxdata
}
// Seek returns the index of the first sample whose decode time, converted
// to milliseconds with track.Timescale, is not earlier than dts. For video
// tracks only key frames qualify. It returns -1 when no sample matches.
func (track *Track) Seek(dts uint64) int {
	for idx := range track.Samplelist {
		candidate := &track.Samplelist[idx]
		if candidate.DTS*1000/uint64(track.Timescale) < dts {
			continue
		}
		// Non-video tracks can start anywhere; video needs a key frame.
		if !track.Cid.IsVideo() || candidate.KeyFrame {
			return idx
		}
	}
	return -1
}
// makeEdtsBox wraps the encoded edit list produced by makeElstBox in an
// 'edts' container box and returns the serialised container.
func (track *Track) makeEdtsBox() []byte {
	payload := track.makeElstBox()
	container := BasicBox{
		Type: TypeEDTS,
		Size: 8 + uint64(len(payload)),
	}
	pos, buf := container.Encode()
	copy(buf[pos:], payload)
	return buf
}
// AddSampleEntry appends entry to the sample list and extends the track
// duration by the DTS distance to the previously stored sample.
//
// The first sample of a (possibly just emptied) list resets Duration to 0.
// Every later sample adds its DTS delta; a sample whose DTS goes backwards
// adds 1 so that the duration keeps growing.
//
// Only an empty list resets the duration. The previous condition also reset
// it when the second sample arrived, which silently dropped the delta
// between the first two samples.
func (track *Track) AddSampleEntry(entry Sample) {
	if n := len(track.Samplelist); n == 0 {
		track.Duration = 0
	} else {
		// The unsigned subtraction wraps on a backwards step; reinterpreting
		// it as int64 exposes that as a negative delta.
		delta := int64(entry.DTS - track.Samplelist[n-1].DTS)
		if delta < 0 {
			track.Duration++
		} else {
			track.Duration += uint32(delta)
		}
	}
	track.Samplelist = append(track.Samplelist, entry)
}
// makeTkhdBox encodes the track header ('tkhd') box.
//
// Audio tracks get a volume of 0x0100; every other track gets its
// width and height shifted left by 16 bits.
//
// MP2 and MP3 are included in the audio set so that this function agrees
// with makeMinfBox and makeStsd, which already treat them as sound codecs.
func (track *Track) makeTkhdBox() []byte {
	tkhd := NewTrackHeaderBox()
	tkhd.Duration = uint64(track.Duration)
	tkhd.Track_ID = track.TrackId

	switch track.Cid {
	case MP4_CODEC_AAC, MP4_CODEC_G711A, MP4_CODEC_G711U,
		MP4_CODEC_MP2, MP4_CODEC_MP3, MP4_CODEC_OPUS:
		tkhd.Volume = 0x0100
	default:
		tkhd.Width = track.Width << 16
		tkhd.Height = track.Height << 16
	}

	_, tkhdbox := tkhd.Encode()
	return tkhdbox
}
// makeMinfBox encodes the media information ('minf') container. Its
// children are, in order: the media header (vmhd for H.264/H.265, smhd for
// the supported audio codecs), the default dinf box and the sample table.
//
// It panics when the track codec is none of the supported ones.
func (track *Track) makeMinfBox() []byte {
	var mediaHeader []byte
	switch track.Cid {
	case MP4_CODEC_H264, MP4_CODEC_H265:
		mediaHeader = MakeVmhdBox()
	case MP4_CODEC_G711A, MP4_CODEC_G711U, MP4_CODEC_AAC,
		MP4_CODEC_MP2, MP4_CODEC_MP3, MP4_CODEC_OPUS:
		mediaHeader = MakeSmhdBox()
	default:
		panic("unsupport codec id")
	}

	children := [][]byte{
		mediaHeader,
		MakeDefaultDinfBox(),
		track.makeStblBox(),
	}
	payload := 0
	for _, child := range children {
		payload += len(child)
	}

	container := BasicBox{Type: TypeMINF, Size: 8 + uint64(payload)}
	pos, buf := container.Encode()
	for _, child := range children {
		copy(buf[pos:], child)
		pos += len(child)
	}
	return buf
}
// makeMdiaBox encodes the media ('mdia') container, which consists of the
// media header built from the track duration, the handler box matching the
// track codec and the media information box, in that order.
func (track *Track) makeMdiaBox() []byte {
	children := [][]byte{
		MakeMdhdBox(track.Duration),
		MakeHdlrBox(GetHandlerType(track.Cid)),
		track.makeMinfBox(),
	}
	payload := 0
	for _, child := range children {
		payload += len(child)
	}

	container := BasicBox{Type: TypeMDIA, Size: 8 + uint64(payload)}
	pos, buf := container.Encode()
	for _, child := range children {
		copy(buf[pos:], child)
		pos += len(child)
	}
	return buf
}
// makeStblBox encodes the sample table ('stbl') container.
//
// The sample description is always written. The stts, ctts, stsc, stsz and
// stco tables are written only when the corresponding SampleTable member is
// set, and the sync sample table is added for H.264/H.265 tracks. Children
// appear in the order stsd, stts, ctts, stsc, stsz, stco, stss.
func (track *Track) makeStblBox() []byte {
	stsd := track.makeStsd(GetHandlerType(track.Cid))

	var stts, ctts, stsc, stsz, stco, stss []byte
	tables := &track.SampleTable
	if tables.STTS != nil {
		_, stts = tables.STTS.Encode()
	}
	if tables.CTTS != nil {
		_, ctts = tables.CTTS.Encode()
	}
	if tables.STSC != nil {
		_, stsc = tables.STSC.Encode()
	}
	if tables.STSZ != nil {
		_, stsz = tables.STSZ.Encode()
	}
	if tables.STCO != nil {
		_, stco = tables.STCO.Encode()
	}
	switch track.Cid {
	case MP4_CODEC_H264, MP4_CODEC_H265:
		stss = track.makeStssBox()
	}

	// Absent tables stay nil and contribute zero bytes.
	children := [][]byte{stsd, stts, ctts, stsc, stsz, stco, stss}
	total := 8
	for _, child := range children {
		total += len(child)
	}

	container := BasicBox{Type: TypeSTBL, Size: uint64(total)}
	pos, buf := container.Encode()
	for _, child := range children {
		copy(buf[pos:], child)
		pos += len(child)
	}
	return buf
}
// makeStsd encodes the sample description ('stsd') box with a single
// sample entry.
//
// The codec configuration box (avcC, hvcC, esds or the Opus specific box)
// is built from track.ExtraData and appended to the sample entry; codecs
// without such a box contribute nothing. handler_type selects a visual or
// an audio sample entry; for any other handler the entry is left empty.
func (track *Track) makeStsd(handler_type HandlerType) []byte {
	var codecConfig []byte
	switch track.Cid {
	case MP4_CODEC_H264:
		codecConfig = MakeAvcCBox(track.ExtraData)
	case MP4_CODEC_H265:
		codecConfig = MakeHvcCBox(track.ExtraData)
	case MP4_CODEC_AAC, MP4_CODEC_MP2, MP4_CODEC_MP3:
		codecConfig = MakeEsdsBox(track.TrackId, track.Cid, track.ExtraData)
	case MP4_CODEC_OPUS:
		codecConfig = MakeOpusSpecificBox(track.ExtraData)
	}

	var (
		sampleEntry []byte
		entryPos    int
	)
	switch handler_type {
	case TypeVIDE:
		visual := NewVisualSampleEntry(GetCodecNameWithCodecId(track.Cid))
		visual.Width = uint16(track.Width)
		visual.Height = uint16(track.Height)
		entryPos, sampleEntry = visual.Encode(visual.Size() + uint64(len(codecConfig)))
	case TypeSOUN:
		audio := NewAudioSampleEntry(GetCodecNameWithCodecId(track.Cid))
		audio.ChannelCount = uint16(track.ChannelCount)
		audio.Samplerate = track.SampleRate
		audio.SampleSize = track.SampleSize
		entryPos, sampleEntry = audio.Encode(audio.Size() + uint64(len(codecConfig)))
	}
	copy(sampleEntry[entryPos:], codecConfig)

	// The box value is the entry count: exactly one sample entry follows.
	stsd := SampleDescriptionBox(1)
	stsdPos, stsdBox := stsd.Encode(FullBoxLen + 4 + uint64(len(sampleEntry)))
	copy(stsdBox[stsdPos:], sampleEntry)
	return stsdBox
}
// fmp4
// makeTraf encodes the track fragment ('traf') container for the samples
// currently held by the track. moofOffset is passed to the tfhd box as base
// data offset and moofSize is forwarded to the trun boxes.
//
// The tfhd box is built first because it stores the per-fragment defaults
// that the trun builder compares against.
func (track *Track) makeTraf(moofOffset int64, moofSize int64) []byte {
	children := [][]byte{
		track.makeTfhdBox(uint64(moofOffset)),
		track.makeTfdtBox(),
		track.makeTrunBoxes(moofSize),
	}
	payload := 0
	for _, child := range children {
		payload += len(child)
	}

	container := BasicBox{Type: TypeTRAF, Size: 8 + uint64(payload)}
	pos, buf := container.Encode()
	for _, child := range children {
		copy(buf[pos:], child)
		pos += len(child)
	}
	return buf
}
// makeTfhdBox encodes the track fragment header ('tfhd') box and records
// the chosen default duration, size and sample flags on the track so that
// makeTrunBox can compare individual samples against them.
//
// The default duration is the DTS distance between the first two samples,
// or between the only sample and the last DTS of the previous fragment;
// without either the duration is flagged as empty. The default size is the
// size of the first sample. Default sample flags follow ffmpeg's
// movenc.c mov_write_tfhd_tag.
func (track *Track) makeTfhdBox(offset uint64) []byte {
	flags := TF_FLAG_SAMPLE_DESCRIPTION_INDEX_PRESENT | TF_FLAG_DEAAULT_BASE_IS_MOOF

	tfhd := NewTrackFragmentHeaderBox(track.TrackId)
	tfhd.BaseDataOffset = offset

	samples := track.Samplelist
	switch {
	case len(samples) > 1:
		tfhd.DefaultSampleDuration = uint32(samples[1].DTS - samples[0].DTS)
	case len(samples) == 1 && len(track.fragments) > 0:
		previous := track.fragments[len(track.fragments)-1]
		tfhd.DefaultSampleDuration = uint32(samples[0].DTS - previous.LastDts)
	default:
		tfhd.DefaultSampleDuration = 0
		flags |= TF_FLAG_DURATION_IS_EMPTY
	}

	if len(samples) == 0 {
		tfhd.DefaultSampleSize = 0
	} else {
		flags |= TF_FLAG_DEAAULT_SAMPLE_FLAGS_PRESENT |
			TF_FLAG_DEFAULT_SAMPLE_DURATION_PRESENT |
			TF_FLAG_DEFAULT_SAMPLE_SIZE_PRESENT
		tfhd.DefaultSampleSize = uint32(samples[0].Size)
	}

	tfhd.DefaultSampleFlags = MOV_FRAG_SAMPLE_FLAG_DEPENDS_NO
	if track.Cid.IsVideo() {
		tfhd.DefaultSampleFlags = MOV_FRAG_SAMPLE_FLAG_DEPENDS_YES | MOV_FRAG_SAMPLE_FLAG_IS_NON_SYNC
	}

	track.defaultDuration = tfhd.DefaultSampleDuration
	track.defaultSize = tfhd.DefaultSampleSize
	track.defaultSampleFlags = tfhd.DefaultSampleFlags

	_, encoded := tfhd.Encode(flags)
	return encoded
}
// makeTfdtBox encodes the track fragment decode time ('tfdt') box using the
// DTS of the first pending sample as base media decode time.
//
// The sample list must not be empty; indexing its first element panics
// otherwise.
func (track *Track) makeTfdtBox() []byte {
	baseDecodeTime := track.Samplelist[0].DTS
	box := NewTrackFragmentBaseMediaDecodeTimeBox(baseDecodeTime)
	_, encoded := box.Encode()
	return encoded
}
// makeTrunBoxes encodes one track run ('trun') box per run of samples that
// are stored back to back, and returns the boxes concatenated.
//
// A new run starts whenever a sample's offset differs from the previous
// sample's offset plus size. An empty sample list yields no boxes.
func (track *Track) makeTrunBoxes(moofSize int64) []byte {
	samples := track.Samplelist
	out := make([]byte, 0, 128)

	runStart := 0
	for i := 1; i < len(samples); i++ {
		prev := &samples[i-1]
		contiguous := samples[i].Offset == prev.Offset+int64(prev.Size)
		if !contiguous {
			out = append(out, track.makeTrunBox(runStart, i, moofSize)...)
			runStart = i
		}
	}

	// Flush the trailing run.
	if runStart < len(samples) {
		out = append(out, track.makeTrunBox(runStart, len(samples), moofSize)...)
	}
	return out
}
// makeStssBox encodes the sync sample ('stss') box, listing the 1-based
// numbers of all samples marked as key frames.
func (track *Track) makeStssBox() (boxdata []byte) {
	var syncSamples SyncSampleBox
	for idx := range track.Samplelist {
		if !track.Samplelist[idx].KeyFrame {
			continue
		}
		syncSamples = append(syncSamples, uint32(idx+1))
	}
	_, boxdata = syncSamples.Encode()
	return boxdata
}
// makeTfraBox encodes the track fragment random access ('tfra') box with
// one entry per recorded fragment, pairing the fragment's first PTS with
// the offset stored for it. All three length-size fields are set to 0.
func (track *Track) makeTfraBox() []byte {
	tfra := NewTrackFragmentRandomAccessBox(track.TrackId)
	tfra.LengthSizeOfTrafNum = 0
	tfra.LengthSizeOfTrunNum = 0
	tfra.LengthSizeOfSampleNum = 0

	for i := range track.fragments {
		frag := &track.fragments[i]
		entry := FragEntry{
			Time:       frag.FirstPts,
			MoofOffset: frag.Offset,
		}
		tfra.FragEntrys = append(tfra.FragEntrys, entry)
	}

	_, encoded := tfra.Encode()
	return encoded
}
// makeTrunBox encodes a track run ('trun') box for the samples in
// [start, end).
//
// The data offset is always present. Per-sample size, duration and
// composition offset are flagged as present as soon as one sample of the
// run deviates from the defaults stored by makeTfhdBox (or has PTS != DTS).
// Durations are only compared between neighbours inside the run; the run's
// final sample is not checked. First-sample flags are flagged for video
// runs that begin with a key frame.
//
// Each entry's duration is the DTS distance to the next sample in the
// track's list; the very last sample of the list uses the default duration.
func (track *Track) makeTrunBox(start, end int, moofSize int64) []byte {
	samples := track.Samplelist

	flags := TR_FLAG_DATA_OFFSET
	if track.Cid.IsVideo() && samples[start].KeyFrame {
		flags |= TR_FLAG_DATA_FIRST_SAMPLE_FLAGS
	}
	for idx := start; idx < end; idx++ {
		cur := &samples[idx]
		if cur.Size != int(track.defaultSize) {
			flags |= TR_FLAG_DATA_SAMPLE_SIZE
		}
		if idx+1 < end && samples[idx+1].DTS-cur.DTS != uint64(track.defaultDuration) {
			flags |= TR_FLAG_DATA_SAMPLE_DURATION
		}
		if cur.PTS != cur.DTS {
			flags |= TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME
		}
	}

	run := NewTrackRunBox()
	run.SampleCount = uint32(end - start)
	run.Dataoffset = int32(moofSize + samples[start].Offset)
	run.FirstSampleFlags = MOV_FRAG_SAMPLE_FLAG_DEPENDS_NO

	lastIdx := len(samples) - 1
	for idx := start; idx < end; idx++ {
		cur := &samples[idx]
		duration := track.defaultDuration
		if idx != lastIdx {
			duration = uint32(samples[idx+1].DTS - cur.DTS)
		}
		run.EntryList = append(run.EntryList, TrunEntry{
			SampleDuration:              duration,
			SampleSize:                  uint32(cur.Size),
			SampleCompositionTimeOffset: uint32(cur.PTS - cur.DTS),
		})
	}

	_, encoded := run.Encode(flags)
	return encoded
}
// makeStblTable derives the sample tables from the collected samples and
// stores them in track.SampleTable:
//
//   - STTS: run-length encoded decode-time deltas between successive
//     samples. A backwards step and the final sample both use a delta of 1.
//   - CTTS: run-length encoded PTS-DTS offsets (H.264/H.265 tracks only).
//   - STSC/STCO: samples stored back to back share a chunk; a new chunk
//     starts wherever a sample's offset does not follow its predecessor.
//   - STSZ: a single shared size when all samples are equally large,
//     otherwise one size per sample.
//
// The deltas are computed from DTS. They were previously computed from
// PTS, which yields wrong decode times whenever presentation order differs
// from decode order and disagrees with the PTS-DTS offsets stored in CTTS.
//
// An empty sample list produces empty tables instead of panicking.
func (track *Track) makeStblTable() {
	samples := track.Samplelist
	sameSize := true
	movchunks := make([]movchunk, 0)
	ckn := uint32(0)
	var stts TimeToSampleBox
	var ctts CompositionOffsetBox
	var stco ChunkOffsetBox

	for i, sample := range samples {
		isLast := i == len(samples)-1

		// Decode-time delta to the next sample.
		if isLast {
			stts = append(stts, STTSEntry{SampleCount: 1, SampleDelta: 1})
		} else {
			var delta uint64 = 1
			if next := samples[i+1].DTS; next >= sample.DTS {
				delta = next - sample.DTS
			}
			if len(stts) > 0 && delta == uint64(stts[len(stts)-1].SampleDelta) {
				stts[len(stts)-1].SampleCount++
			} else {
				stts = append(stts, STTSEntry{SampleCount: 1, SampleDelta: uint32(delta)})
			}
		}

		// Composition offset; equal neighbours are merged.
		offset := uint32(sample.PTS) - uint32(sample.DTS)
		if len(ctts) > 0 && ctts[len(ctts)-1].SampleOffset == offset {
			ctts[len(ctts)-1].SampleCount++
		} else {
			ctts = append(ctts, CTTSEntry{SampleCount: 1, SampleOffset: offset})
		}

		if sameSize && !isLast && samples[i+1].Size != sample.Size {
			sameSize = false
		}

		// Chunking: extend the current chunk while the data is contiguous.
		if i > 0 && sample.Offset == samples[i-1].Offset+int64(samples[i-1].Size) {
			movchunks[ckn-1].samplenum++
		} else {
			movchunks = append(movchunks, movchunk{chunknum: ckn, samplenum: 1, chunkoffset: uint64(sample.Offset)})
			stco = append(stco, uint64(sample.Offset))
			ckn++
		}
	}

	stsz := &SampleSizeBox{
		SampleSize:  0,
		SampleCount: uint32(len(samples)),
	}
	switch {
	case len(samples) == 0:
		// Nothing to describe: keep size 0 and no per-sample table.
	case sameSize:
		stsz.SampleSize = uint32(samples[0].Size)
	default:
		stsz.EntrySizelist = make([]uint32, stsz.SampleCount)
		for i := range stsz.EntrySizelist {
			stsz.EntrySizelist[i] = uint32(samples[i].Size)
		}
	}

	// Only chunks whose sample count differs from the previous chunk need
	// their own sample-to-chunk entry.
	var stsc SampleToChunkBox
	for i, chunk := range movchunks {
		if i == 0 || chunk.samplenum != movchunks[i-1].samplenum {
			stsc = append(stsc, STSCEntry{FirstChunk: chunk.chunknum + 1, SampleDescriptionIndex: 1, SamplesPerChunk: chunk.samplenum})
		}
	}

	track.SampleTable.STTS = &stts
	track.SampleTable.STSC = &stsc
	track.SampleTable.STCO = &stco
	track.SampleTable.STSZ = stsz
	if track.Cid == MP4_CODEC_H264 || track.Cid == MP4_CODEC_H265 {
		track.SampleTable.CTTS = &ctts
	}
}
// makeSidxBox encodes a segment index ('sidx') box with a single reference
// for this track.
//
// totalSidxSize is added to a fixed 52 bytes to form the first offset and
// refsize becomes the referenced size. The subsegment duration is the
// 32-bit difference between the last sample's DTS and track.StartDts, or 0
// when the track holds no samples.
func (track *Track) makeSidxBox(totalSidxSize uint32, refsize uint32) []byte {
	sidx := NewSegmentIndexBox()
	sidx.ReferenceID = track.TrackId
	sidx.TimeScale = track.Timescale
	sidx.EarliestPresentationTime = track.StartPts
	sidx.ReferenceCount = 1
	sidx.FirstOffset = 52 + uint64(totalSidxSize)

	var duration uint32
	if n := len(track.Samplelist); n > 0 {
		duration = uint32(track.Samplelist[n-1].DTS) - uint32(track.StartDts)
	}

	sidx.Entrys = append(sidx.Entrys, SidxEntry{
		ReferenceType:      0,
		ReferencedSize:     refsize,
		SubsegmentDuration: duration,
		StartsWithSAP:      1,
		SAPType:            0,
		SAPDeltaTime:       0,
	})
	sidx.Box.Box.Size = sidx.Size()

	_, encoded := sidx.Encode()
	return encoded
}