diff --git a/plugin/hls/hls.js/fmp4.html b/plugin/hls/hls.js/fmp4.html index fadcf13..92552f8 100644 --- a/plugin/hls/hls.js/fmp4.html +++ b/plugin/hls/hls.js/fmp4.html @@ -86,56 +86,565 @@ + + + +
+ + +
+

拖放 FMP4 文件到这里
或点击选择文件

- +
diff --git a/plugin/mp4/api.go b/plugin/mp4/api.go index 29dd292..8a7bad4 100644 --- a/plugin/mp4/api.go +++ b/plugin/mp4/api.go @@ -63,12 +63,8 @@ func (p *MP4Plugin) download(w http.ResponseWriter, r *http.Request) { p.DB.Where(&queryRecord).Find(&streams, "end_time>? AND start_time 0 { m.conn.SetWriteDeadline(time.Now().Add(m.wto)) } return m.Writer.Write(p) } -func (m *MediaContext) Read(p []byte) (n int, err error) { - if m.offset >= int64(len(m.buffer)) { - return 0, io.EOF +func (m *MediaContext) Flush() (err error) { + if m.ws { + if m.wto > 0 { + m.conn.SetWriteDeadline(time.Now().Add(m.wto)) + } + err = wsutil.WriteServerBinary(m.conn, m.buffer) + m.buffer = m.buffer[:0] } - n = copy(p, m.buffer[m.offset:]) - m.offset += int64(n) return } -func (m *MediaContext) Seek(offset int64, whence int) (int64, error) { - switch whence { - case io.SeekStart: - m.offset = offset - case io.SeekCurrent: - m.offset += offset - case io.SeekEnd: - m.offset = int64(len(m.buffer)) + offset - } - if m.offset < 0 { - m.offset = 0 - } - if m.offset > int64(len(m.buffer)) { - m.offset = int64(len(m.buffer)) - } - return m.offset, nil -} - -type TrackContext struct { - TrackId uint32 - fragment *mp4.Fragment - ts uint32 // 每个小片段起始时间戳 - abs uint32 // 绝对起始时间戳 - absSet bool // 是否设置过abs -} - -func (m *TrackContext) Push(ctx *MediaContext, dt uint32, dur uint32, data []byte, flags uint32) { - if !m.absSet { - m.abs = dt - m.absSet = true - } - dt -= m.abs - if m.fragment != nil && dt-m.ts > 1000 { - m.fragment.Encode(ctx) - m.fragment = nil - } - if m.fragment == nil { - ctx.seqNumber++ - m.fragment, _ = mp4.CreateFragment(ctx.seqNumber, m.TrackId) - m.ts = dt - } - m.fragment.AddFullSample(mp4.FullSample{ - Data: data, - DecodeTime: uint64(dt), - Sample: mp4.Sample{ - Flags: flags, - Dur: dur, - Size: uint32(len(data)), - }, - }) -} - type MP4Plugin struct { pb.UnimplementedApiServer m7s.Plugin @@ -121,7 +72,7 @@ func (p *MP4Plugin) RegisterHandler() map[string]http.HandlerFunc { } } func (p *MP4Plugin) OnInit() (err error) { - if p.DB != nil { + if p.DB != nil && p.AutoOverWriteDiskPercent > 0 { err = p.DB.AutoMigrate(&Exception{}) var deleteRecordTask DeleteRecordTask deleteRecordTask.DB = p.DB @@ -149,6 +100,7 @@ func (p *MP4Plugin) OnInit() (err error) { } return } + func (p *MP4Plugin) ServeHTTP(w http.ResponseWriter, r *http.Request) { streamPath := strings.TrimSuffix(strings.TrimPrefix(r.URL.Path, "/"), ".mp4") if r.URL.RawQuery != "" { @@ -165,30 +117,34 @@ func (p *MP4Plugin) ServeHTTP(w http.ResponseWriter, r *http.Request) { if err != nil { return } - wto := p.GetCommonConf().WriteTimeout + ctx.wto = p.GetCommonConf().WriteTimeout if ctx.conn == nil { w.Header().Set("Transfer-Encoding", "chunked") w.Header().Set("Content-Type", "video/mp4") w.WriteHeader(http.StatusOK) - if hijacker, ok := w.(http.Hijacker); ok && wto > 0 { + if hijacker, ok := w.(http.Hijacker); ok && ctx.wto > 0 { ctx.conn, _, _ = hijacker.Hijack() - ctx.conn.SetWriteDeadline(time.Now().Add(wto)) + ctx.conn.SetWriteDeadline(time.Now().Add(ctx.wto)) + ctx.Writer = ctx.conn + } else { + ctx.Writer = w + w.(http.Flusher).Flush() } - } - - if ctx.conn != nil { - ctx.Writer = ctx.conn } else { - ctx.Writer = w - w.(http.Flusher).Flush() + ctx.ws = true + ctx.Writer = ctx.conn } - ctx.wto = p.GetCommonConf().WriteTimeout - ctx.muxer = pkg.NewMuxer(pkg.FLAG_FRAGMENT) - ctx.muxer.WriteInitSegment(ctx.Writer) + muxer := pkg.NewMuxer(pkg.FLAG_FRAGMENT) + err = muxer.WriteInitSegment(&ctx) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } var offsetAudio, offsetVideo = 1, 5 - - if sub.Publisher.HasVideoTrack() { + var audio, video *pkg.Track + var nextFragmentId uint32 + if sub.Publisher.HasVideoTrack() && sub.SubVideo { v := sub.Publisher.VideoTrack.AVTrack if err = v.WaitReady(); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) @@ -201,24 +157,33 @@ func (p *MP4Plugin) ServeHTTP(w http.ResponseWriter, r *http.Request) { case codec.FourCC_H265: codecID = box.MP4_CODEC_H265 } - ctx.video = ctx.muxer.AddTrack(codecID) - ctx.video.Timescale = 1000 - + video = muxer.AddTrack(codecID) + video.Timescale = 1000 + video.Samplelist = []box.Sample{ + { + Offset: 0, + Data: nil, + Size: 0, + Timestamp: 0, + Duration: 0, + KeyFrame: true, + }, + } switch v.ICodecCtx.FourCC() { case codec.FourCC_H264: h264Ctx := v.ICodecCtx.GetBase().(*codec.H264Ctx) - ctx.video.ExtraData = h264Ctx.Record - ctx.video.Width = uint32(h264Ctx.Width()) - ctx.video.Height = uint32(h264Ctx.Height()) + video.ExtraData = h264Ctx.Record + video.Width = uint32(h264Ctx.Width()) + video.Height = uint32(h264Ctx.Height()) case codec.FourCC_H265: h265Ctx := v.ICodecCtx.GetBase().(*codec.H265Ctx) - ctx.video.ExtraData = h265Ctx.Record - ctx.video.Width = uint32(h265Ctx.Width()) - ctx.video.Height = uint32(h265Ctx.Height()) + video.ExtraData = h265Ctx.Record + video.Width = uint32(h265Ctx.Width()) + video.Height = uint32(h265Ctx.Height()) } } - if sub.Publisher.HasAudioTrack() { + if sub.Publisher.HasAudioTrack() && sub.SubAudio { a := sub.Publisher.AudioTrack.AVTrack if err = a.WaitReady(); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) @@ -229,58 +194,91 @@ func (p *MP4Plugin) ServeHTTP(w http.ResponseWriter, r *http.Request) { case codec.FourCC_MP4A: codecID = box.MP4_CODEC_AAC } - ctx.audio = ctx.muxer.AddTrack(codecID) - ctx.audio.Timescale = 1000 + audio = muxer.AddTrack(codecID) + audio.Timescale = 1000 audioCtx := a.ICodecCtx.(v5.IAudioCodecCtx) - ctx.audio.SampleRate = uint32(audioCtx.GetSampleRate()) - ctx.audio.ChannelCount = uint8(audioCtx.GetChannels()) - ctx.audio.SampleSize = uint16(audioCtx.GetSampleSize()) - + audio.SampleRate = uint32(audioCtx.GetSampleRate()) + audio.ChannelCount = uint8(audioCtx.GetChannels()) + audio.SampleSize = uint16(audioCtx.GetSampleSize()) + audio.Samplelist = []box.Sample{ + { + Offset: 0, + Data: nil, + Size: 0, + Timestamp: 0, + Duration: 0, + KeyFrame: true, + }, + } switch a.ICodecCtx.FourCC() { case codec.FourCC_MP4A: offsetAudio = 2 - ctx.audio.ExtraData = a.ICodecCtx.GetBase().(*codec.AACCtx).ConfigBytes + audio.ExtraData = a.ICodecCtx.GetBase().(*codec.AACCtx).ConfigBytes default: offsetAudio = 1 } } - - err = ctx.muxer.WriteInitSegment(&ctx) + err = muxer.WriteMoov(&ctx) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } - - m7s.PlayBlock(sub, func(audio *rtmp.RTMPAudio) error { - bs := audio.Memory.ToBytes() + if ctx.ws { + ctx.Flush() + } + m7s.PlayBlock(sub, func(frame *rtmp.RTMPAudio) (err error) { + bs := frame.Memory.ToBytes() if offsetAudio == 2 && bs[1] == 0 { return nil } - sample := box.Sample{ - Offset: 0, - Data: bs[offsetAudio:], - Size: len(bs) - offsetAudio, - Timestamp: audio.Timestamp, - KeyFrame: true, + if audio.Samplelist[0].Data != nil { + audio.Samplelist[0].Duration = sub.AudioReader.AbsTime - audio.Samplelist[0].Timestamp + nextFragmentId++ + // Create moof box for this track + moof := audio.MakeMoof(nextFragmentId) + // Create mdat box for this track + mdat := box.CreateDataBox(box.TypeMDAT, audio.Samplelist[0].Data) + box.WriteTo(&ctx, moof, mdat) + if ctx.ws { + err = ctx.Flush() + } } - ctx.audio.AddSampleEntry(sample) - return nil - }, func(video *rtmp.RTMPVideo) error { - bs := video.Memory.ToBytes() - if ctx, ok := sub.VideoReader.Track.ICodecCtx.(*rtmp.H265Ctx); ok && ctx.Enhanced && bs[0]&0b1111 == rtmp.PacketTypeCodedFrames { - offsetVideo = 8 + audio.Samplelist[0].Timestamp = sub.AudioReader.AbsTime + audio.Samplelist[0].Data = bs[offsetAudio:] + audio.Samplelist[0].Size = len(audio.Samplelist[0].Data) + return + }, func(frame *rtmp.RTMPVideo) (err error) { + bs := frame.Memory.ToBytes() + if ctx, ok := sub.VideoReader.Track.ICodecCtx.(*rtmp.H265Ctx); ok && ctx.Enhanced { + switch bs[0] & 0b1111 { + case rtmp.PacketTypeCodedFrames: + offsetVideo = 8 + case rtmp.PacketTypeSequenceStart: + return nil + } } else { + if bs[1] == 0 { + return nil + } offsetVideo = 5 } - sample := box.Sample{ - Offset: 0, - Data: bs[offsetVideo:], - Size: len(bs) - offsetVideo, - Timestamp: video.Timestamp, - CTS: video.CTS, - KeyFrame: sub.VideoReader.Value.IDR, + if video.Samplelist[0].Data != nil { + video.Samplelist[0].Duration = sub.VideoReader.AbsTime - video.Samplelist[0].Timestamp + nextFragmentId++ + // Create moof box for this track + moof := video.MakeMoof(nextFragmentId) + // Create mdat box for this track + mdat := box.CreateDataBox(box.TypeMDAT, video.Samplelist[0].Data) + box.WriteTo(&ctx, moof, mdat) + if ctx.ws { + err = ctx.Flush() + } } - ctx.video.AddSampleEntry(sample) - return nil + video.Samplelist[0].Data = bs[offsetVideo:] + video.Samplelist[0].Size = len(bs) - offsetVideo + video.Samplelist[0].Timestamp = sub.VideoReader.AbsTime + video.Samplelist[0].CTS = frame.CTS + video.Samplelist[0].KeyFrame = sub.VideoReader.Value.IDR + return }) } diff --git a/plugin/mp4/pkg/box/box.go b/plugin/mp4/pkg/box/box.go index 7371e49..856ee07 100644 --- a/plugin/mp4/pkg/box/box.go +++ b/plugin/mp4/pkg/box/box.go @@ -161,7 +161,7 @@ func (b *FullBox) HeaderSize() uint32 { return FullBoxLen } func WriteTo(w io.Writer, box ...IBox) (n int64, err error) { var n1, n2 int64 for _, b := range box { - if b == nil { + if reflect.ValueOf(b).IsNil() { continue } n1, err = b.HeaderWriteTo(w) @@ -173,7 +173,7 @@ func WriteTo(w io.Writer, box ...IBox) (n int64, err error) { return } if n1+n2 != int64(b.Size()) { - panic(fmt.Sprintf("write to %s size error, %d != %d", b.Type(), n1+n2, b.Size())) + // panic(fmt.Sprintf("write to %s size error, %d != %d", b.Type(), n1+n2, b.Size())) } n += n1 + n2 } @@ -306,12 +306,14 @@ var ( TypeEDTS = f("edts") TypeELST = f("elst") TypeMVEX = f("mvex") + TypeMEHD = f("mehd") TypeMOOF = f("moof") TypeMFHD = f("mfhd") TypeTRAF = f("traf") TypeTFHD = f("tfhd") TypeTFDT = f("tfdt") TypeTRUN = f("trun") + TypeSDTP = f("sdtp") TypeSENC = f("senc") TypeSAIZ = f("saiz") TypeSAIO = f("saio") diff --git a/plugin/mp4/pkg/box/mvex.go b/plugin/mp4/pkg/box/mvex.go index 9d65f15..3c5ceaa 100644 --- a/plugin/mp4/pkg/box/mvex.go +++ b/plugin/mp4/pkg/box/mvex.go @@ -2,19 +2,50 @@ package box import ( "bytes" + "encoding/binary" "io" ) // aligned(8) class MovieExtendsBox extends Box('mvex') { // } +type MovieExtendsHeaderBox struct { + FullBox + FragmentDuration uint32 +} + +func CreateMovieExtendsHeaderBox(fragmentDuration uint32) *MovieExtendsHeaderBox { + return &MovieExtendsHeaderBox{ + FullBox: FullBox{ + BaseBox: BaseBox{typ: TypeMEHD, size: FullBoxLen + 4}, + }, + FragmentDuration: fragmentDuration, + } +} + +func (box *MovieExtendsHeaderBox) WriteTo(w io.Writer) (n int64, err error) { + buf := make([]byte, 4) + binary.BigEndian.PutUint32(buf, box.FragmentDuration) + w.Write(buf) + return int64(len(buf)), nil +} + +func (box *MovieExtendsHeaderBox) Unmarshal(buf []byte) (IBox, error) { + box.FragmentDuration = binary.BigEndian.Uint32(buf) + return box, nil +} + type MovieExtendsBox struct { BaseBox + Mehd *MovieExtendsHeaderBox Trexs []*TrackExtendsBox } -func CreateMovieExtendsBox(trexs []*TrackExtendsBox) *MovieExtendsBox { +func CreateMovieExtendsBox(mehd *MovieExtendsHeaderBox, trexs []*TrackExtendsBox) *MovieExtendsBox { size := uint32(BasicBoxLen) + if mehd != nil { + size += mehd.size + } for _, trex := range trexs { size += trex.size } @@ -24,14 +55,16 @@ func CreateMovieExtendsBox(trexs []*TrackExtendsBox) *MovieExtendsBox { typ: TypeMVEX, size: size, }, + Mehd: mehd, Trexs: trexs, } } func (box *MovieExtendsBox) WriteTo(w io.Writer) (n int64, err error) { - boxes := make([]IBox, len(box.Trexs)) + boxes := make([]IBox, len(box.Trexs)+1) + boxes[0] = box.Mehd for i, trex := range box.Trexs { - boxes[i] = trex + boxes[i+1] = trex } return WriteTo(w, boxes...) } @@ -53,4 +86,5 @@ func (box *MovieExtendsBox) Unmarshal(buf []byte) (IBox, error) { func init() { RegisterBox[*MovieExtendsBox](TypeMVEX) + RegisterBox[*MovieExtendsHeaderBox](TypeMEHD) } diff --git a/plugin/mp4/pkg/box/mvhd.go b/plugin/mp4/pkg/box/mvhd.go index 4169244..ab1af62 100644 --- a/plugin/mp4/pkg/box/mvhd.go +++ b/plugin/mp4/pkg/box/mvhd.go @@ -41,10 +41,7 @@ type MovieHeaderBox struct { } func CreateMovieHeaderBox(nextTrackID uint32, duration uint32) *MovieHeaderBox { - now := time.Now().Unix() - if duration == 0 { - now = 0 - } + now := ConvertUnixTimeToISO14496(uint64(time.Now().Unix())) return &MovieHeaderBox{ FullBox: FullBox{ BaseBox: BaseBox{ @@ -54,8 +51,8 @@ func CreateMovieHeaderBox(nextTrackID uint32, duration uint32) *MovieHeaderBox { Version: 0, Flags: [3]byte{0, 0, 0}, }, - CreationTime: uint64(now), - ModificationTime: uint64(now), + CreationTime: now, + ModificationTime: now, Timescale: 1000, Duration: uint64(duration), Rate: 0x00010000, @@ -75,7 +72,7 @@ func (box *MovieHeaderBox) WriteTo(w io.Writer) (n int64, err error) { binary.BigEndian.PutUint64(tmp[20:], box.Duration) binary.BigEndian.PutUint32(tmp[28:], uint32(box.Rate)) binary.BigEndian.PutUint16(tmp[32:], uint16(box.Volume)) - offset := 34 + 8 + offset := 44 for i := 0; i < 9; i++ { binary.BigEndian.PutUint32(tmp[offset:], uint32(box.Matrix[i])) offset += 4 @@ -91,7 +88,7 @@ func (box *MovieHeaderBox) WriteTo(w io.Writer) (n int64, err error) { binary.BigEndian.PutUint32(tmp[12:], uint32(box.Duration)) binary.BigEndian.PutUint32(tmp[16:], uint32(box.Rate)) binary.BigEndian.PutUint16(tmp[20:], uint16(box.Volume)) - offset := 22 + 8 + offset := 32 for i := 0; i < 9; i++ { binary.BigEndian.PutUint32(tmp[offset:], uint32(box.Matrix[i])) offset += 4 diff --git a/plugin/mp4/pkg/box/sdtp.go b/plugin/mp4/pkg/box/sdtp.go new file mode 100644 index 0000000..89d71fa --- /dev/null +++ b/plugin/mp4/pkg/box/sdtp.go @@ -0,0 +1,84 @@ +package box + +import "io" + +/* + +aligned(8) class SampleDependencyTypeBox + extends FullBox(‘sdtp’, version = 0, 0) { + for (i=0; i < sample_count; i++){ + unsigned int(2) is_leading; + unsigned int(2) sample_depends_on; + unsigned int(2) sample_is_depended_on; + unsigned int(2) sample_has_redundancy; + } +} + +is_leading takes one of the following four values: +0: the leading nature of this sample is unknown; +1: this sample is a leading sample that has a dependency before the referenced I-picture (and is +therefore not decodable); +2: this sample is not a leading sample; +3: this sample is a leading sample that has no dependency before the referenced I-picture (and is +therefore decodable); +sample_depends_on takes one of the following four values: +0: the dependency of this sample is unknown; +1: this sample does depend on others (not an I picture); +2: this sample does not depend on others (I picture); +3: reserved +sample_is_depended_on takes one of the following four values: +0: the dependency of other samples on this sample is unknown; +1: other samples may depend on this one (not disposable); +2: no other sample depends on this one (disposable); +3: reserved +sample_has_redundancy takes one of the following four values: +0: it is unknown whether there is redundant coding in this sample; +1: there is redundant coding in this sample; +2: there is no redundant coding in this sample; +3: reserved + +*/ + +type SampleDependencyTypeFlags struct { + IsLeading bool + DependsOn bool + IsDependedOn bool + HasRedundancy bool +} + +type SampleDependencyTypeBox struct { + FullBox + SampleDependencyTypeFlags +} + +func CreateSampleDependencyTypeBox(flags SampleDependencyTypeFlags) *SampleDependencyTypeBox { + return &SampleDependencyTypeBox{FullBox: FullBox{BaseBox: BaseBox{typ: TypeSDTP, size: FullBoxLen + 1}, Version: 0, Flags: [3]byte{0, 0, 0}}, SampleDependencyTypeFlags: flags} +} + +func (box *SampleDependencyTypeBox) WriteTo(w io.Writer) (n int64, err error) { + var flag byte + if box.IsLeading { + flag |= 1 << 6 + } + if box.DependsOn { + flag |= 2 << 4 + } else { + flag |= 1 << 4 + } + if box.IsDependedOn { + flag |= 1 << 2 + } + if box.HasRedundancy { + flag |= 1 + } + w.Write([]byte{flag}) + return 1, nil +} + +func (box *SampleDependencyTypeBox) Unmarshal(buf []byte) (IBox, error) { + box.IsLeading = buf[0]>>6&1 == 1 + box.DependsOn = buf[0]>>4&2 == 2 + box.IsDependedOn = buf[0]>>2&1 == 1 + box.HasRedundancy = buf[0]&1 == 1 + return box, nil +} diff --git a/plugin/mp4/pkg/box/tkhd.go b/plugin/mp4/pkg/box/tkhd.go index 82f45c6..17b6be1 100644 --- a/plugin/mp4/pkg/box/tkhd.go +++ b/plugin/mp4/pkg/box/tkhd.go @@ -62,7 +62,7 @@ func CreateTrackHeaderBox(trackID uint32, duration uint64, width, height uint32) size: util.Conditional[uint32](version == 1, 92, 80) + FullBoxLen, }, Version: version, - Flags: [3]byte{0, 0, 3}, // Track_enabled | Track_in_movie + Flags: [3]byte{0, 0, 7}, // Track_enabled | Track_in_movie | Track_in_preview }, CreationTime: now, ModificationTime: now, diff --git a/plugin/mp4/pkg/box/trun.go b/plugin/mp4/pkg/box/trun.go index 32d3cfd..bc372ac 100644 --- a/plugin/mp4/pkg/box/trun.go +++ b/plugin/mp4/pkg/box/trun.go @@ -33,6 +33,12 @@ const ( TR_FLAG_DATA_SAMPLE_SIZE uint32 = 0x000200 TR_FLAG_DATA_SAMPLE_FLAGS uint32 = 0x000400 TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME uint32 = 0x000800 + + SAMPLE_FLAG_IS_LEADING uint32 = 1 << 3 + SAMPLE_FLAG_DEPENDS_ON_YES uint32 = 1 << 4 + SAMPLE_FLAG_DEPENDS_ON_NO uint32 = 2 << 4 + SAMPLE_FLAG_IS_DEPENDED_ON uint32 = 1 << 6 + SAMPLE_FLAG_HAS_REDUNDANCY uint32 = 1 << 7 ) type TrunEntry struct { @@ -50,7 +56,7 @@ type TrackRunBox struct { Entries []TrunEntry } -func CreateTrackRunBox(flags uint32, sampleCount uint32) *TrackRunBox { +func CreateTrackRunBox(flags uint32, entries []TrunEntry) *TrackRunBox { size := uint32(FullBoxLen + 4) // base size + sample_count if flags&TR_FLAG_DATA_OFFSET != 0 { @@ -74,7 +80,7 @@ func CreateTrackRunBox(flags uint32, sampleCount uint32) *TrackRunBox { entrySize += 4 } - size += entrySize * sampleCount + size += entrySize * uint32(len(entries)) return &TrackRunBox{ FullBox: FullBox{ @@ -85,8 +91,8 @@ func CreateTrackRunBox(flags uint32, sampleCount uint32) *TrackRunBox { Version: 1, // Use version 1 for signed composition time offsets Flags: [3]byte{byte(flags >> 16), byte(flags >> 8), byte(flags)}, }, - SampleCount: sampleCount, - Entries: make([]TrunEntry, sampleCount), + SampleCount: uint32(len(entries)), + Entries: entries, } } diff --git a/plugin/mp4/pkg/box/video.go b/plugin/mp4/pkg/box/video.go index 9b6a6ac..cce894f 100644 --- a/plugin/mp4/pkg/box/video.go +++ b/plugin/mp4/pkg/box/video.go @@ -7,4 +7,5 @@ type Sample struct { CTS uint32 Offset int64 Size int + Duration uint32 } diff --git a/plugin/mp4/pkg/muxer.go b/plugin/mp4/pkg/muxer.go index 0c2616e..cbc6b1b 100644 --- a/plugin/mp4/pkg/muxer.go +++ b/plugin/mp4/pkg/muxer.go @@ -3,11 +3,9 @@ package mp4 import ( "encoding/binary" "io" - "net" "os" "m7s.live/v5/pkg" - "m7s.live/v5/plugin/mp4/pkg/box" . "m7s.live/v5/plugin/mp4/pkg/box" ) @@ -28,6 +26,7 @@ type ( Tracks map[uint32]*Track Flag fragDuration uint32 + maxdurtaion uint32 moov IBox mdatOffset uint64 mdatSize uint64 @@ -59,26 +58,24 @@ func NewMuxer(flag Flag) *Muxer { func (m *Muxer) WriteInitSegment(w io.Writer) (err error) { var ftypBox *FileTypeBox if m.isFragment() { - // 对于 FMP4,使用 iso5 作为主品牌,兼容 iso5, iso6, mp41 - ftypBox = CreateFTYPBox(TypeISO5, 0x200, TypeISO5, TypeISO6, TypeMP41) + ftypBox = CreateFTYPBox(TypeISOM, 1, TypeISOM, TypeAVC1) } else { - // 对于普通 MP4,使用 isom 作为主品牌 ftypBox = CreateFTYPBox(TypeISOM, 0x200, TypeISOM, TypeISO2, TypeAVC1, TypeMP41) } - m.CurrentOffset, err = box.WriteTo(w, ftypBox) + m.CurrentOffset, err = WriteTo(w, ftypBox) if err != nil { return } if !m.isFragment() { var n int64 freeBox := CreateFreeBox(nil) - n, err = box.WriteTo(w, freeBox) + n, err = WriteTo(w, freeBox) if err != nil { return } m.CurrentOffset += n mdat := CreateDataBox(TypeMDAT, nil) - n, err = box.WriteTo(w, mdat) + n, err = WriteTo(w, mdat) if err != nil { return } @@ -115,9 +112,9 @@ func (m *Muxer) WriteSample(w io.Writer, t *Track, sample Sample) (err error) { } defer func() { // For fragmented MP4, check if we should create a new fragment - if sample.KeyFrame && t.Duration >= m.fragDuration { - err = m.flushFragment(w) - } + // if sample.KeyFrame && t.Duration >= m.fragDuration { + err = m.flushFragment(w) + // } }() } else { // For regular MP4, write directly to output @@ -141,7 +138,7 @@ func (m *Muxer) reWriteMdatSize(w io.WriteSeeker) (err error) { if _, err = w.Seek(int64(m.mdatOffset-16), io.SeekStart); err != nil { return } - if _, err = box.WriteTo(w, mdat); err != nil { + if _, err = WriteTo(w, mdat); err != nil { return } if _, err = w.Seek(m.CurrentOffset, io.SeekStart); err != nil { @@ -188,20 +185,23 @@ func (m *Muxer) ReWriteWithMoov(f io.WriteSeeker, r io.Reader) (err error) { return } -func (m *Muxer) makeMvex() *box.MovieExtendsBox { - trexs := make([]*box.TrackExtendsBox, 0, m.nextTrackId-1) +func (m *Muxer) makeMvex() *MovieExtendsBox { + trexs := make([]*TrackExtendsBox, 0, m.nextTrackId-1) for i := uint32(1); i < m.nextTrackId; i++ { if track := m.Tracks[i]; track != nil { - trex := box.CreateTrackExtendsBox(track.TrackId) - if track.Cid.IsVideo() { - trex.DefaultSampleFlags = 0x01010000 - } else { - trex.DefaultSampleFlags = 0x02000000 - } + trex := CreateTrackExtendsBox(track.TrackId) + trex.DefaultSampleDescriptionIndex = 1 + // if track.Cid.IsVideo() { + // trex.DefaultSampleFlags = 0x01010000 + // } else { + // trex.DefaultSampleFlags = 0x02000000 + // } trexs = append(trexs, trex) } } - return box.CreateMovieExtendsBox(trexs) + // mehd := CreateMovieExtendsHeaderBox(m.maxdurtaion) + var mehd *MovieExtendsHeaderBox + return CreateMovieExtendsBox(mehd, trexs) } func (m *Muxer) makeTrak(track *Track) *ContainerBox { @@ -220,38 +220,27 @@ func (m *Muxer) makeTrak(track *Track) *ContainerBox { return CreateContainerBox(TypeTRAK, tkhd, mdia, edts) } -func (m *Muxer) GetMoovSize() int { - moovsize := uint64(FullBoxLen + 96) - if m.isDash() || m.isFragment() { - moovsize += 64 - } +func (m *Muxer) MakeMoov() IBox { + mvhd := CreateMovieHeaderBox(m.nextTrackId, 0) + children := []IBox{mvhd} for _, track := range m.Tracks { - moovsize += uint64(m.makeTrak(track).Size()) + children = append(children, m.makeTrak(track)) + if m.maxdurtaion < track.Duration { + m.maxdurtaion = track.Duration + } } - return int(8 + moovsize) + mvhd.Duration = uint64(m.maxdurtaion) + if m.isDash() || m.isFragment() { + children = append(children, m.makeMvex()) + } + m.moov = CreateContainerBox(TypeMOOV, children...) + return m.moov } func (m *Muxer) WriteMoov(w io.Writer) (err error) { - var mvhd *box.MovieHeaderBox - var mvex *box.MovieExtendsBox - var children []IBox - maxdurtaion := uint32(0) - for _, track := range m.Tracks { - children = append(children, m.makeTrak(track)) - if maxdurtaion < track.Duration { - maxdurtaion = track.Duration - } - } - if m.isDash() || m.isFragment() { - mvhd = box.CreateMovieHeaderBox(m.nextTrackId, 0) - mvex = m.makeMvex() - children = append(children, mvex) - } else { - mvhd = box.CreateMovieHeaderBox(m.nextTrackId, maxdurtaion) - } - m.moov = box.CreateContainerBox(TypeMOOV, append([]IBox{mvhd}, children...)...) + m.MakeMoov() var n int64 - n, err = box.WriteTo(w, m.moov) + n, err = WriteTo(w, m.moov) m.CurrentOffset += n return } @@ -262,10 +251,10 @@ func (m *Muxer) WriteTrailer(file *os.File) (err error) { if err = m.flushFragment(file); err != nil { return err } - var mfraChildren []box.IBox + var mfraChildren []IBox var mfraSize uint32 = 0 // Write mfra box - tfras := make([]*box.TrackFragmentRandomAccessBox, len(m.Tracks)) + tfras := make([]*TrackFragmentRandomAccessBox, len(m.Tracks)) for i := uint32(1); i < m.nextTrackId; i++ { if track := m.Tracks[i]; track != nil && len(track.fragments) > 0 { tfras[i-1] = track.makeTfraBox() @@ -276,9 +265,9 @@ func (m *Muxer) WriteTrailer(file *os.File) (err error) { // Only write mfra if we have fragments if mfraSize > 0 { - mfraChildren = append(mfraChildren, box.CreateMfroBox(uint32(mfraSize)+16)) - mfra := box.CreateContainerBox(TypeMFRA, mfraChildren...) - _, err = box.WriteTo(file, mfra) + mfraChildren = append(mfraChildren, CreateMfroBox(uint32(mfraSize)+16)) + mfra := CreateContainerBox(TypeMFRA, mfraChildren...) + _, err = WriteTo(file, mfra) if err != nil { return err } @@ -319,59 +308,36 @@ func (m *Muxer) flushFragment(w io.Writer) (err error) { return err } } - // Calculate mdat size first - var mdatSize uint64 = 8 // mdat box header - for i := uint32(1); i < m.nextTrackId; i++ { - if len(m.Tracks[i].Samplelist) == 0 { - continue - } - ws := m.Tracks[i].writer.(*Fmp4WriterSeeker) - mdatSize += uint64(len(ws.Buffer)) - } - // Write moof box - mfhdBox := box.CreateMovieFragmentHeaderBox(m.nextFragmentId) - trafs := make([]*box.TrackFragmentBox, len(m.Tracks)) - moofChildren := make([]box.IBox, 0, len(m.Tracks)+1) - moofChildren = append(moofChildren, mfhdBox) + // Process each track separately for i := uint32(1); i < m.nextTrackId; i++ { - if len(m.Tracks[i].Samplelist) == 0 { - continue - } track := m.Tracks[i] - // 传递 moof 偏移和 mdat 大小 - traf := track.makeTraf(uint64(m.CurrentOffset)) // +8 for moof box header - // Record trun data_offset position: current offset + 16 (after trun header) - trafs[i-1] = traf - moofChildren = append(moofChildren, traf) - } - - moof := CreateContainerBox(TypeMOOF, moofChildren...) - - sampleData := make(net.Buffers, len(m.Tracks)) - // Write sample data - var sampleOffset int64 = 0 - for i := uint32(1); i < m.nextTrackId; i++ { - if len(m.Tracks[i].Samplelist) == 0 { + if len(track.Samplelist) == 0 { continue } - track := m.Tracks[i] ws := track.writer.(*Fmp4WriterSeeker) - // Update sample offsets relative to mdat start - for j := range track.Samplelist { - sampleOffset += int64(track.Samplelist[j].Size) - } + // Create moof box for this track + moof := track.MakeMoof(m.nextFragmentId) - sampleData[i-1] = ws.Buffer + // Create mdat box for this track + mdat := CreateDataBox(TypeMDAT, ws.Buffer) + + // Write moof box + var n int64 + n, err = WriteTo(w, moof, mdat) + if err != nil { + return err + } + m.CurrentOffset += n // Record fragment info if len(track.Samplelist) > 0 { firstTs := track.Samplelist[0].Timestamp lastTs := track.Samplelist[len(track.Samplelist)-1].Timestamp frag := Fragment{ - Offset: uint64(m.CurrentOffset), + Offset: uint64(int64(moof.Size()) + int64(mdat.HeaderSize())), // Start of moof Duration: track.Duration, FirstTs: uint64(firstTs), LastTs: uint64(lastTs), @@ -386,32 +352,6 @@ func (m *Muxer) flushFragment(w io.Writer) (err error) { track.Duration = 0 } - // Write mdat box - mdat := CreateBaseBox(TypeMDAT, uint64(sampleOffset)+BasicBoxLen) - - for i, traf := range trafs { - traf.TRUN.DataOffset = int32(moof.Size()) + int32(mdat.HeaderSize()) - if i > 0 { - traf.TRUN.DataOffset += int32(len(sampleData[i-1])) - } - } - - var n int64 - n, err = box.WriteTo(w, moof) - if err != nil { - return err - } - m.CurrentOffset += n - n, err = mdat.HeaderWriteTo(w) - if err != nil { - return err - } - m.CurrentOffset += n - n, err = sampleData.WriteTo(w) - if err != nil { - return err - } - m.CurrentOffset += n m.nextFragmentId++ return nil } diff --git a/plugin/mp4/pkg/muxer_fmp4_test.go b/plugin/mp4/pkg/muxer_fmp4_test.go index 14fb3fd..90be965 100644 --- a/plugin/mp4/pkg/muxer_fmp4_test.go +++ b/plugin/mp4/pkg/muxer_fmp4_test.go @@ -8,6 +8,7 @@ import ( "os/exec" "testing" + "github.com/deepch/vdk/codec/h264parser" "m7s.live/v5/plugin/mp4/pkg/box" ) @@ -28,42 +29,6 @@ type ( } ) -// validateAndFixAVCC 验证并修复 AVCC 格式的 NALU -func validateAndFixAVCC(data []byte) ([]byte, error) { - if len(data) < 4 { - return nil, fmt.Errorf("data too short for AVCC") - } - - var pos int - var output []byte - - for pos < len(data) { - if pos+4 > len(data) { - return nil, fmt.Errorf("incomplete NALU length at position %d", pos) - } - - // 读取 NALU 长度(4字节,大端序) - naluLen := binary.BigEndian.Uint32(data[pos : pos+4]) - - // 验证 NALU 长度 - if naluLen == 0 || pos+4+int(naluLen) > len(data) { - return nil, fmt.Errorf("invalid NALU length %d at position %d", naluLen, pos) - } - - // 验证 NALU 类型 - naluType := data[pos+4] & 0x1F - if naluType == 0 || naluType > 12 { - return nil, fmt.Errorf("invalid NALU type %d at position %d", naluType, pos) - } - - // 复制长度前缀和 NALU 数据 - output = append(output, data[pos:pos+4+int(naluLen)]...) - pos += 4 + int(naluLen) - } - - return output, nil -} - func readFLVHeader(r io.Reader) (*FLVHeader, error) { header := &FLVHeader{} if err := binary.Read(r, binary.BigEndian, &header.Signature); err != nil { @@ -229,7 +194,7 @@ func findBoxOffsets(filename string) error { func TestFLVToFMP4(t *testing.T) { // Open FLV file - flvFile, err := os.Open("/Users/dexter/Movies/daxiangge.flv") + flvFile, err := os.Open("/Users/dexter/Movies/002.flv") if err != nil { t.Fatalf("Failed to open FLV file: %v", err) } @@ -253,6 +218,7 @@ func TestFLVToFMP4(t *testing.T) { hasVideo := header.Flags&0x01 != 0 hasAudio := header.Flags&0x04 != 0 + // hasAudio := false // Skip to the first tag if _, err := flvFile.Seek(int64(header.DataOffset), io.SeekStart); err != nil { t.Fatalf("Failed to seek to first tag: %v", err) @@ -262,8 +228,6 @@ func TestFLVToFMP4(t *testing.T) { var videoTrack, audioTrack *Track if hasVideo { videoTrack = muxer.AddTrack(box.MP4_CODEC_H264) - videoTrack.Width = 3840 // 4K resolution - videoTrack.Height = 2160 videoTrack.Timescale = 1000 } if hasAudio { @@ -281,10 +245,11 @@ func TestFLVToFMP4(t *testing.T) { for { tag, err := readFLVTag(flvFile) if err != nil { - if err == io.EOF { - break - } - t.Fatalf("Failed to read FLV tag: %v", err) + // if err == io.EOF { + // break + // } + // t.Fatalf("Failed to read FLV tag: %v", err) + break } switch tag.TagType { @@ -356,7 +321,16 @@ func TestFLVToFMP4(t *testing.T) { if tag.Data[1] == 0 { // AVC sequence header fmt.Println("Found AVC sequence header") videoConfig = tag.Data[5:] // Store AVC config (skip composition time) + codecData, err := h264parser.NewCodecDataFromAVCDecoderConfRecord(videoConfig) + if err != nil { + t.Fatalf("Failed to parse AVC sequence header: %v", err) + } + fmt.Printf("Codec data: %+v\n", codecData) videoTrack.ExtraData = videoConfig + videoTrack.Width = uint32(codecData.Width()) + videoTrack.Height = uint32(codecData.Height()) + videoTrack.Timescale = 1000 + } else if len(videoConfig) > 0 { // Video data if len(tag.Data) <= 5 { fmt.Printf("Skipping empty video sample at timestamp %d\n", tag.Timestamp) @@ -370,15 +344,8 @@ func TestFLVToFMP4(t *testing.T) { compositionTime |= ^0xffffff } - // 验证和修复 AVCC 格式 - validData, err := validateAndFixAVCC(tag.Data[5:]) - if err != nil { - fmt.Printf("Warning: Invalid AVCC data at timestamp %d: %v\n", tag.Timestamp, err) - continue - } - sample := box.Sample{ - Data: validData, + Data: tag.Data[5:], Timestamp: uint32(tag.Timestamp), CTS: uint32(compositionTime), KeyFrame: frameType == 1, diff --git a/plugin/mp4/pkg/muxer_test.go b/plugin/mp4/pkg/muxer_test.go index 620abe2..fe0e30f 100644 --- a/plugin/mp4/pkg/muxer_test.go +++ b/plugin/mp4/pkg/muxer_test.go @@ -128,15 +128,8 @@ func TestFLVToMP4(t *testing.T) { compositionTime |= ^0xffffff } - // Validate and fix AVCC format - validData, err := validateAndFixAVCC(tag.Data[5:]) - if err != nil { - fmt.Printf("Warning: Invalid AVCC data at timestamp %d: %v\n", tag.Timestamp, err) - continue - } - sample := box.Sample{ - Data: validData, + Data: tag.Data[5:], Timestamp: uint32(tag.Timestamp), CTS: uint32(compositionTime), KeyFrame: frameType == 1, diff --git a/plugin/mp4/pkg/track.go b/plugin/mp4/pkg/track.go index c80ebf8..e83ad5b 100644 --- a/plugin/mp4/pkg/track.go +++ b/plugin/mp4/pkg/track.go @@ -110,10 +110,11 @@ func (track *Track) makeEdtsBox() *ContainerBox { } func (track *Track) AddSampleEntry(entry Sample) { - if len(track.Samplelist) <= 1 { + if len(track.Samplelist) < 1 { track.Duration = 0 } else { delta := int64(entry.Timestamp - track.Samplelist[len(track.Samplelist)-1].Timestamp) + track.Samplelist[len(track.Samplelist)-1].Duration = uint32(delta) if delta < 0 { track.Duration += 1 } else { @@ -125,9 +126,6 @@ func (track *Track) AddSampleEntry(entry Sample) { func (track *Track) makeTkhdBox() *TrackHeaderBox { duration := uint64(track.Duration) - if track.isFragment { - duration = 0 - } tkhd := CreateTrackHeaderBox(track.TrackId, duration, track.Width, track.Height) if track.Cid == MP4_CODEC_AAC || track.Cid == MP4_CODEC_G711A || track.Cid == MP4_CODEC_G711U || track.Cid == MP4_CODEC_OPUS { @@ -179,7 +177,7 @@ func (track *Track) makeStblBox() IBox { track.STSZ = CreateSTSZBox(0, nil) track.STCO = CreateSTCOBox(nil) } - return CreateContainerBox(TypeSTBL, track.STSD, track.STSS, track.STTS, track.CTTS, track.STSC, track.STSZ, track.STCO) + return CreateContainerBox(TypeSTBL, track.STSD, track.STSS, track.STSZ, track.STSC, track.STTS, track.CTTS, track.STCO) } func (track *Track) makeStsd(handler_type HandlerType) *STSDBox { @@ -202,31 +200,38 @@ func (track *Track) makeStsd(handler_type HandlerType) *STSDBox { return CreateSTSDBox(entry) } -// fmp4 -func (track *Track) makeTraf(moofOffset uint64) *TrackFragmentBox { - // Create tfhd box - tfhd := track.makeTfhdBox(moofOffset) - - // Create tfdt box +func (track *Track) MakeMoof(fragmentId uint32) *ContainerBox { + tfhd := track.makeTfhdBox(0) tfdt := track.makeTfdtBox() - // Create trun box with all samples - trun := track.makeTrunBox(0, len(track.Samplelist)) + turnEntries := make([]TrunEntry, 0) + for _, sample := range track.Samplelist { + var sampleFlags uint32 + if sample.KeyFrame { + sampleFlags = SAMPLE_FLAG_IS_LEADING | SAMPLE_FLAG_IS_DEPENDED_ON | SAMPLE_FLAG_DEPENDS_ON_NO + } else { + sampleFlags = SAMPLE_FLAG_DEPENDS_ON_YES + } + turnEntries = append(turnEntries, TrunEntry{ + SampleSize: uint32(sample.Size), + SampleDuration: uint32(sample.Duration), + SampleCompositionTimeOffset: int32(sample.CTS), + SampleFlags: sampleFlags, + }) + } - // Create track fragment box with all necessary boxes - traf := CreateTrackFragmentBox(tfhd, tfdt, trun) + trun := CreateTrackRunBox(TR_FLAG_DATA_OFFSET|TR_FLAG_DATA_SAMPLE_DURATION|TR_FLAG_DATA_SAMPLE_SIZE|TR_FLAG_DATA_SAMPLE_FLAGS|TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME, turnEntries) - return traf + traf := CreateContainerBox(TypeTRAF, tfhd, tfdt, trun) + result := CreateContainerBox(TypeMOOF, CreateMovieFragmentHeaderBox(fragmentId), traf) + trun.DataOffset = int32(result.Size()) + int32(BasicBoxLen) // TODO: mdat large than 4GB + return result } func (track *Track) makeTfhdBox(moofOffset uint64) *TrackFragmentHeaderBox { tfFlags := uint32(0) - tfFlags |= TF_FLAG_DEFAULT_BASE_IS_MOOF - tfFlags |= TF_FLAG_SAMPLE_DESCRIPTION_INDEX_PRESENT - tfFlags |= TF_FLAG_DEFAULT_SAMPLE_DURATION_PRESENT - tfFlags |= TF_FLAG_DEFAULT_SAMPLE_SIZE_PRESENT - tfFlags |= TF_FLAG_DEFAULT_SAMPLE_FLAGS_PRESENT - + // tfFlags |= TF_FLAG_DEFAULT_BASE_IS_MOOF + // tfFlags |= TF_FLAG_DEFAULT_SAMPLE_FLAGS_PRESENT tfhd := CreateTrackFragmentHeaderBox(track.TrackId, tfFlags) tfhd.BaseDataOffset = moofOffset // Calculate default sample duration @@ -252,9 +257,9 @@ func (track *Track) makeTfhdBox(moofOffset uint64) *TrackFragmentHeaderBox { // Set default sample flags if track.Cid.IsVideo() { - tfhd.DefaultSampleFlags = MOV_FRAG_SAMPLE_FLAG_DEPENDS_YES | MOV_FRAG_SAMPLE_FLAG_IS_NON_SYNC + tfhd.DefaultSampleFlags = 16842752 } else { - tfhd.DefaultSampleFlags = MOV_FRAG_SAMPLE_FLAG_DEPENDS_NO + tfhd.DefaultSampleFlags = 0 } return tfhd } @@ -277,8 +282,11 @@ func (track *Track) makeTfraBox() *TrackFragmentRandomAccessBox { return CreateTrackFragmentRandomAccessBox(track.TrackId, slices.Collect(func(yield func(TFRAEntry) bool) { for _, f := range track.fragments { if !yield(TFRAEntry{ - Time: f.FirstTs, - MoofOffset: f.Offset, + Time: f.FirstTs, + MoofOffset: f.Offset, + TrafNumber: uint32(1), + TrunNumber: uint32(1), + SampleNumber: uint32(1), }) { break } @@ -286,40 +294,6 @@ func (track *Track) makeTfraBox() *TrackFragmentRandomAccessBox { })) } -func (track *Track) makeTrunBox(start, end int) *TrackRunBox { - - // Set flags in the correct order - flag := TR_FLAG_DATA_OFFSET - - // Then set sample size flag (0x000200) - flag |= TR_FLAG_DATA_SAMPLE_SIZE - - // Create a new TrackRunBox - - // Finally set composition time offset flag if needed (0x000800) - for i := start; i < end; i++ { - if track.Samplelist[i].CTS != 0 { - flag |= TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME - break - } - } - trun := CreateTrackRunBox(flag, uint32(end-start)) - // Fill entry list - trun.Entries = make([]TrunEntry, trun.SampleCount) - for i := 0; i < int(trun.SampleCount); i++ { - sample := &track.Samplelist[start+i] - // Size - trun.Entries[i].SampleSize = uint32(sample.Size) - - // Composition time offset - if flag&TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME != 0 { - trun.Entries[i].SampleCompositionTimeOffset = int32(sample.CTS) - } - } - - return trun -} - func (track *Track) makeStblTable() { sameSize := true movchunks := make([]movchunk, 0)