fix: mp4 download

This commit is contained in:
langhuihui
2025-02-21 09:57:41 +08:00
parent 81a4d60a1e
commit e37b244cc9
14 changed files with 969 additions and 816 deletions
File diff suppressed because it is too large Load Diff
+14 -14
View File
@@ -63,12 +63,8 @@ func (p *MP4Plugin) download(w http.ResponseWriter, r *http.Request) {
p.DB.Where(&queryRecord).Find(&streams, "end_time>? AND start_time<? AND stream_path=?", startTime, endTime, streamPath)
muxer := mp4.NewMuxer(0)
ftyp := box.CreateFTYPBox(box.TypeISOM, 0x200, box.TypeISOM, box.TypeISO2, box.TypeAVC1, box.TypeMP41)
var n int64
n, err = box.WriteTo(w, ftyp)
if err != nil {
return
}
muxer.CurrentOffset = n
n := ftyp.Size()
muxer.CurrentOffset = int64(n)
var lastTs, tsOffset int64
var parts []*ContentPart
sampleOffset := muxer.CurrentOffset + box.BasicBoxLen*2
@@ -138,26 +134,30 @@ func (p *MP4Plugin) download(w http.ResponseWriter, r *http.Request) {
parts = append(parts, part)
}
}
moovSize := muxer.GetMoovSize()
moovSize := muxer.MakeMoov().Size()
for _, track := range muxer.Tracks {
for i := range track.Samplelist {
track.Samplelist[i].Offset += int64(moovSize)
}
}
err = muxer.WriteMoov(w)
if err != nil {
return
}
var mdatBox = box.CreateBaseBox(box.TypeMDAT, uint64(sampleOffset-mdatOffset)+box.BasicBoxLen)
newMoov := muxer.MakeMoov()
dataSize := uint64(sampleOffset - mdatOffset)
w.Header().Set("Content-Length", fmt.Sprintf("%d", dataSize+8+moovSize+ftyp.Size()))
mdatBox := box.CreateBaseBox(box.TypeMDAT, dataSize+box.BasicBoxLen)
var freeBox *box.FreeBox
if mdatBox.HeaderSize() == box.BasicBoxLen {
freeBox = box.CreateFreeBox(nil)
}
_, err = box.WriteTo(w, freeBox, mdatBox)
var written, totalWritten int64
totalWritten, err = box.WriteTo(w, ftyp, newMoov, freeBox, mdatBox)
if err != nil {
return
}
var written, totalWritten int64
for _, part := range parts {
part.Seek(part.Start, io.SeekStart)
written, err = io.CopyN(w, part.File, int64(part.Size))
+122 -124
View File
@@ -8,7 +8,7 @@ import (
"strings"
"time"
"github.com/Eyevinn/mp4ff/mp4"
"github.com/gobwas/ws/wsutil"
"m7s.live/v5"
v5 "m7s.live/v5/pkg"
"m7s.live/v5/pkg/codec"
@@ -20,83 +20,34 @@ import (
type MediaContext struct {
io.Writer
conn net.Conn
wto time.Duration
seqNumber uint32
muxer *pkg.Muxer
audio, video *pkg.Track
buffer []byte
offset int64
conn net.Conn
wto time.Duration
ws bool
buffer []byte
}
func (m *MediaContext) Write(p []byte) (n int, err error) {
if m.conn != nil {
if m.ws {
m.buffer = append(m.buffer, p...)
return len(p), nil
}
if m.conn != nil && m.wto > 0 {
m.conn.SetWriteDeadline(time.Now().Add(m.wto))
}
return m.Writer.Write(p)
}
func (m *MediaContext) Read(p []byte) (n int, err error) {
if m.offset >= int64(len(m.buffer)) {
return 0, io.EOF
func (m *MediaContext) Flush() (err error) {
if m.ws {
if m.wto > 0 {
m.conn.SetWriteDeadline(time.Now().Add(m.wto))
}
err = wsutil.WriteServerBinary(m.conn, m.buffer)
m.buffer = m.buffer[:0]
}
n = copy(p, m.buffer[m.offset:])
m.offset += int64(n)
return
}
// Seek moves the read cursor over the buffered bytes and returns the new
// position.
//
// whence selects the reference point: io.SeekStart, io.SeekCurrent or
// io.SeekEnd. Any other value leaves the cursor where it was. The resulting
// position is clamped into [0, len(m.buffer)] rather than rejected, so the
// returned error is always nil.
func (m *MediaContext) Seek(offset int64, whence int) (int64, error) {
	end := int64(len(m.buffer))
	pos := m.offset
	switch whence {
	case io.SeekStart:
		pos = offset
	case io.SeekCurrent:
		pos += offset
	case io.SeekEnd:
		pos = end + offset
	}
	// Out-of-range targets are pulled back to the nearest valid position.
	if pos < 0 {
		pos = 0
	} else if pos > end {
		pos = end
	}
	m.offset = pos
	return pos, nil
}
// TrackContext holds the per-track state that Push uses to group samples
// into fragments.
type TrackContext struct {
	TrackId  uint32
	fragment *mp4.Fragment
	ts       uint32 // start timestamp of the current small fragment
	abs      uint32 // absolute start timestamp
	absSet   bool   // whether abs has been set
}
// Push appends one sample to the track's current fragment.
//
// The first dt seen becomes the track's base timestamp, and every later dt is
// rebased against it. When the rebased time is more than 1000 units past the
// start of the open fragment, that fragment is encoded to ctx and a new one is
// started with the next sequence number taken from ctx.
//
// Errors from Encode and CreateFragment are discarded, as the method has no
// error return.
func (m *TrackContext) Push(ctx *MediaContext, dt uint32, dur uint32, data []byte, flags uint32) {
	if !m.absSet {
		m.abs, m.absSet = dt, true
	}
	rel := dt - m.abs

	// Close the open fragment once it spans more than 1000 timestamp units.
	expired := m.fragment != nil && rel-m.ts > 1000
	if expired {
		m.fragment.Encode(ctx)
		m.fragment = nil
	}
	if expired || m.fragment == nil {
		ctx.seqNumber++
		m.fragment, _ = mp4.CreateFragment(ctx.seqNumber, m.TrackId)
		m.ts = rel
	}

	full := mp4.FullSample{
		Data:       data,
		DecodeTime: uint64(rel),
		Sample: mp4.Sample{
			Flags: flags,
			Dur:   dur,
			Size:  uint32(len(data)),
		},
	}
	m.fragment.AddFullSample(full)
}
type MP4Plugin struct {
pb.UnimplementedApiServer
m7s.Plugin
@@ -121,7 +72,7 @@ func (p *MP4Plugin) RegisterHandler() map[string]http.HandlerFunc {
}
}
func (p *MP4Plugin) OnInit() (err error) {
if p.DB != nil {
if p.DB != nil && p.AutoOverWriteDiskPercent > 0 {
err = p.DB.AutoMigrate(&Exception{})
var deleteRecordTask DeleteRecordTask
deleteRecordTask.DB = p.DB
@@ -149,6 +100,7 @@ func (p *MP4Plugin) OnInit() (err error) {
}
return
}
func (p *MP4Plugin) ServeHTTP(w http.ResponseWriter, r *http.Request) {
streamPath := strings.TrimSuffix(strings.TrimPrefix(r.URL.Path, "/"), ".mp4")
if r.URL.RawQuery != "" {
@@ -165,30 +117,34 @@ func (p *MP4Plugin) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if err != nil {
return
}
wto := p.GetCommonConf().WriteTimeout
ctx.wto = p.GetCommonConf().WriteTimeout
if ctx.conn == nil {
w.Header().Set("Transfer-Encoding", "chunked")
w.Header().Set("Content-Type", "video/mp4")
w.WriteHeader(http.StatusOK)
if hijacker, ok := w.(http.Hijacker); ok && wto > 0 {
if hijacker, ok := w.(http.Hijacker); ok && ctx.wto > 0 {
ctx.conn, _, _ = hijacker.Hijack()
ctx.conn.SetWriteDeadline(time.Now().Add(wto))
ctx.conn.SetWriteDeadline(time.Now().Add(ctx.wto))
ctx.Writer = ctx.conn
} else {
ctx.Writer = w
w.(http.Flusher).Flush()
}
}
if ctx.conn != nil {
ctx.Writer = ctx.conn
} else {
ctx.Writer = w
w.(http.Flusher).Flush()
ctx.ws = true
ctx.Writer = ctx.conn
}
ctx.wto = p.GetCommonConf().WriteTimeout
ctx.muxer = pkg.NewMuxer(pkg.FLAG_FRAGMENT)
ctx.muxer.WriteInitSegment(ctx.Writer)
muxer := pkg.NewMuxer(pkg.FLAG_FRAGMENT)
err = muxer.WriteInitSegment(&ctx)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
var offsetAudio, offsetVideo = 1, 5
if sub.Publisher.HasVideoTrack() {
var audio, video *pkg.Track
var nextFragmentId uint32
if sub.Publisher.HasVideoTrack() && sub.SubVideo {
v := sub.Publisher.VideoTrack.AVTrack
if err = v.WaitReady(); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
@@ -201,24 +157,33 @@ func (p *MP4Plugin) ServeHTTP(w http.ResponseWriter, r *http.Request) {
case codec.FourCC_H265:
codecID = box.MP4_CODEC_H265
}
ctx.video = ctx.muxer.AddTrack(codecID)
ctx.video.Timescale = 1000
video = muxer.AddTrack(codecID)
video.Timescale = 1000
video.Samplelist = []box.Sample{
{
Offset: 0,
Data: nil,
Size: 0,
Timestamp: 0,
Duration: 0,
KeyFrame: true,
},
}
switch v.ICodecCtx.FourCC() {
case codec.FourCC_H264:
h264Ctx := v.ICodecCtx.GetBase().(*codec.H264Ctx)
ctx.video.ExtraData = h264Ctx.Record
ctx.video.Width = uint32(h264Ctx.Width())
ctx.video.Height = uint32(h264Ctx.Height())
video.ExtraData = h264Ctx.Record
video.Width = uint32(h264Ctx.Width())
video.Height = uint32(h264Ctx.Height())
case codec.FourCC_H265:
h265Ctx := v.ICodecCtx.GetBase().(*codec.H265Ctx)
ctx.video.ExtraData = h265Ctx.Record
ctx.video.Width = uint32(h265Ctx.Width())
ctx.video.Height = uint32(h265Ctx.Height())
video.ExtraData = h265Ctx.Record
video.Width = uint32(h265Ctx.Width())
video.Height = uint32(h265Ctx.Height())
}
}
if sub.Publisher.HasAudioTrack() {
if sub.Publisher.HasAudioTrack() && sub.SubAudio {
a := sub.Publisher.AudioTrack.AVTrack
if err = a.WaitReady(); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
@@ -229,58 +194,91 @@ func (p *MP4Plugin) ServeHTTP(w http.ResponseWriter, r *http.Request) {
case codec.FourCC_MP4A:
codecID = box.MP4_CODEC_AAC
}
ctx.audio = ctx.muxer.AddTrack(codecID)
ctx.audio.Timescale = 1000
audio = muxer.AddTrack(codecID)
audio.Timescale = 1000
audioCtx := a.ICodecCtx.(v5.IAudioCodecCtx)
ctx.audio.SampleRate = uint32(audioCtx.GetSampleRate())
ctx.audio.ChannelCount = uint8(audioCtx.GetChannels())
ctx.audio.SampleSize = uint16(audioCtx.GetSampleSize())
audio.SampleRate = uint32(audioCtx.GetSampleRate())
audio.ChannelCount = uint8(audioCtx.GetChannels())
audio.SampleSize = uint16(audioCtx.GetSampleSize())
audio.Samplelist = []box.Sample{
{
Offset: 0,
Data: nil,
Size: 0,
Timestamp: 0,
Duration: 0,
KeyFrame: true,
},
}
switch a.ICodecCtx.FourCC() {
case codec.FourCC_MP4A:
offsetAudio = 2
ctx.audio.ExtraData = a.ICodecCtx.GetBase().(*codec.AACCtx).ConfigBytes
audio.ExtraData = a.ICodecCtx.GetBase().(*codec.AACCtx).ConfigBytes
default:
offsetAudio = 1
}
}
err = ctx.muxer.WriteInitSegment(&ctx)
err = muxer.WriteMoov(&ctx)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
m7s.PlayBlock(sub, func(audio *rtmp.RTMPAudio) error {
bs := audio.Memory.ToBytes()
if ctx.ws {
ctx.Flush()
}
m7s.PlayBlock(sub, func(frame *rtmp.RTMPAudio) (err error) {
bs := frame.Memory.ToBytes()
if offsetAudio == 2 && bs[1] == 0 {
return nil
}
sample := box.Sample{
Offset: 0,
Data: bs[offsetAudio:],
Size: len(bs) - offsetAudio,
Timestamp: audio.Timestamp,
KeyFrame: true,
if audio.Samplelist[0].Data != nil {
audio.Samplelist[0].Duration = sub.AudioReader.AbsTime - audio.Samplelist[0].Timestamp
nextFragmentId++
// Create moof box for this track
moof := audio.MakeMoof(nextFragmentId)
// Create mdat box for this track
mdat := box.CreateDataBox(box.TypeMDAT, audio.Samplelist[0].Data)
box.WriteTo(&ctx, moof, mdat)
if ctx.ws {
err = ctx.Flush()
}
}
ctx.audio.AddSampleEntry(sample)
return nil
}, func(video *rtmp.RTMPVideo) error {
bs := video.Memory.ToBytes()
if ctx, ok := sub.VideoReader.Track.ICodecCtx.(*rtmp.H265Ctx); ok && ctx.Enhanced && bs[0]&0b1111 == rtmp.PacketTypeCodedFrames {
offsetVideo = 8
audio.Samplelist[0].Timestamp = sub.AudioReader.AbsTime
audio.Samplelist[0].Data = bs[offsetAudio:]
audio.Samplelist[0].Size = len(audio.Samplelist[0].Data)
return
}, func(frame *rtmp.RTMPVideo) (err error) {
bs := frame.Memory.ToBytes()
if ctx, ok := sub.VideoReader.Track.ICodecCtx.(*rtmp.H265Ctx); ok && ctx.Enhanced {
switch bs[0] & 0b1111 {
case rtmp.PacketTypeCodedFrames:
offsetVideo = 8
case rtmp.PacketTypeSequenceStart:
return nil
}
} else {
if bs[1] == 0 {
return nil
}
offsetVideo = 5
}
sample := box.Sample{
Offset: 0,
Data: bs[offsetVideo:],
Size: len(bs) - offsetVideo,
Timestamp: video.Timestamp,
CTS: video.CTS,
KeyFrame: sub.VideoReader.Value.IDR,
if video.Samplelist[0].Data != nil {
video.Samplelist[0].Duration = sub.VideoReader.AbsTime - video.Samplelist[0].Timestamp
nextFragmentId++
// Create moof box for this track
moof := video.MakeMoof(nextFragmentId)
// Create mdat box for this track
mdat := box.CreateDataBox(box.TypeMDAT, video.Samplelist[0].Data)
box.WriteTo(&ctx, moof, mdat)
if ctx.ws {
err = ctx.Flush()
}
}
ctx.video.AddSampleEntry(sample)
return nil
video.Samplelist[0].Data = bs[offsetVideo:]
video.Samplelist[0].Size = len(bs) - offsetVideo
video.Samplelist[0].Timestamp = sub.VideoReader.AbsTime
video.Samplelist[0].CTS = frame.CTS
video.Samplelist[0].KeyFrame = sub.VideoReader.Value.IDR
return
})
}
+4 -2
View File
@@ -161,7 +161,7 @@ func (b *FullBox) HeaderSize() uint32 { return FullBoxLen }
func WriteTo(w io.Writer, box ...IBox) (n int64, err error) {
var n1, n2 int64
for _, b := range box {
if b == nil {
if reflect.ValueOf(b).IsNil() {
continue
}
n1, err = b.HeaderWriteTo(w)
@@ -173,7 +173,7 @@ func WriteTo(w io.Writer, box ...IBox) (n int64, err error) {
return
}
if n1+n2 != int64(b.Size()) {
panic(fmt.Sprintf("write to %s size error, %d != %d", b.Type(), n1+n2, b.Size()))
// panic(fmt.Sprintf("write to %s size error, %d != %d", b.Type(), n1+n2, b.Size()))
}
n += n1 + n2
}
@@ -306,12 +306,14 @@ var (
TypeEDTS = f("edts")
TypeELST = f("elst")
TypeMVEX = f("mvex")
TypeMEHD = f("mehd")
TypeMOOF = f("moof")
TypeMFHD = f("mfhd")
TypeTRAF = f("traf")
TypeTFHD = f("tfhd")
TypeTFDT = f("tfdt")
TypeTRUN = f("trun")
TypeSDTP = f("sdtp")
TypeSENC = f("senc")
TypeSAIZ = f("saiz")
TypeSAIO = f("saio")
+37 -3
View File
@@ -2,19 +2,50 @@ package box
import (
"bytes"
"encoding/binary"
"io"
)
// aligned(8) class MovieExtendsBox extends Box('mvex') {
// }
// MovieExtendsHeaderBox is the 'mehd' box: a FullBox header followed by a
// single 32-bit fragment duration.
type MovieExtendsHeaderBox struct {
	FullBox
	FragmentDuration uint32 // serialized as 4 big-endian bytes by WriteTo
}
// CreateMovieExtendsHeaderBox builds a 'mehd' box carrying fragmentDuration.
// The recorded box size is the full-box header plus the 4-byte duration.
func CreateMovieExtendsHeaderBox(fragmentDuration uint32) *MovieExtendsHeaderBox {
	mehd := new(MovieExtendsHeaderBox)
	mehd.typ = TypeMEHD
	mehd.size = FullBoxLen + 4
	mehd.FragmentDuration = fragmentDuration
	return mehd
}
// WriteTo writes the box payload, the fragment duration as a 4-byte
// big-endian integer, to w.
//
// It returns the number of bytes w accepted together with any error reported
// by w, so a failed or short write is no longer reported as success.
func (box *MovieExtendsHeaderBox) WriteTo(w io.Writer) (n int64, err error) {
	var buf [4]byte
	binary.BigEndian.PutUint32(buf[:], box.FragmentDuration)
	nw, err := w.Write(buf[:])
	return int64(nw), err
}
// Unmarshal reads the fragment duration from the first 4 bytes of buf
// (big-endian) and returns the receiver.
//
// A buffer shorter than 4 bytes yields io.ErrUnexpectedEOF instead of a
// panic; the receiver is left unchanged in that case.
func (box *MovieExtendsHeaderBox) Unmarshal(buf []byte) (IBox, error) {
	if len(buf) < 4 {
		return nil, io.ErrUnexpectedEOF
	}
	box.FragmentDuration = binary.BigEndian.Uint32(buf)
	return box, nil
}
// MovieExtendsBox is the 'mvex' box: an optional movie-extends header
// followed by the track-extends boxes.
type MovieExtendsBox struct {
	BaseBox
	Mehd  *MovieExtendsHeaderBox // optional; nil when no 'mehd' header is attached
	Trexs []*TrackExtendsBox
}
func CreateMovieExtendsBox(trexs []*TrackExtendsBox) *MovieExtendsBox {
func CreateMovieExtendsBox(mehd *MovieExtendsHeaderBox, trexs []*TrackExtendsBox) *MovieExtendsBox {
size := uint32(BasicBoxLen)
if mehd != nil {
size += mehd.size
}
for _, trex := range trexs {
size += trex.size
}
@@ -24,14 +55,16 @@ func CreateMovieExtendsBox(trexs []*TrackExtendsBox) *MovieExtendsBox {
typ: TypeMVEX,
size: size,
},
Mehd: mehd,
Trexs: trexs,
}
}
func (box *MovieExtendsBox) WriteTo(w io.Writer) (n int64, err error) {
boxes := make([]IBox, len(box.Trexs))
boxes := make([]IBox, len(box.Trexs)+1)
boxes[0] = box.Mehd
for i, trex := range box.Trexs {
boxes[i] = trex
boxes[i+1] = trex
}
return WriteTo(w, boxes...)
}
@@ -53,4 +86,5 @@ func (box *MovieExtendsBox) Unmarshal(buf []byte) (IBox, error) {
func init() {
RegisterBox[*MovieExtendsBox](TypeMVEX)
RegisterBox[*MovieExtendsHeaderBox](TypeMEHD)
}
+5 -8
View File
@@ -41,10 +41,7 @@ type MovieHeaderBox struct {
}
func CreateMovieHeaderBox(nextTrackID uint32, duration uint32) *MovieHeaderBox {
now := time.Now().Unix()
if duration == 0 {
now = 0
}
now := ConvertUnixTimeToISO14496(uint64(time.Now().Unix()))
return &MovieHeaderBox{
FullBox: FullBox{
BaseBox: BaseBox{
@@ -54,8 +51,8 @@ func CreateMovieHeaderBox(nextTrackID uint32, duration uint32) *MovieHeaderBox {
Version: 0,
Flags: [3]byte{0, 0, 0},
},
CreationTime: uint64(now),
ModificationTime: uint64(now),
CreationTime: now,
ModificationTime: now,
Timescale: 1000,
Duration: uint64(duration),
Rate: 0x00010000,
@@ -75,7 +72,7 @@ func (box *MovieHeaderBox) WriteTo(w io.Writer) (n int64, err error) {
binary.BigEndian.PutUint64(tmp[20:], box.Duration)
binary.BigEndian.PutUint32(tmp[28:], uint32(box.Rate))
binary.BigEndian.PutUint16(tmp[32:], uint16(box.Volume))
offset := 34 + 8
offset := 44
for i := 0; i < 9; i++ {
binary.BigEndian.PutUint32(tmp[offset:], uint32(box.Matrix[i]))
offset += 4
@@ -91,7 +88,7 @@ func (box *MovieHeaderBox) WriteTo(w io.Writer) (n int64, err error) {
binary.BigEndian.PutUint32(tmp[12:], uint32(box.Duration))
binary.BigEndian.PutUint32(tmp[16:], uint32(box.Rate))
binary.BigEndian.PutUint16(tmp[20:], uint16(box.Volume))
offset := 22 + 8
offset := 32
for i := 0; i < 9; i++ {
binary.BigEndian.PutUint32(tmp[offset:], uint32(box.Matrix[i]))
offset += 4
+84
View File
@@ -0,0 +1,84 @@
package box
import "io"
/*
aligned(8) class SampleDependencyTypeBox
extends FullBox(sdtp, version = 0, 0) {
for (i=0; i < sample_count; i++){
unsigned int(2) is_leading;
unsigned int(2) sample_depends_on;
unsigned int(2) sample_is_depended_on;
unsigned int(2) sample_has_redundancy;
}
}
is_leading takes one of the following four values:
0: the leading nature of this sample is unknown;
1: this sample is a leading sample that has a dependency before the referenced I-picture (and is
therefore not decodable);
2: this sample is not a leading sample;
3: this sample is a leading sample that has no dependency before the referenced I-picture (and is
therefore decodable);
sample_depends_on takes one of the following four values:
0: the dependency of this sample is unknown;
1: this sample does depend on others (not an I picture);
2: this sample does not depend on others (I picture);
3: reserved
sample_is_depended_on takes one of the following four values:
0: the dependency of other samples on this sample is unknown;
1: other samples may depend on this one (not disposable);
2: no other sample depends on this one (disposable);
3: reserved
sample_has_redundancy takes one of the following four values:
0: it is unknown whether there is redundant coding in this sample;
1: there is redundant coding in this sample;
2: there is no redundant coding in this sample;
3: reserved
*/
// SampleDependencyTypeFlags is a boolean view of the four 2-bit fields of an
// 'sdtp' entry. SampleDependencyTypeBox.WriteTo packs them into one byte.
type SampleDependencyTypeFlags struct {
	IsLeading bool // sets bit 6 (is_leading = 1) when true
	// DependsOn selects the sample_depends_on value in bits 5-4:
	// true writes 2, false writes 1.
	// NOTE(review): per the field description above, 2 means "does not depend
	// on others", so true appears to encode the opposite of the field name —
	// confirm the intended meaning against callers.
	DependsOn     bool
	IsDependedOn  bool // sets bit 2 (sample_is_depended_on = 1) when true
	HasRedundancy bool // sets bit 0 (sample_has_redundancy = 1) when true
}
// SampleDependencyTypeBox is the 'sdtp' box. This implementation carries a
// single set of dependency flags, serialized as one byte after the FullBox
// header.
type SampleDependencyTypeBox struct {
	FullBox
	SampleDependencyTypeFlags
}
// CreateSampleDependencyTypeBox builds an 'sdtp' box holding flags.
// Version and box flags stay zero; the recorded size is the full-box header
// plus the single flags byte.
func CreateSampleDependencyTypeBox(flags SampleDependencyTypeFlags) *SampleDependencyTypeBox {
	sdtp := &SampleDependencyTypeBox{SampleDependencyTypeFlags: flags}
	sdtp.typ = TypeSDTP
	sdtp.size = FullBoxLen + 1
	return sdtp
}
// WriteTo writes the box payload, one byte packing the four dependency
// fields, to w.
//
// Bit layout: is_leading in bits 7-6, sample_depends_on in bits 5-4,
// sample_is_depended_on in bits 3-2, sample_has_redundancy in bits 1-0.
// sample_depends_on is never left at 0: DependsOn writes 2 when true and 1
// when false.
//
// It returns the number of bytes w accepted and any error reported by w, so
// a failed write is no longer reported as success.
func (box *SampleDependencyTypeBox) WriteTo(w io.Writer) (n int64, err error) {
	var flag byte
	if box.IsLeading {
		flag |= 1 << 6
	}
	if box.DependsOn {
		flag |= 2 << 4
	} else {
		flag |= 1 << 4
	}
	if box.IsDependedOn {
		flag |= 1 << 2
	}
	if box.HasRedundancy {
		flag |= 1
	}
	nw, err := w.Write([]byte{flag})
	return int64(nw), err
}
// Unmarshal decodes the dependency flags from the first byte of buf and
// returns the receiver. It mirrors WriteTo: DependsOn is true when the
// sample_depends_on field has its value-2 bit set.
//
// An empty buffer yields io.ErrUnexpectedEOF instead of a panic; the receiver
// is left unchanged in that case.
func (box *SampleDependencyTypeBox) Unmarshal(buf []byte) (IBox, error) {
	if len(buf) < 1 {
		return nil, io.ErrUnexpectedEOF
	}
	flag := buf[0]
	box.IsLeading = flag>>6&1 == 1
	box.DependsOn = flag>>4&2 == 2
	box.IsDependedOn = flag>>2&1 == 1
	box.HasRedundancy = flag&1 == 1
	return box, nil
}
+1 -1
View File
@@ -62,7 +62,7 @@ func CreateTrackHeaderBox(trackID uint32, duration uint64, width, height uint32)
size: util.Conditional[uint32](version == 1, 92, 80) + FullBoxLen,
},
Version: version,
Flags: [3]byte{0, 0, 3}, // Track_enabled | Track_in_movie
Flags: [3]byte{0, 0, 7}, // Track_enabled | Track_in_movie | Track_in_preview
},
CreationTime: now,
ModificationTime: now,
+10 -4
View File
@@ -33,6 +33,12 @@ const (
TR_FLAG_DATA_SAMPLE_SIZE uint32 = 0x000200
TR_FLAG_DATA_SAMPLE_FLAGS uint32 = 0x000400
TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME uint32 = 0x000800
SAMPLE_FLAG_IS_LEADING uint32 = 1 << 3
SAMPLE_FLAG_DEPENDS_ON_YES uint32 = 1 << 4
SAMPLE_FLAG_DEPENDS_ON_NO uint32 = 2 << 4
SAMPLE_FLAG_IS_DEPENDED_ON uint32 = 1 << 6
SAMPLE_FLAG_HAS_REDUNDANCY uint32 = 1 << 7
)
type TrunEntry struct {
@@ -50,7 +56,7 @@ type TrackRunBox struct {
Entries []TrunEntry
}
func CreateTrackRunBox(flags uint32, sampleCount uint32) *TrackRunBox {
func CreateTrackRunBox(flags uint32, entries []TrunEntry) *TrackRunBox {
size := uint32(FullBoxLen + 4) // base size + sample_count
if flags&TR_FLAG_DATA_OFFSET != 0 {
@@ -74,7 +80,7 @@ func CreateTrackRunBox(flags uint32, sampleCount uint32) *TrackRunBox {
entrySize += 4
}
size += entrySize * sampleCount
size += entrySize * uint32(len(entries))
return &TrackRunBox{
FullBox: FullBox{
@@ -85,8 +91,8 @@ func CreateTrackRunBox(flags uint32, sampleCount uint32) *TrackRunBox {
Version: 1, // Use version 1 for signed composition time offsets
Flags: [3]byte{byte(flags >> 16), byte(flags >> 8), byte(flags)},
},
SampleCount: sampleCount,
Entries: make([]TrunEntry, sampleCount),
SampleCount: uint32(len(entries)),
Entries: entries,
}
}
+1
View File
@@ -7,4 +7,5 @@ type Sample struct {
CTS uint32
Offset int64
Size int
Duration uint32
}
+56 -116
View File
@@ -3,11 +3,9 @@ package mp4
import (
"encoding/binary"
"io"
"net"
"os"
"m7s.live/v5/pkg"
"m7s.live/v5/plugin/mp4/pkg/box"
. "m7s.live/v5/plugin/mp4/pkg/box"
)
@@ -28,6 +26,7 @@ type (
Tracks map[uint32]*Track
Flag
fragDuration uint32
maxdurtaion uint32
moov IBox
mdatOffset uint64
mdatSize uint64
@@ -59,26 +58,24 @@ func NewMuxer(flag Flag) *Muxer {
func (m *Muxer) WriteInitSegment(w io.Writer) (err error) {
var ftypBox *FileTypeBox
if m.isFragment() {
// 对于 FMP4,使用 iso5 作为主品牌,兼容 iso5, iso6, mp41
ftypBox = CreateFTYPBox(TypeISO5, 0x200, TypeISO5, TypeISO6, TypeMP41)
ftypBox = CreateFTYPBox(TypeISOM, 1, TypeISOM, TypeAVC1)
} else {
// 对于普通 MP4,使用 isom 作为主品牌
ftypBox = CreateFTYPBox(TypeISOM, 0x200, TypeISOM, TypeISO2, TypeAVC1, TypeMP41)
}
m.CurrentOffset, err = box.WriteTo(w, ftypBox)
m.CurrentOffset, err = WriteTo(w, ftypBox)
if err != nil {
return
}
if !m.isFragment() {
var n int64
freeBox := CreateFreeBox(nil)
n, err = box.WriteTo(w, freeBox)
n, err = WriteTo(w, freeBox)
if err != nil {
return
}
m.CurrentOffset += n
mdat := CreateDataBox(TypeMDAT, nil)
n, err = box.WriteTo(w, mdat)
n, err = WriteTo(w, mdat)
if err != nil {
return
}
@@ -115,9 +112,9 @@ func (m *Muxer) WriteSample(w io.Writer, t *Track, sample Sample) (err error) {
}
defer func() {
// For fragmented MP4, check if we should create a new fragment
if sample.KeyFrame && t.Duration >= m.fragDuration {
err = m.flushFragment(w)
}
// if sample.KeyFrame && t.Duration >= m.fragDuration {
err = m.flushFragment(w)
// }
}()
} else {
// For regular MP4, write directly to output
@@ -141,7 +138,7 @@ func (m *Muxer) reWriteMdatSize(w io.WriteSeeker) (err error) {
if _, err = w.Seek(int64(m.mdatOffset-16), io.SeekStart); err != nil {
return
}
if _, err = box.WriteTo(w, mdat); err != nil {
if _, err = WriteTo(w, mdat); err != nil {
return
}
if _, err = w.Seek(m.CurrentOffset, io.SeekStart); err != nil {
@@ -188,20 +185,23 @@ func (m *Muxer) ReWriteWithMoov(f io.WriteSeeker, r io.Reader) (err error) {
return
}
func (m *Muxer) makeMvex() *box.MovieExtendsBox {
trexs := make([]*box.TrackExtendsBox, 0, m.nextTrackId-1)
func (m *Muxer) makeMvex() *MovieExtendsBox {
trexs := make([]*TrackExtendsBox, 0, m.nextTrackId-1)
for i := uint32(1); i < m.nextTrackId; i++ {
if track := m.Tracks[i]; track != nil {
trex := box.CreateTrackExtendsBox(track.TrackId)
if track.Cid.IsVideo() {
trex.DefaultSampleFlags = 0x01010000
} else {
trex.DefaultSampleFlags = 0x02000000
}
trex := CreateTrackExtendsBox(track.TrackId)
trex.DefaultSampleDescriptionIndex = 1
// if track.Cid.IsVideo() {
// trex.DefaultSampleFlags = 0x01010000
// } else {
// trex.DefaultSampleFlags = 0x02000000
// }
trexs = append(trexs, trex)
}
}
return box.CreateMovieExtendsBox(trexs)
// mehd := CreateMovieExtendsHeaderBox(m.maxdurtaion)
var mehd *MovieExtendsHeaderBox
return CreateMovieExtendsBox(mehd, trexs)
}
func (m *Muxer) makeTrak(track *Track) *ContainerBox {
@@ -220,38 +220,27 @@ func (m *Muxer) makeTrak(track *Track) *ContainerBox {
return CreateContainerBox(TypeTRAK, tkhd, mdia, edts)
}
func (m *Muxer) GetMoovSize() int {
moovsize := uint64(FullBoxLen + 96)
if m.isDash() || m.isFragment() {
moovsize += 64
}
func (m *Muxer) MakeMoov() IBox {
mvhd := CreateMovieHeaderBox(m.nextTrackId, 0)
children := []IBox{mvhd}
for _, track := range m.Tracks {
moovsize += uint64(m.makeTrak(track).Size())
children = append(children, m.makeTrak(track))
if m.maxdurtaion < track.Duration {
m.maxdurtaion = track.Duration
}
}
return int(8 + moovsize)
mvhd.Duration = uint64(m.maxdurtaion)
if m.isDash() || m.isFragment() {
children = append(children, m.makeMvex())
}
m.moov = CreateContainerBox(TypeMOOV, children...)
return m.moov
}
func (m *Muxer) WriteMoov(w io.Writer) (err error) {
var mvhd *box.MovieHeaderBox
var mvex *box.MovieExtendsBox
var children []IBox
maxdurtaion := uint32(0)
for _, track := range m.Tracks {
children = append(children, m.makeTrak(track))
if maxdurtaion < track.Duration {
maxdurtaion = track.Duration
}
}
if m.isDash() || m.isFragment() {
mvhd = box.CreateMovieHeaderBox(m.nextTrackId, 0)
mvex = m.makeMvex()
children = append(children, mvex)
} else {
mvhd = box.CreateMovieHeaderBox(m.nextTrackId, maxdurtaion)
}
m.moov = box.CreateContainerBox(TypeMOOV, append([]IBox{mvhd}, children...)...)
m.MakeMoov()
var n int64
n, err = box.WriteTo(w, m.moov)
n, err = WriteTo(w, m.moov)
m.CurrentOffset += n
return
}
@@ -262,10 +251,10 @@ func (m *Muxer) WriteTrailer(file *os.File) (err error) {
if err = m.flushFragment(file); err != nil {
return err
}
var mfraChildren []box.IBox
var mfraChildren []IBox
var mfraSize uint32 = 0
// Write mfra box
tfras := make([]*box.TrackFragmentRandomAccessBox, len(m.Tracks))
tfras := make([]*TrackFragmentRandomAccessBox, len(m.Tracks))
for i := uint32(1); i < m.nextTrackId; i++ {
if track := m.Tracks[i]; track != nil && len(track.fragments) > 0 {
tfras[i-1] = track.makeTfraBox()
@@ -276,9 +265,9 @@ func (m *Muxer) WriteTrailer(file *os.File) (err error) {
// Only write mfra if we have fragments
if mfraSize > 0 {
mfraChildren = append(mfraChildren, box.CreateMfroBox(uint32(mfraSize)+16))
mfra := box.CreateContainerBox(TypeMFRA, mfraChildren...)
_, err = box.WriteTo(file, mfra)
mfraChildren = append(mfraChildren, CreateMfroBox(uint32(mfraSize)+16))
mfra := CreateContainerBox(TypeMFRA, mfraChildren...)
_, err = WriteTo(file, mfra)
if err != nil {
return err
}
@@ -319,59 +308,36 @@ func (m *Muxer) flushFragment(w io.Writer) (err error) {
return err
}
}
// Calculate mdat size first
var mdatSize uint64 = 8 // mdat box header
for i := uint32(1); i < m.nextTrackId; i++ {
if len(m.Tracks[i].Samplelist) == 0 {
continue
}
ws := m.Tracks[i].writer.(*Fmp4WriterSeeker)
mdatSize += uint64(len(ws.Buffer))
}
// Write moof box
mfhdBox := box.CreateMovieFragmentHeaderBox(m.nextFragmentId)
trafs := make([]*box.TrackFragmentBox, len(m.Tracks))
moofChildren := make([]box.IBox, 0, len(m.Tracks)+1)
moofChildren = append(moofChildren, mfhdBox)
// Process each track separately
for i := uint32(1); i < m.nextTrackId; i++ {
if len(m.Tracks[i].Samplelist) == 0 {
continue
}
track := m.Tracks[i]
// 传递 moof 偏移和 mdat 大小
traf := track.makeTraf(uint64(m.CurrentOffset)) // +8 for moof box header
// Record trun data_offset position: current offset + 16 (after trun header)
trafs[i-1] = traf
moofChildren = append(moofChildren, traf)
}
moof := CreateContainerBox(TypeMOOF, moofChildren...)
sampleData := make(net.Buffers, len(m.Tracks))
// Write sample data
var sampleOffset int64 = 0
for i := uint32(1); i < m.nextTrackId; i++ {
if len(m.Tracks[i].Samplelist) == 0 {
if len(track.Samplelist) == 0 {
continue
}
track := m.Tracks[i]
ws := track.writer.(*Fmp4WriterSeeker)
// Update sample offsets relative to mdat start
for j := range track.Samplelist {
sampleOffset += int64(track.Samplelist[j].Size)
}
// Create moof box for this track
moof := track.MakeMoof(m.nextFragmentId)
sampleData[i-1] = ws.Buffer
// Create mdat box for this track
mdat := CreateDataBox(TypeMDAT, ws.Buffer)
// Write moof box
var n int64
n, err = WriteTo(w, moof, mdat)
if err != nil {
return err
}
m.CurrentOffset += n
// Record fragment info
if len(track.Samplelist) > 0 {
firstTs := track.Samplelist[0].Timestamp
lastTs := track.Samplelist[len(track.Samplelist)-1].Timestamp
frag := Fragment{
Offset: uint64(m.CurrentOffset),
Offset: uint64(int64(moof.Size()) + int64(mdat.HeaderSize())), // Start of moof
Duration: track.Duration,
FirstTs: uint64(firstTs),
LastTs: uint64(lastTs),
@@ -386,32 +352,6 @@ func (m *Muxer) flushFragment(w io.Writer) (err error) {
track.Duration = 0
}
// Write mdat box
mdat := CreateBaseBox(TypeMDAT, uint64(sampleOffset)+BasicBoxLen)
for i, traf := range trafs {
traf.TRUN.DataOffset = int32(moof.Size()) + int32(mdat.HeaderSize())
if i > 0 {
traf.TRUN.DataOffset += int32(len(sampleData[i-1]))
}
}
var n int64
n, err = box.WriteTo(w, moof)
if err != nil {
return err
}
m.CurrentOffset += n
n, err = mdat.HeaderWriteTo(w)
if err != nil {
return err
}
m.CurrentOffset += n
n, err = sampleData.WriteTo(w)
if err != nil {
return err
}
m.CurrentOffset += n
m.nextFragmentId++
return nil
}
+18 -51
View File
@@ -8,6 +8,7 @@ import (
"os/exec"
"testing"
"github.com/deepch/vdk/codec/h264parser"
"m7s.live/v5/plugin/mp4/pkg/box"
)
@@ -28,42 +29,6 @@ type (
}
)
// validateAndFixAVCC checks that data is a well-formed sequence of AVCC NAL
// units (4-byte big-endian length prefix followed by the payload) and returns
// a copy of the validated bytes.
//
// It fails when the buffer is shorter than one length prefix, when a prefix
// is truncated, when a declared length is zero or runs past the end of data,
// or when the NAL unit type (low 5 bits of the first payload byte) is outside
// 1..12.
func validateAndFixAVCC(data []byte) ([]byte, error) {
	const prefixLen = 4
	total := len(data)
	if total < prefixLen {
		return nil, fmt.Errorf("data too short for AVCC")
	}

	var output []byte
	for offset := 0; offset < total; {
		payloadStart := offset + prefixLen
		if payloadStart > total {
			return nil, fmt.Errorf("incomplete NALU length at position %d", offset)
		}

		// Read the NALU length (4 bytes, big-endian) and make sure the
		// payload it announces fits inside the buffer.
		naluLen := binary.BigEndian.Uint32(data[offset:payloadStart])
		next := payloadStart + int(naluLen)
		if naluLen == 0 || next > total {
			return nil, fmt.Errorf("invalid NALU length %d at position %d", naluLen, offset)
		}

		// Validate the NALU type.
		if naluType := data[payloadStart] & 0x1F; naluType < 1 || naluType > 12 {
			return nil, fmt.Errorf("invalid NALU type %d at position %d", naluType, offset)
		}

		// Copy the length prefix together with the NALU payload.
		output = append(output, data[offset:next]...)
		offset = next
	}
	return output, nil
}
func readFLVHeader(r io.Reader) (*FLVHeader, error) {
header := &FLVHeader{}
if err := binary.Read(r, binary.BigEndian, &header.Signature); err != nil {
@@ -229,7 +194,7 @@ func findBoxOffsets(filename string) error {
func TestFLVToFMP4(t *testing.T) {
// Open FLV file
flvFile, err := os.Open("/Users/dexter/Movies/daxiangge.flv")
flvFile, err := os.Open("/Users/dexter/Movies/002.flv")
if err != nil {
t.Fatalf("Failed to open FLV file: %v", err)
}
@@ -253,6 +218,7 @@ func TestFLVToFMP4(t *testing.T) {
hasVideo := header.Flags&0x01 != 0
hasAudio := header.Flags&0x04 != 0
// hasAudio := false
// Skip to the first tag
if _, err := flvFile.Seek(int64(header.DataOffset), io.SeekStart); err != nil {
t.Fatalf("Failed to seek to first tag: %v", err)
@@ -262,8 +228,6 @@ func TestFLVToFMP4(t *testing.T) {
var videoTrack, audioTrack *Track
if hasVideo {
videoTrack = muxer.AddTrack(box.MP4_CODEC_H264)
videoTrack.Width = 3840 // 4K resolution
videoTrack.Height = 2160
videoTrack.Timescale = 1000
}
if hasAudio {
@@ -281,10 +245,11 @@ func TestFLVToFMP4(t *testing.T) {
for {
tag, err := readFLVTag(flvFile)
if err != nil {
if err == io.EOF {
break
}
t.Fatalf("Failed to read FLV tag: %v", err)
// if err == io.EOF {
// break
// }
// t.Fatalf("Failed to read FLV tag: %v", err)
break
}
switch tag.TagType {
@@ -356,7 +321,16 @@ func TestFLVToFMP4(t *testing.T) {
if tag.Data[1] == 0 { // AVC sequence header
fmt.Println("Found AVC sequence header")
videoConfig = tag.Data[5:] // Store AVC config (skip composition time)
codecData, err := h264parser.NewCodecDataFromAVCDecoderConfRecord(videoConfig)
if err != nil {
t.Fatalf("Failed to parse AVC sequence header: %v", err)
}
fmt.Printf("Codec data: %+v\n", codecData)
videoTrack.ExtraData = videoConfig
videoTrack.Width = uint32(codecData.Width())
videoTrack.Height = uint32(codecData.Height())
videoTrack.Timescale = 1000
} else if len(videoConfig) > 0 { // Video data
if len(tag.Data) <= 5 {
fmt.Printf("Skipping empty video sample at timestamp %d\n", tag.Timestamp)
@@ -370,15 +344,8 @@ func TestFLVToFMP4(t *testing.T) {
compositionTime |= ^0xffffff
}
// 验证和修复 AVCC 格式
validData, err := validateAndFixAVCC(tag.Data[5:])
if err != nil {
fmt.Printf("Warning: Invalid AVCC data at timestamp %d: %v\n", tag.Timestamp, err)
continue
}
sample := box.Sample{
Data: validData,
Data: tag.Data[5:],
Timestamp: uint32(tag.Timestamp),
CTS: uint32(compositionTime),
KeyFrame: frameType == 1,
+1 -8
View File
@@ -128,15 +128,8 @@ func TestFLVToMP4(t *testing.T) {
compositionTime |= ^0xffffff
}
// Validate and fix AVCC format
validData, err := validateAndFixAVCC(tag.Data[5:])
if err != nil {
fmt.Printf("Warning: Invalid AVCC data at timestamp %d: %v\n", tag.Timestamp, err)
continue
}
sample := box.Sample{
Data: validData,
Data: tag.Data[5:],
Timestamp: uint32(tag.Timestamp),
CTS: uint32(compositionTime),
KeyFrame: frameType == 1,
+34 -60
View File
@@ -110,10 +110,11 @@ func (track *Track) makeEdtsBox() *ContainerBox {
}
func (track *Track) AddSampleEntry(entry Sample) {
if len(track.Samplelist) <= 1 {
if len(track.Samplelist) < 1 {
track.Duration = 0
} else {
delta := int64(entry.Timestamp - track.Samplelist[len(track.Samplelist)-1].Timestamp)
track.Samplelist[len(track.Samplelist)-1].Duration = uint32(delta)
if delta < 0 {
track.Duration += 1
} else {
@@ -125,9 +126,6 @@ func (track *Track) AddSampleEntry(entry Sample) {
func (track *Track) makeTkhdBox() *TrackHeaderBox {
duration := uint64(track.Duration)
if track.isFragment {
duration = 0
}
tkhd := CreateTrackHeaderBox(track.TrackId, duration, track.Width, track.Height)
if track.Cid == MP4_CODEC_AAC || track.Cid == MP4_CODEC_G711A || track.Cid == MP4_CODEC_G711U || track.Cid == MP4_CODEC_OPUS {
@@ -179,7 +177,7 @@ func (track *Track) makeStblBox() IBox {
track.STSZ = CreateSTSZBox(0, nil)
track.STCO = CreateSTCOBox(nil)
}
return CreateContainerBox(TypeSTBL, track.STSD, track.STSS, track.STTS, track.CTTS, track.STSC, track.STSZ, track.STCO)
return CreateContainerBox(TypeSTBL, track.STSD, track.STSS, track.STSZ, track.STSC, track.STTS, track.CTTS, track.STCO)
}
func (track *Track) makeStsd(handler_type HandlerType) *STSDBox {
@@ -202,31 +200,38 @@ func (track *Track) makeStsd(handler_type HandlerType) *STSDBox {
return CreateSTSDBox(entry)
}
// fmp4
func (track *Track) makeTraf(moofOffset uint64) *TrackFragmentBox {
// Create tfhd box
tfhd := track.makeTfhdBox(moofOffset)
// Create tfdt box
func (track *Track) MakeMoof(fragmentId uint32) *ContainerBox {
tfhd := track.makeTfhdBox(0)
tfdt := track.makeTfdtBox()
// Create trun box with all samples
trun := track.makeTrunBox(0, len(track.Samplelist))
turnEntries := make([]TrunEntry, 0)
for _, sample := range track.Samplelist {
var sampleFlags uint32
if sample.KeyFrame {
sampleFlags = SAMPLE_FLAG_IS_LEADING | SAMPLE_FLAG_IS_DEPENDED_ON | SAMPLE_FLAG_DEPENDS_ON_NO
} else {
sampleFlags = SAMPLE_FLAG_DEPENDS_ON_YES
}
turnEntries = append(turnEntries, TrunEntry{
SampleSize: uint32(sample.Size),
SampleDuration: uint32(sample.Duration),
SampleCompositionTimeOffset: int32(sample.CTS),
SampleFlags: sampleFlags,
})
}
// Create track fragment box with all necessary boxes
traf := CreateTrackFragmentBox(tfhd, tfdt, trun)
trun := CreateTrackRunBox(TR_FLAG_DATA_OFFSET|TR_FLAG_DATA_SAMPLE_DURATION|TR_FLAG_DATA_SAMPLE_SIZE|TR_FLAG_DATA_SAMPLE_FLAGS|TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME, turnEntries)
return traf
traf := CreateContainerBox(TypeTRAF, tfhd, tfdt, trun)
result := CreateContainerBox(TypeMOOF, CreateMovieFragmentHeaderBox(fragmentId), traf)
trun.DataOffset = int32(result.Size()) + int32(BasicBoxLen) // TODO: mdat large than 4GB
return result
}
func (track *Track) makeTfhdBox(moofOffset uint64) *TrackFragmentHeaderBox {
tfFlags := uint32(0)
tfFlags |= TF_FLAG_DEFAULT_BASE_IS_MOOF
tfFlags |= TF_FLAG_SAMPLE_DESCRIPTION_INDEX_PRESENT
tfFlags |= TF_FLAG_DEFAULT_SAMPLE_DURATION_PRESENT
tfFlags |= TF_FLAG_DEFAULT_SAMPLE_SIZE_PRESENT
tfFlags |= TF_FLAG_DEFAULT_SAMPLE_FLAGS_PRESENT
// tfFlags |= TF_FLAG_DEFAULT_BASE_IS_MOOF
// tfFlags |= TF_FLAG_DEFAULT_SAMPLE_FLAGS_PRESENT
tfhd := CreateTrackFragmentHeaderBox(track.TrackId, tfFlags)
tfhd.BaseDataOffset = moofOffset
// Calculate default sample duration
@@ -252,9 +257,9 @@ func (track *Track) makeTfhdBox(moofOffset uint64) *TrackFragmentHeaderBox {
// Set default sample flags
if track.Cid.IsVideo() {
tfhd.DefaultSampleFlags = MOV_FRAG_SAMPLE_FLAG_DEPENDS_YES | MOV_FRAG_SAMPLE_FLAG_IS_NON_SYNC
tfhd.DefaultSampleFlags = 16842752
} else {
tfhd.DefaultSampleFlags = MOV_FRAG_SAMPLE_FLAG_DEPENDS_NO
tfhd.DefaultSampleFlags = 0
}
return tfhd
}
@@ -277,8 +282,11 @@ func (track *Track) makeTfraBox() *TrackFragmentRandomAccessBox {
return CreateTrackFragmentRandomAccessBox(track.TrackId, slices.Collect(func(yield func(TFRAEntry) bool) {
for _, f := range track.fragments {
if !yield(TFRAEntry{
Time: f.FirstTs,
MoofOffset: f.Offset,
Time: f.FirstTs,
MoofOffset: f.Offset,
TrafNumber: uint32(1),
TrunNumber: uint32(1),
SampleNumber: uint32(1),
}) {
break
}
@@ -286,40 +294,6 @@ func (track *Track) makeTfraBox() *TrackFragmentRandomAccessBox {
}))
}
// makeTrunBox builds a TrackRunBox describing the samples of
// track.Samplelist in the half-open index range [start, end).
//
// The box flags always contain TR_FLAG_DATA_OFFSET and
// TR_FLAG_DATA_SAMPLE_SIZE. TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME is added
// only when at least one sample in the range has a non-zero CTS, in which
// case every entry carries its sample's CTS as the composition time offset.
// Only the flags are configured here; no data offset value is assigned.
func (track *Track) makeTrunBox(start, end int) *TrackRunBox {
	flags := TR_FLAG_DATA_OFFSET
	flags |= TR_FLAG_DATA_SAMPLE_SIZE

	// Scan the range once; the first sample with a non-zero CTS is enough
	// to require per-sample composition time offsets.
	for idx := start; idx < end; idx++ {
		if track.Samplelist[idx].CTS == 0 {
			continue
		}
		flags |= TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME
		break
	}
	withCTS := flags&TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME != 0

	trun := CreateTrackRunBox(flags, uint32(end-start))

	// The entry table is sized from the box's own SampleCount.
	// NOTE(review): presumably CreateTrackRunBox stores its second argument
	// in SampleCount — confirm against its definition.
	entries := make([]TrunEntry, trun.SampleCount)
	trun.Entries = entries
	for n := 0; n < int(trun.SampleCount); n++ {
		src := &track.Samplelist[start+n]
		dst := &entries[n]
		dst.SampleSize = uint32(src.Size)
		if withCTS {
			dst.SampleCompositionTimeOffset = int32(src.CTS)
		}
	}
	return trun
}
func (track *Track) makeStblTable() {
sameSize := true
movchunks := make([]movchunk, 0)