Needs a sample. It might also be incorrect if an mdat contains moov atoms for some reason. But the concept is reasonable enough - mov files tend to have the moov atom either in the header or in the footer. The need for this patch may have been made unnecessary by the introduction of mfra parsing, which is very similar to the RandomIndexPack (RIP) in MXF.
The heuristic for how far to seek back from the end could use some work. stts, ctts, stss, stsc, stsz and stco add up to 48 bytes per frame, so the present heuristic won't work for lower-bitrate files. For example, a 25 Hz video file below 3.6 Mbit/s is on average under 18k per frame, which when divided by 384 comes to 46 bytes, compared to 48 bytes' worth of stbl stuff. There is some RLE possible with the headers, and keyframe density makes a difference, but I still feel this should leverage the existing atom parsing code. Spotify comments ---------------- An input mp4 file with lots of mdat:s (non-fragmented?) causes lots of reads and seeks, making the initial parse over mid-latency network access very slow. Possible other solutions: • Another or safer way to quickly find moov? • TODO: wasn’t there some option to use “mfra”? Only for fragmented files? Caused other problems? /Tomas
From 34d4732f0beb12d58113958153db3f6a8006fd2c Mon Sep 17 00:00:00 2001 From: Ulrik <ulr...@spotify.com> Date: Thu, 9 Dec 2021 17:48:00 +0100 Subject: [PATCH 07/15] avformat/mov:Heuristic search for moov After passing more than 5 mdat-boxes before seeing moov, insert a quick attempt at reading moov from the end of the file. Moov is scanned for with the byte-sequence `moov`. When located, the area is inspected to avoid spurious matches. In particular, it's expected that the inner boxes of the `moov` contains `mfhd` and `trak` boxes, and that reading root-boxes from the found offset ends up evenly at end of file. --- libavformat/mov.c | 156 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 151 insertions(+), 5 deletions(-) diff --git a/libavformat/mov.c b/libavformat/mov.c index 8c3329b815..f58f8f3102 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -84,6 +84,7 @@ typedef struct MOVParseTableEntry { } MOVParseTableEntry; static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom); +static int mov_heuristic_scan_moov(MOVContext *c, AVIOContext *pb); static int mov_read_mfra(MOVContext *c, AVIOContext *f); static void mov_free_stream_context(AVFormatContext *s, AVStream *st); static int64_t add_ctts_entry(MOVCtts** ctts_data, unsigned int* ctts_count, unsigned int* allocated_size, @@ -1361,7 +1362,12 @@ static int mov_read_mdat(MOVContext *c, AVIOContext *pb, MOVAtom atom) { if (atom.size == 0) /* wrong one (MP4) */ return 0; - c->found_mdat=1; + c->found_mdat+=1; + if (c->found_mdat == 5 && !c->found_moov) { + /* Some mp4:s consist of 100s or even 1000s of mdats, with a moov at the end. 
To avoid a + ton of seeking, we heuristically look for a moov at the end of the file instead */ + mov_heuristic_scan_moov(c, pb); + } return 0; /* now go for moov */ } @@ -1571,6 +1577,141 @@ static int mov_read_moov(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; /* now go for mdat */ } +static int mov_read_atom_header(AVIOContext *pb, MOVAtom *atom) { + atom->size = avio_rb32(pb); + atom->type = avio_rl32(pb); + if (atom->size == 1) { + atom->size = avio_rb64(pb); + return 12; + } else { + return 8; + } +} + +static int mov_try_read_moov(MOVContext *c, AVIOContext *pb, int64_t offset, int64_t filesize) { + MOVAtom moov, child; + int64_t moov_header_size, ret, i; + struct { uint32_t type; uint8_t seen; } boxes_seen[] = { + { MKTAG('m','v','h','d'), 0 }, + { MKTAG('t','r','a','k'), 0 }, + }; + const size_t num_boxes = sizeof(boxes_seen)/sizeof(boxes_seen[0]); + + av_log(c->fc, AV_LOG_TRACE, "Trying to read moov from hint at offset %"PRId64"\n", offset); + if ((ret = avio_seek(pb, offset, SEEK_SET)) < 0) { + return ret; + } + if ((moov_header_size = mov_read_atom_header(pb, &moov)) < 0) { + return moov_header_size; + } + + if (moov.type != MKTAG('m','o','o','v')) { + av_log(c->fc, AV_LOG_TRACE, + "Moov-hint at %"PRId64" failed type&size-check, %"PRId64" != %"PRId64"\n", + offset, moov.size, filesize - offset + ); + return -1; + } + + /* Iterate through inner boxes, looking for some must-have types*/ + while (avio_tell(pb) < offset + moov.size) { + ret = mov_read_atom_header(pb, &child); + for (i=0; i < num_boxes; i++) { + if (child.type == boxes_seen[i].type) { + boxes_seen[i].seen = 1; + } + } + avio_seek(pb, child.size - ret, SEEK_CUR); + } + + /* Verify that expected children were seen */ + for (i=0; i < num_boxes; i++) { + if (!boxes_seen[i].seen) { + return -1; + } + } + + if (avio_tell(pb) > filesize) { + return -1; + } + + /* Verify that the remaining root-level boxes ends up evenly at the end */ + while (avio_tell(pb) < filesize) { + ret = 
mov_read_atom_header(pb, &child); + for (i=0; i < num_boxes; i++) { + if (child.type == boxes_seen[i].type) { + boxes_seen[i].seen = 1; + } + } + if (child.size == 0) { // Child is expected to continue to EOF + break; + } + if (child.size - ret + avio_tell(pb) > filesize) { + av_log(c->fc, AV_LOG_TRACE, "Remaining boxes does not align with EOF", offset); + return -1; + } + avio_seek(pb, child.size - ret, SEEK_CUR); + } + + av_log(c->fc, AV_LOG_TRACE, "Moov hint checks out at offset %"PRId64"\n", offset); + if ((ret = avio_seek(pb, offset + moov_header_size, SEEK_SET)) < 0) { + return ret; + } + moov.type = MKTAG('r', 'o', 'o', 't'); + moov.size = filesize; + return mov_read_moov(c, pb, moov); +} + +#define BUF_SPILLOVER (sizeof(needle) - 1) +static int mov_heuristic_scan_moov(MOVContext *c, AVIOContext *pb) { + uint8_t needle[4] = {'m', 'o', 'o', 'v'}, buf[4096]; + size_t buf_fill = 0, i; + int64_t ret, new_pos; + + int64_t original_pos = avio_tell(pb); + int64_t filesize = avio_size(pb); + + if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) || original_pos < 0 || filesize < 0) { + return 0; + } + + /* Try to determine position from end, where moov-box would fit with good margin */ + new_pos = filesize - ((4 * 1024 * 1024) + (filesize / 384)); + if (new_pos <= original_pos) { + return 0; + } + + if (avio_seek(pb, new_pos, SEEK_SET) < 0) { + return 0; + } + + /* Scan for 'moov' string */ + while (1) { + ret = avio_read(pb, buf + buf_fill, sizeof(buf) - buf_fill); + if (ret <= 0) { + goto quit; + } + buf_fill += ret; + new_pos += ret; + for (i=0; i < buf_fill - sizeof(needle); i++) { + if (memcmp(buf + i, needle, sizeof(needle)) == 0) { + /* 'moov' tag located, try position-hinted read */ + if (mov_try_read_moov(c, pb, new_pos + i - buf_fill - 4, filesize) == 0) { + goto quit; + } else if (avio_seek(pb, new_pos, SEEK_SET) < 0) { + goto quit; + } + } + } + if (buf_fill > BUF_SPILLOVER) { + memmove(buf, buf + buf_fill - BUF_SPILLOVER, BUF_SPILLOVER); + buf_fill = 
BUF_SPILLOVER; + } + } +quit: + return avio_seek(pb, original_pos, SEEK_SET); +} + static MOVFragmentStreamInfo * get_frag_stream_info( MOVFragmentIndex *frag_index, int index, @@ -9355,15 +9496,20 @@ static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom) } else { int64_t start_pos = avio_tell(pb); int64_t left; + uint8_t index_satisfied, at_end; int err = parse(c, pb, a); if (err < 0) { c->atom_depth --; return err; } - if (c->found_moov && c->found_mdat && a.size <= INT64_MAX - start_pos && - ((!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->frag_index.complete) || - start_pos + a.size == avio_size(pb))) { - if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->frag_index.complete) + index_satisfied = (!(pb->seekable & AVIO_SEEKABLE_NORMAL)) + || (c->fc->flags & AVFMT_FLAG_IGNIDX) + || c->frag_index.complete + || (c->found_mdat > 2 && (c->frag_index.nb_items == 0)); /* If we've read past 2 + mdats, with no fragments in fragment-index, we assume file is not fragmented */ + at_end = (start_pos + a.size) == avio_size(pb); + if (c->found_moov && c->found_mdat && (a.size <= INT64_MAX - start_pos) && (index_satisfied || at_end)) { + if (!at_end) c->next_root_atom = start_pos + a.size; c->atom_depth --; return 0; -- 2.39.2
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".