James Almer: > Since we can't blindly trust the keyframe flag in packets and assume its > contents are a valid Sync Sample, do some basic bitstream parsing to build the > Sync Sample table in addition to a Random Access Recovery Point table. > > Suggested-by: ffm...@fb.com > Signed-off-by: James Almer <jamr...@gmail.com> > --- > libavformat/movenc.c | 125 +++++++++++++++++++++++++++++++++-- > libavformat/movenc.h | 1 + > tests/ref/lavf-fate/h264.mp4 | 6 +- > 3 files changed, 123 insertions(+), 9 deletions(-) > > diff --git a/libavformat/movenc.c b/libavformat/movenc.c > index 57062f45c5..159e0261b7 100644 > --- a/libavformat/movenc.c > +++ b/libavformat/movenc.c > @@ -34,13 +34,17 @@ > #include "avc.h" > #include "libavcodec/ac3_parser_internal.h" > #include "libavcodec/dnxhddata.h" > +#include "libavcodec/h264.h" > +#include "libavcodec/h2645_parse.h" > #include "libavcodec/flac.h" > #include "libavcodec/get_bits.h" > +#include "libavcodec/golomb.h" > > #include "libavcodec/internal.h" > #include "libavcodec/put_bits.h" > #include "libavcodec/vc1_common.h" > #include "libavcodec/raw.h" > +#include "libavcodec/sei.h" > #include "internal.h" > #include "libavutil/avstring.h" > #include "libavutil/channel_layout.h" > @@ -2537,7 +2541,9 @@ static int mov_preroll_write_stbl_atoms(AVIOContext > *pb, MOVTrack *track) > if (!sgpd_entries) > return AVERROR(ENOMEM); > > - av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || > track->par->codec_id == AV_CODEC_ID_AAC); > + av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || > + track->par->codec_id == AV_CODEC_ID_AAC || > + track->par->codec_id == AV_CODEC_ID_H264); > > if (track->par->codec_id == AV_CODEC_ID_OPUS) { > for (i = 0; i < track->entry; i++) { > @@ -2545,7 +2551,7 @@ static int mov_preroll_write_stbl_atoms(AVIOContext > *pb, MOVTrack *track) > int distance = 0; > for (j = i - 1; j >= 0; j--) { > roll_samples_remaining -= get_cluster_duration(track, j); > - distance++; > + distance--; > if (roll_samples_remaining <= 0) > break; > } > @@ -2555,7 +2561,7 @@ static int mov_preroll_write_stbl_atoms(AVIOContext > *pb, MOVTrack *track) > if (roll_samples_remaining > 0) > distance = 0; > /* Verify distance is a maximum of 32 (2.5ms) packets. */ > - if (distance > 32) > + if (distance < 32) > return AVERROR_INVALIDDATA; > if (i && distance == sgpd_entries[entries].roll_distance) { > sgpd_entries[entries].count++; > @@ -2566,10 +2572,22 @@ static int mov_preroll_write_stbl_atoms(AVIOContext > *pb, MOVTrack *track) > sgpd_entries[entries].group_description_index = distance ? > ++group : 0; > } > } > + } else if (track->par->codec_id == AV_CODEC_ID_H264) { > + for (i = 0; i < track->entry; i++) { > + int distance = track->cluster[i].roll_distance; > + if (i && distance == sgpd_entries[entries].roll_distance) { > + sgpd_entries[entries].count++; > + } else { > + entries++; > + sgpd_entries[entries].count = 1; > + sgpd_entries[entries].roll_distance = distance; > + sgpd_entries[entries].group_description_index = distance ? > ++group : 0; > + } > + } > } else { > entries++; > sgpd_entries[entries].count = track->sample_count; > - sgpd_entries[entries].roll_distance = 1; > + sgpd_entries[entries].roll_distance = -1;
You seem to be negate roll_distance in this patch; shouldn't this be a separate patch? > sgpd_entries[entries].group_description_index = ++group; > } > entries++; > @@ -2588,7 +2606,7 @@ static int mov_preroll_write_stbl_atoms(AVIOContext > *pb, MOVTrack *track) > avio_wb32(pb, group); /* entry_count */ > for (i = 0; i < entries; i++) { > if (sgpd_entries[i].group_description_index) { > - avio_wb16(pb, -sgpd_entries[i].roll_distance); /* roll_distance > */ > + avio_wb16(pb, sgpd_entries[i].roll_distance); /* roll_distance */ > } > } > > @@ -2639,7 +2657,9 @@ static int mov_write_stbl_tag(AVFormatContext *s, > AVIOContext *pb, MOVMuxContext > if (track->cenc.aes_ctr) { > ff_mov_cenc_write_stbl_atoms(&track->cenc, pb); > } > - if (track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == > AV_CODEC_ID_AAC) { > + if (track->par->codec_id == AV_CODEC_ID_OPUS || > + track->par->codec_id == AV_CODEC_ID_AAC || > + track->par->codec_id == AV_CODEC_ID_H264) { > mov_preroll_write_stbl_atoms(pb, track); > } > return update_size(pb, pos); > @@ -5150,6 +5170,96 @@ static int mov_parse_mpeg2_frame(AVPacket *pkt, > uint32_t *flags) > return 0; > } > > +static int mov_parse_h264_frame(AVPacket *pkt, MOVTrack *trk) > +{ > + GetBitContext gb; > + const uint8_t *buf = pkt->data; > + const uint8_t *buf_end = pkt->data + pkt->size; > + uint32_t state = -1; > + unsigned roll_distance = 0; > + int nal_length_size = 0, nalsize; > + int idr = 0; > + > + if (!pkt->size) > + return 0; > + if (trk->vos_data && trk->vos_data[0] == 1) { > + if (trk->vos_len < 5) > + return 0; > + nal_length_size = (trk->vos_data[4] & 0x03) + 1; > + } > + > + while (buf_end - buf >= 4) { > + if (nal_length_size) { > + int i = 0; > + nalsize = get_nalsize(nal_length_size, buf, buf_end - buf, &i, > NULL); > + if (nalsize < 0) > + break; > + state = buf[nal_length_size]; > + buf += nal_length_size + 1; > + } else { > + buf = avpriv_find_start_code(buf, buf_end, &state); This checks the whole packet for start codes. But given that we have to convert annex B input to ISOBMFF format anyway, this separate codepath seems avoidable by working with already reformatted data here. (In any case: In case this is not an IDR access unit you really check the whole packet, even when you have already encountered a VLC NALU despite SEIs having to be in front of them.) > + if (buf >= buf_end) > + break; > + } > + > + switch (state & 0x1f) { > + case H264_NAL_IDR_SLICE: > + idr = 1; > + goto end; > + case H264_NAL_SEI: > + init_get_bits8(&gb, buf, buf_end - buf); Weird that you checked the init_get_bits8() below but not this one. > + do { > + GetBitContext gb_payload; > + int ret, type = 0, size = 0; > + > + do { > + if (get_bits_left(&gb) < 8) > + goto end; > + type += show_bits(&gb, 8); > + } while (get_bits(&gb, 8) == 255); > + do { > + if (get_bits_left(&gb) < 8) > + goto end; > + size += show_bits(&gb, 8); > + } while (get_bits(&gb, 8) == 255); > + > + if (size > get_bits_left(&gb) / 8) > + goto end; > + > + ret = init_get_bits8(&gb_payload, gb.buffer + > get_bits_count(&gb) / 8, size); > + if (ret < 0) > + goto end; > + > + switch (type) { > + case SEI_TYPE_RECOVERY_POINT: > + roll_distance = get_ue_golomb_long(&gb_payload); > + break; > + default: > + break; > + } > + skip_bits_long(&gb, 8 * size); > + } while (get_bits_left(&gb) > 0 && show_bits(&gb, 8) != 0x80); 1. You are using the outer bitreader gb only for byte-aligned reads/skips; better read bytes directly. 2. You are not unescaping the buffer: The SEI size is the size of an unescaped SEI (after 0x03 has been stripped away). Knowing the size in advance comes in really handy for this; this is where only working with mp4-formatted data brings benefits. > + break; > + default: > + break; > + } > + if (nal_length_size) > + buf += nalsize - 1; > + } > + > +end: > + if (roll_distance != (int16_t)roll_distance) The H.264 spec restricts this field to values in the range 0..MaxFrameNum - 1, where the latter can be UINT16_MAX. So I don't understand why you are using a signed int16_t here and as type in MOVIentry. > + roll_distance = 0; > + trk->cluster[trk->entry].roll_distance = roll_distance; > + > + if (idr) { > + trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE; > + trk->has_keyframes++; > + } > + > + return 0; > +} > + > static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk) > { > const uint8_t *start, *next, *end = pkt->data + pkt->size; > @@ -5740,6 +5850,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket > *pkt) > trk->cluster[trk->entry].entries = samples_in_chunk; > trk->cluster[trk->entry].dts = pkt->dts; > trk->cluster[trk->entry].pts = pkt->pts; > + trk->cluster[trk->entry].roll_distance = 0; > if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) { > if (!trk->frag_discont) { > /* First packet of a new fragment. We already wrote the duration > @@ -5821,6 +5932,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket > *pkt) > > if (par->codec_id == AV_CODEC_ID_VC1) { > mov_parse_vc1_frame(pkt, trk); > + } else if (par->codec_id == AV_CODEC_ID_H264) { > + mov_parse_h264_frame(pkt, trk); > } else if (par->codec_id == AV_CODEC_ID_TRUEHD) { > mov_parse_truehd_frame(pkt, trk); > } else if (pkt->flags & AV_PKT_FLAG_KEY) { > diff --git a/libavformat/movenc.h b/libavformat/movenc.h > index af1ea0bce6..73bf73ce8f 100644 > --- a/libavformat/movenc.h > +++ b/libavformat/movenc.h > @@ -56,6 +56,7 @@ typedef struct MOVIentry { > #define MOV_PARTIAL_SYNC_SAMPLE 0x0002 > #define MOV_DISPOSABLE_SAMPLE 0x0004 > uint32_t flags; > + int16_t roll_distance; > AVProducerReferenceTime prft; > } MOVIentry; > > diff --git a/tests/ref/lavf-fate/h264.mp4 b/tests/ref/lavf-fate/h264.mp4 > index a9c3823c2c..c08ee4c7ae 100644 > --- a/tests/ref/lavf-fate/h264.mp4 > +++ b/tests/ref/lavf-fate/h264.mp4 > @@ -1,3 +1,3 @@ > -fe299ea5205b71a48281f917b1256a5d *tests/data/lavf-fate/lavf.h264.mp4 > -547928 tests/data/lavf-fate/lavf.h264.mp4 > -tests/data/lavf-fate/lavf.h264.mp4 CRC=0x9da2c999 > +2812f617314d23474fcb23898b8a56ab *tests/data/lavf-fate/lavf.h264.mp4 > +548084 tests/data/lavf-fate/lavf.h264.mp4 > +tests/data/lavf-fate/lavf.h264.mp4 CRC=0x396f0829 > I guess this filesize increase is due to the SyncSample table? What exactly has been written before this patch, what gets written with it? - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".