James Almer: > On 7/28/2021 7:43 PM, Andreas Rheinhardt wrote: >> James Almer: >>> Since we can't blindly trust the keyframe flag in packets and assume its >>> contents are a valid Sync Sample, do some basic bitstream parsing to >>> build the >>> Sync Sample table in addition to a Random Access Recovery Point table. >>> >>> Suggested-by: ffm...@fb.com >>> Signed-off-by: James Almer <jamr...@gmail.com> >>> --- >>> libavformat/movenc.c | 125 +++++++++++++++++++++++++++++++++-- >>> libavformat/movenc.h | 1 + >>> tests/ref/lavf-fate/h264.mp4 | 6 +- >>> 3 files changed, 123 insertions(+), 9 deletions(-) >>> >>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c >>> index 57062f45c5..159e0261b7 100644 >>> --- a/libavformat/movenc.c >>> +++ b/libavformat/movenc.c >>> @@ -34,13 +34,17 @@ >>> #include "avc.h" >>> #include "libavcodec/ac3_parser_internal.h" >>> #include "libavcodec/dnxhddata.h" >>> +#include "libavcodec/h264.h" >>> +#include "libavcodec/h2645_parse.h" >>> #include "libavcodec/flac.h" >>> #include "libavcodec/get_bits.h" >>> +#include "libavcodec/golomb.h" >>> #include "libavcodec/internal.h" >>> #include "libavcodec/put_bits.h" >>> #include "libavcodec/vc1_common.h" >>> #include "libavcodec/raw.h" >>> +#include "libavcodec/sei.h" >>> #include "internal.h" >>> #include "libavutil/avstring.h" >>> #include "libavutil/channel_layout.h" >>> @@ -2537,7 +2541,9 @@ static int >>> mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) >>> if (!sgpd_entries) >>> return AVERROR(ENOMEM); >>> - av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || >>> track->par->codec_id == AV_CODEC_ID_AAC); >>> + av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || >>> + track->par->codec_id == AV_CODEC_ID_AAC || >>> + track->par->codec_id == AV_CODEC_ID_H264); >>> if (track->par->codec_id == AV_CODEC_ID_OPUS) { >>> for (i = 0; i < track->entry; i++) { >>> @@ -2545,7 +2551,7 @@ static int >>> mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) >>> int distance = 0; >>> for (j = i - 1; j >= 0; j--) { >>> roll_samples_remaining -= >>> get_cluster_duration(track, j); >>> - distance++; >>> + distance--; >>> if (roll_samples_remaining <= 0) >>> break; >>> } >>> @@ -2555,7 +2561,7 @@ static int >>> mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) >>> if (roll_samples_remaining > 0) >>> distance = 0; >>> /* Verify distance is a maximum of 32 (2.5ms) packets. */ >>> - if (distance > 32) >>> + if (distance < 32) >>> return AVERROR_INVALIDDATA; >>> if (i && distance == >>> sgpd_entries[entries].roll_distance) { >>> sgpd_entries[entries].count++; >>> @@ -2566,10 +2572,22 @@ static int >>> mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) >>> sgpd_entries[entries].group_description_index = >>> distance ? ++group : 0; >>> } >>> } >>> + } else if (track->par->codec_id == AV_CODEC_ID_H264) { >>> + for (i = 0; i < track->entry; i++) { >>> + int distance = track->cluster[i].roll_distance; >>> + if (i && distance == sgpd_entries[entries].roll_distance) { >>> + sgpd_entries[entries].count++; >>> + } else { >>> + entries++; >>> + sgpd_entries[entries].count = 1; >>> + sgpd_entries[entries].roll_distance = distance; >>> + sgpd_entries[entries].group_description_index = >>> distance ? ++group : 0; >>> + } >>> + } >>> } else { >>> entries++; >>> sgpd_entries[entries].count = track->sample_count; >>> - sgpd_entries[entries].roll_distance = 1; >>> + sgpd_entries[entries].roll_distance = -1; >> >> You seem to be negate roll_distance in this patch; shouldn't this be a >> separate patch? > > I'll split it. > >> >>> sgpd_entries[entries].group_description_index = ++group; >>> } >>> entries++; >>> @@ -2588,7 +2606,7 @@ static int >>> mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) >>> avio_wb32(pb, group); /* entry_count */ >>> for (i = 0; i < entries; i++) { >>> if (sgpd_entries[i].group_description_index) { >>> - avio_wb16(pb, -sgpd_entries[i].roll_distance); /* >>> roll_distance */ >>> + avio_wb16(pb, sgpd_entries[i].roll_distance); /* >>> roll_distance */ >>> } >>> } >>> @@ -2639,7 +2657,9 @@ static int mov_write_stbl_tag(AVFormatContext >>> *s, AVIOContext *pb, MOVMuxContext >>> if (track->cenc.aes_ctr) { >>> ff_mov_cenc_write_stbl_atoms(&track->cenc, pb); >>> } >>> - if (track->par->codec_id == AV_CODEC_ID_OPUS || >>> track->par->codec_id == AV_CODEC_ID_AAC) { >>> + if (track->par->codec_id == AV_CODEC_ID_OPUS || >>> + track->par->codec_id == AV_CODEC_ID_AAC || >>> + track->par->codec_id == AV_CODEC_ID_H264) { >>> mov_preroll_write_stbl_atoms(pb, track); >>> } >>> return update_size(pb, pos); >>> @@ -5150,6 +5170,96 @@ static int mov_parse_mpeg2_frame(AVPacket >>> *pkt, uint32_t *flags) >>> return 0; >>> } >>> +static int mov_parse_h264_frame(AVPacket *pkt, MOVTrack *trk) >>> +{ >>> + GetBitContext gb; >>> + const uint8_t *buf = pkt->data; >>> + const uint8_t *buf_end = pkt->data + pkt->size; >>> + uint32_t state = -1; >>> + unsigned roll_distance = 0; >>> + int nal_length_size = 0, nalsize; >>> + int idr = 0; >>> + >>> + if (!pkt->size) >>> + return 0; >>> + if (trk->vos_data && trk->vos_data[0] == 1) { >>> + if (trk->vos_len < 5) >>> + return 0; >>> + nal_length_size = (trk->vos_data[4] & 0x03) + 1; >>> + } >>> + >>> + while (buf_end - buf >= 4) { >>> + if (nal_length_size) { >>> + int i = 0; >>> + nalsize = get_nalsize(nal_length_size, buf, buf_end - >>> buf, &i, NULL); >>> + if (nalsize < 0) >>> + break; >>> + state = buf[nal_length_size]; >>> + buf += nal_length_size + 1; >>> + } else { >>> + buf = avpriv_find_start_code(buf, buf_end, &state); >> >> This checks the whole packet for start codes. But given that we have to >> convert annex B input to ISOBMFF format anyway, this separate codepath >> seems avoidable by working with already reformatted data here. >> (In any case: In case this is not an IDR access unit you really check >> the whole packet, even when you have already encountered a VLC NALU >> despite SEIs having to be in front of them.) > > I'll look into it. > >> >>> + if (buf >= buf_end) >>> + break; >>> + } >>> + >>> + switch (state & 0x1f) { >>> + case H264_NAL_IDR_SLICE: >>> + idr = 1; >>> + goto end; >>> + case H264_NAL_SEI: >>> + init_get_bits8(&gb, buf, buf_end - buf); >> >> Weird that you checked the init_get_bits8() below but not this one. > > The one below was copy-pasted. Will also check this one. > >> >>> + do { >>> + GetBitContext gb_payload; >>> + int ret, type = 0, size = 0; >>> + >>> + do { >>> + if (get_bits_left(&gb) < 8) >>> + goto end; >>> + type += show_bits(&gb, 8); >>> + } while (get_bits(&gb, 8) == 255); >>> + do { >>> + if (get_bits_left(&gb) < 8) >>> + goto end; >>> + size += show_bits(&gb, 8); >>> + } while (get_bits(&gb, 8) == 255); >>> + >>> + if (size > get_bits_left(&gb) / 8) >>> + goto end; >>> + >>> + ret = init_get_bits8(&gb_payload, gb.buffer + >>> get_bits_count(&gb) / 8, size); >>> + if (ret < 0) >>> + goto end; >>> + >>> + switch (type) { >>> + case SEI_TYPE_RECOVERY_POINT: >>> + roll_distance = get_ue_golomb_long(&gb_payload); >>> + break; >>> + default: >>> + break; >>> + } >>> + skip_bits_long(&gb, 8 * size); >>> + } while (get_bits_left(&gb) > 0 && show_bits(&gb, 8) != >>> 0x80); >> >> 1. You are using the outer bitreader gb only for byte-aligned >> reads/skips; better read bytes directly. > > Using GetBitContext is cleaner and clearer IMO, but I could use > GetByteContext instead i guess. I'd rather not work directly on bytes. >
I think we disagree on that: I always hate it when I have to use a bitreader in case the position is known to be byte-aligned. >> 2. You are not unescaping the buffer: The SEI size is the size of an >> unescaped SEI (after 0x03 has been stripped away). Knowing the size in >> advance comes in really handy for this; this is where only working with >> mp4-formatted data brings benefits. > > A SEI NALu may have more than one SEI message, and the Recovery Point > may not be the first. If i can't trust the size read in the loop above > because it does not take into account the emulation prevention bytes, > how can i feasibly skip each SEI message within a give NALU? > By unescaping the SEI NALU first. (Alternatively, one could write a bitreader (necessarily cached) that unescapes internally, but that is more effort; and it would be slower with bigger NALUs, so the fuzzer will probably report some new timeouts.) - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".