On date Sunday 2024-03-10 19:18:56 -0500, Marth64 wrote: > In MPEG-2 user data, there can be different types of Closed Captions > formats embedded (A53, SCTE-20, or DVD). The current behavior of the > CC extraction code in the MPEG-2 decoder is to not be aware of > multiple formats if multiple exist, therefore allowing one format > to overwrite the other during the extraction process since the CC > extraction shares one output buffer for the normalized bytes. > > This causes sources that have two CC formats to produce flawed output. > There exist real-world samples which contain both A53 and SCTE-20 captions > in the same MPEG-2 stream, and that manifest this problem. Example of symptom: > THANK YOU (expected) --> THTHANANK K YOYOUU (actual) > > The solution is to pick only the first CC substream observed with valid bytes, > and ignore the other types. Additionally, provide an option for users > to manually "force" a type in the event that this matters for a particular > source. > > In tandem, I am working on an improvement to src_movie to allow passing > decoder > options (as src_movie via lavfi is the "de facto" way to extract CCs right > now). > This way, users can realize the newly added option. > > Signed-off-by: Marth64 <mart...@proxyid.net> > --- > libavcodec/mpeg12dec.c | 49 +++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 46 insertions(+), 3 deletions(-) > > diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c > index 3a2f17e508..a42e1c661f 100644 > --- a/libavcodec/mpeg12dec.c > +++ b/libavcodec/mpeg12dec.c > @@ -62,6 +62,13 @@ > > #define A53_MAX_CC_COUNT 2000 > > +enum Mpeg2ClosedCaptionsFormat { > + CC_FORMAT_AUTO, > + CC_FORMAT_A53_PART4, > + CC_FORMAT_SCTE20, > + CC_FORMAT_DVD > +}; > + > typedef struct Mpeg1Context { > MpegEncContext mpeg_enc_ctx; > int mpeg_enc_ctx_allocated; /* true if decoding context allocated */ > @@ -70,6 +77,7 @@ typedef struct Mpeg1Context { > AVStereo3D stereo3d; > int has_stereo3d; > AVBufferRef *a53_buf_ref; > + enum Mpeg2ClosedCaptionsFormat cc_format; > uint8_t afd; > int has_afd; > int slice_count; > @@ -1908,7 +1916,8 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx, > { > Mpeg1Context *s1 = avctx->priv_data; > > - if (buf_size >= 6 && > + if ((!s1->cc_format || s1->cc_format == CC_FORMAT_A53_PART4) && > + buf_size >= 6 && > p[0] == 'G' && p[1] == 'A' && p[2] == '9' && p[3] == '4' && > p[4] == 3 && (p[5] & 0x40)) { > /* extract A53 Part 4 CC data */ > @@ -1927,9 +1936,15 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx, > memcpy(s1->a53_buf_ref->data + old_size, p + 7, cc_count * > UINT64_C(3)); > > avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS; > + > + if (!s1->cc_format) { > + s1->cc_format = CC_FORMAT_A53_PART4; > + av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is A53 > format\n"); > + } > } > return 1; > - } else if (buf_size >= 2 && > + } else if ((!s1->cc_format || s1->cc_format == CC_FORMAT_SCTE20) && > + buf_size >= 2 && > p[0] == 0x03 && (p[1]&0x7f) == 0x01) { > /* extract SCTE-20 CC data */ > GetBitContext gb; > @@ -1974,9 +1989,15 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx, > } > } > avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS; > + > + if (!s1->cc_format) { > + s1->cc_format = CC_FORMAT_SCTE20; > + av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is > SCTE-20 format\n"); > + } > } > return 1; > - } else if (buf_size >= 11 && > + } else if ((!s1->cc_format || s1->cc_format == CC_FORMAT_DVD) && > + buf_size >= 11 && > p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) { > /* extract DVD CC data > * > @@ -2034,6 +2055,11 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx, > } > }
> avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS; > + > + if (!s1->cc_format) { > + s1->cc_format = CC_FORMAT_DVD; > + av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is DVD > format\n"); > + } nit: probably this might be factorized, either with a function or a macro > } > return 1; > } > @@ -2598,11 +2624,28 @@ const FFCodec ff_mpeg1video_decoder = { > }, > }; > > +#define M2V_OFFSET(x) offsetof(Mpeg1Context, x) > +#define M2V_PARAM AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM > + > +static const AVOption mpeg2video_options[] = { > + { "cc_format", "extract a specific Closed Captions format (0=auto)", > M2V_OFFSET(cc_format), AV_OPT_TYPE_INT, { .i64 = 0 }, CC_FORMAT_AUTO, > CC_FORMAT_DVD, M2V_PARAM }, > + { NULL } ideally this should be documented in the doc, but it is missing for this one so probably it's not needed [...] Patch looks good to me, but let's wait a few days in case there are more comments. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".