Pierre-Anthony Lemieux: > On Sun, Jan 30, 2022 at 2:16 PM Andreas Rheinhardt > <andreas.rheinha...@outlook.com> wrote: >> >> p...@sandflow.com: >>> From: Pierre-Anthony Lemieux <p...@palemieux.com> >>> >>> The IMF demuxer does not set the DTS and PTS of packets accurately in all >>> scenarios. Moreover, audio packets are not trimmed when they exceed the >>> duration of the underlying resource. >>> >>> Closes https://trac.ffmpeg.org/ticket/9611 >>> >>> --- >>> libavformat/imfdec.c | 225 +++++++++++++++++++++++++------------------ >>> 1 file changed, 132 insertions(+), 93 deletions(-) >>> >>> diff --git a/libavformat/imfdec.c b/libavformat/imfdec.c >>> index 6b50b582f6..05dcb6ff31 100644 >>> --- a/libavformat/imfdec.c >>> +++ b/libavformat/imfdec.c >>> @@ -65,6 +65,7 @@ >>> #include "avio_internal.h" >>> #include "imf.h" >>> #include "internal.h" >>> +#include "libavcodec/packet.h" >>> #include "libavutil/avstring.h" >>> #include "libavutil/bprint.h" >>> #include "libavutil/opt.h" >>> @@ -97,6 +98,9 @@ typedef struct IMFVirtualTrackResourcePlaybackCtx { >>> IMFAssetLocator *locator; >>> FFIMFTrackFileResource *resource; >>> AVFormatContext *ctx; >>> + AVRational start_time; >>> + AVRational end_time; >>> + AVRational ts_offset; >>> } IMFVirtualTrackResourcePlaybackCtx; >>> >>> typedef struct IMFVirtualTrackPlaybackCtx { >>> @@ -108,7 +112,6 @@ typedef struct IMFVirtualTrackPlaybackCtx { >>> IMFVirtualTrackResourcePlaybackCtx *resources; /**< Buffer holding the >>> resources */ >>> int32_t current_resource_index; /**< Index of the >>> current resource in resources, >>> or < 0 if a >>> current resource has yet to be selected */ >>> - int64_t last_pts; /**< Last timestamp */ >>> } IMFVirtualTrackPlaybackCtx; >>> >>> typedef struct IMFContext { >>> @@ -342,6 +345,7 @@ static int open_track_resource_context(AVFormatContext >>> *s, >>> int ret = 0; >>> int64_t entry_point; >>> AVDictionary *opts = NULL; >>> + AVStream *st; >>> >>> if (track_resource->ctx) { >>> av_log(s, >>> @@ -383,23 +387,28 @@ static int >>> open_track_resource_context(AVFormatContext *s, >>> } >>> av_dict_free(&opts); >>> >>> - /* Compare the source timebase to the resource edit rate, >>> - * considering the first stream of the source file >>> - */ >>> - if (av_cmp_q(track_resource->ctx->streams[0]->time_base, >>> - av_inv_q(track_resource->resource->base.edit_rate))) >>> + /* make sure there is only one stream in the file */ >>> + >>> + if (track_resource->ctx->nb_streams != 1) { >>> + ret = AVERROR_INVALIDDATA; >>> + goto cleanup; >>> + } >>> + >>> + st = track_resource->ctx->streams[0]; >>> + >>> + /* Warn if the resource time base does not match the file time base */ >>> + if (av_cmp_q(st->time_base, >>> av_inv_q(track_resource->resource->base.edit_rate))) >>> av_log(s, >>> AV_LOG_WARNING, >>> - "Incoherent source stream timebase %d/%d regarding resource >>> edit rate: %d/%d", >>> - track_resource->ctx->streams[0]->time_base.num, >>> - track_resource->ctx->streams[0]->time_base.den, >>> + "Incoherent source stream timebase " AVRATIONAL_FORMAT >>> + "regarding resource edit rate: " AVRATIONAL_FORMAT, >>> + st->time_base.num, >>> + st->time_base.den, >>> track_resource->resource->base.edit_rate.den, >>> track_resource->resource->base.edit_rate.num); >>> >>> - entry_point = (int64_t)track_resource->resource->base.entry_point >>> - * track_resource->resource->base.edit_rate.den >>> - * AV_TIME_BASE >>> - / track_resource->resource->base.edit_rate.num; >>> + entry_point = av_rescale_q(track_resource->resource->base.entry_point, >>> st->time_base, >>> + >>> av_inv_q(track_resource->resource->base.edit_rate)); >>> >>> if (entry_point) { >>> av_log(s, >>> @@ -407,7 +416,7 @@ static int open_track_resource_context(AVFormatContext >>> *s, >>> "Seek at resource %s entry point: %" PRIu32 "\n", >>> track_resource->locator->absolute_uri, >>> track_resource->resource->base.entry_point); >>> - ret = avformat_seek_file(track_resource->ctx, -1, entry_point, >>> entry_point, entry_point, 0); >>> + ret = avformat_seek_file(track_resource->ctx, 0, entry_point, >>> entry_point, entry_point, 0); >>> if (ret < 0) { >>> av_log(s, >>> AV_LOG_ERROR, >>> @@ -470,11 +479,16 @@ static int open_track_file_resource(AVFormatContext >>> *s, >>> vt_ctx.locator = asset_locator; >>> vt_ctx.resource = track_file_resource; >>> vt_ctx.ctx = NULL; >>> - track->resources[track->resource_count++] = vt_ctx; >>> - track->duration = av_add_q(track->duration, >>> + vt_ctx.start_time = track->duration; >>> + vt_ctx.ts_offset = av_sub_q(vt_ctx.start_time, >>> + >>> av_div_q(av_make_q((int)track_file_resource->base.entry_point, 1), >>> + >>> track_file_resource->base.edit_rate)); >>> + vt_ctx.end_time = av_add_q(track->duration, >>> >>> av_make_q((int)track_file_resource->base.duration >>> * >>> track_file_resource->base.edit_rate.den, >>> >>> track_file_resource->base.edit_rate.num)); >>> + track->resources[track->resource_count++] = vt_ctx; >>> + track->duration = vt_ctx.end_time; >>> } >>> >>> return 0; >>> @@ -701,11 +715,14 @@ static IMFVirtualTrackPlaybackCtx >>> *get_next_track_with_minimum_timestamp(AVForma >>> return track; >>> } >>> >>> -static IMFVirtualTrackResourcePlaybackCtx >>> *get_resource_context_for_timestamp(AVFormatContext *s, >>> - >>> IMFVirtualTrackPlaybackCtx *track) >>> +static int get_resource_context_for_timestamp(AVFormatContext *s, >>> IMFVirtualTrackPlaybackCtx *track, IMFVirtualTrackResourcePlaybackCtx >>> **resource) >>> { >>> - AVRational edit_unit_duration = >>> av_inv_q(track->resources[0].resource->base.edit_rate); >>> - AVRational cumulated_duration = av_make_q(0, edit_unit_duration.den); >>> + *resource = NULL; >>> + >>> + if (av_cmp_q(track->current_timestamp, track->duration) >= 0) { >>> + av_log(s, AV_LOG_DEBUG, "Reached the end of the virtual track\n"); >>> + return AVERROR_EOF; >>> + } >>> >>> av_log(s, >>> AV_LOG_DEBUG, >>> @@ -714,119 +731,141 @@ static IMFVirtualTrackResourcePlaybackCtx >>> *get_resource_context_for_timestamp(AV >>> av_q2d(track->current_timestamp), >>> av_q2d(track->duration)); >>> for (uint32_t i = 0; i < track->resource_count; ++i) { >>> - cumulated_duration = av_add_q(cumulated_duration, >>> - >>> av_make_q((int)track->resources[i].resource->base.duration >>> - * >>> edit_unit_duration.num, >>> - edit_unit_duration.den)); >>> >>> - if (av_cmp_q(av_add_q(track->current_timestamp, >>> edit_unit_duration), cumulated_duration) <= 0) { >>> + if (av_cmp_q(track->resources[i].end_time, >>> track->current_timestamp) > 0) { >>> av_log(s, >>> AV_LOG_DEBUG, >>> - "Found resource %d in track %d to read for timestamp >>> %lf " >>> - "(on cumulated=%lf): entry=%" PRIu32 >>> + "Found resource %d in track %d to read at timestamp >>> %lf: " >>> + "entry=%" PRIu32 >>> ", duration=%" PRIu32 >>> - ", editrate=" AVRATIONAL_FORMAT >>> - " | edit_unit_duration=%lf\n", >>> + ", editrate=" AVRATIONAL_FORMAT, >>> i, >>> track->index, >>> av_q2d(track->current_timestamp), >>> - av_q2d(cumulated_duration), >>> track->resources[i].resource->base.entry_point, >>> track->resources[i].resource->base.duration, >>> - >>> AVRATIONAL_ARG(track->resources[i].resource->base.edit_rate), >>> - av_q2d(edit_unit_duration)); >>> + >>> AVRATIONAL_ARG(track->resources[i].resource->base.edit_rate)); >>> >>> if (track->current_resource_index != i) { >>> + int ret; >>> + >>> av_log(s, >>> AV_LOG_DEBUG, >>> "Switch resource on track %d: re-open context\n", >>> track->index); >>> - if (open_track_resource_context(s, &(track->resources[i])) >>> != 0) >>> - return NULL; >>> + >>> + ret = open_track_resource_context(s, >>> &(track->resources[i])); >>> + if (ret != 0) >>> + return ret; >>> if (track->current_resource_index > 0) >>> >>> avformat_close_input(&track->resources[track->current_resource_index].ctx); >>> track->current_resource_index = i; >>> } >>> >>> - return &(track->resources[track->current_resource_index]); >>> + *resource = &(track->resources[track->current_resource_index]); >>> + return 0; >>> } >>> } >>> - return NULL; >>> + >>> + av_log(s, AV_LOG_ERROR, "Could not find IMF track resource to read\n"); >>> + return AVERROR_STREAM_NOT_FOUND; >>> +} >>> + >>> +static int imf_time_to_ts(int64_t *ts, AVRational t, AVRational time_base) >>> +{ >>> + int dst_num; >>> + int dst_den; >>> + AVRational r; >>> + >>> + r = av_div_q(t, time_base); >>> + >>> + if ((av_reduce(&dst_num, &dst_den, r.num, r.den, INT64_MAX) != 1)) >>> + return 0; >>> + >>> + if (dst_den != 1) >>> + return 0; >>> + >>> + *ts = dst_num; >>> + >>> + return 1; >>> } >>> >>> static int imf_read_packet(AVFormatContext *s, AVPacket *pkt) >>> { >>> - IMFContext *c = s->priv_data; >>> - IMFVirtualTrackResourcePlaybackCtx *resource_to_read = NULL; >>> - AVRational edit_unit_duration; >>> + IMFVirtualTrackResourcePlaybackCtx *resource = NULL; >>> int ret = 0; >>> IMFVirtualTrackPlaybackCtx *track; >>> - FFStream *track_stream; >>> + int64_t delta_ts; >>> + AVStream *st; >>> + AVRational next_timestamp; >>> >>> track = get_next_track_with_minimum_timestamp(s); >>> >>> - if (av_cmp_q(track->current_timestamp, track->duration) == 0) >>> - return AVERROR_EOF; >>> + ret = get_resource_context_for_timestamp(s, track, &resource); >>> + if (ret) >>> + return ret; >>> >>> - resource_to_read = get_resource_context_for_timestamp(s, track); >>> + ret = av_read_frame(resource->ctx, pkt); >>> + if (ret) { >>> + av_log(s, AV_LOG_ERROR, "Failed to read frame\n"); >>> + return ret; >>> + } >>> >>> - if (!resource_to_read) { >>> - edit_unit_duration >>> - = >>> av_inv_q(track->resources[track->current_resource_index].resource->base.edit_rate); >>> + av_log(s, AV_LOG_DEBUG, "Got packet: pts=%" PRId64 ", dts=%" PRId64 >>> + ", duration=%" PRId64 ", stream_index=%d, pos=%" PRId64 >>> + ", time_base=" AVRATIONAL_FORMAT "\n", pkt->pts, pkt->dts, >>> pkt->duration, >>> + pkt->stream_index, pkt->pos, pkt->time_base.num, >>> pkt->time_base.den); >>> >>> - if (av_cmp_q(av_add_q(track->current_timestamp, >>> edit_unit_duration), track->duration) > 0) >>> - return AVERROR_EOF; >>> + /* IMF resources contain only one stream */ >>> >>> - av_log(s, AV_LOG_ERROR, "Could not find IMF track resource to >>> read\n"); >>> - return AVERROR_STREAM_NOT_FOUND; >>> - } >>> + if (pkt->stream_index != 0) >>> + return AVERROR_INVALIDDATA; >>> + st = resource->ctx->streams[0]; >>> >>> - while (!ff_check_interrupt(c->interrupt_callback) && !ret) { >>> - ret = av_read_frame(resource_to_read->ctx, pkt); >>> - av_log(s, >>> - AV_LOG_DEBUG, >>> - "Got packet: pts=%" PRId64 >>> - ", dts=%" PRId64 >>> - ", duration=%" PRId64 >>> - ", stream_index=%d, pos=%" PRId64 >>> - "\n", >>> - pkt->pts, >>> - pkt->dts, >>> - pkt->duration, >>> - pkt->stream_index, >>> - pkt->pos); >>> - >>> - track_stream = ffstream(s->streams[track->index]); >>> - if (ret >= 0) { >>> - /* Update packet info from track */ >>> - if (pkt->dts < track_stream->cur_dts && track->last_pts > 0) >>> - pkt->dts = track_stream->cur_dts; >>> - >>> - pkt->pts = track->last_pts; >>> - pkt->dts = pkt->dts >>> - - >>> (int64_t)track->resources[track->current_resource_index].resource->base.entry_point; >>> - pkt->stream_index = track->index; >>> - >>> - /* Update track cursors */ >>> - track->current_timestamp >>> - = av_add_q(track->current_timestamp, >>> - av_make_q((int)pkt->duration >>> - * >>> resource_to_read->ctx->streams[0]->time_base.num, >>> - >>> resource_to_read->ctx->streams[0]->time_base.den)); >>> - track->last_pts += pkt->duration; >>> + pkt->stream_index = track->index; >>> >>> - return 0; >>> - } else if (ret != AVERROR_EOF) { >>> - av_log(s, >>> - AV_LOG_ERROR, >>> - "Could not get packet from track %d: %s\n", >>> - track->index, >>> - av_err2str(ret)); >>> - return ret; >>> + /* adjust the packet PTS and DTS based on the temporal position of the >>> resource within the timeline */ >>> + >>> + if ((imf_time_to_ts(&delta_ts, resource->ts_offset, st->time_base) == >>> 0)) >>> + av_log(s, AV_LOG_WARNING, "Incoherent time stamp " >>> AVRATIONAL_FORMAT " for time base " AVRATIONAL_FORMAT, >>> + resource->ts_offset.num, resource->ts_offset.den, >>> pkt->time_base.num, >>> + pkt->time_base.den); >>> + if (pkt->pts != AV_NOPTS_VALUE) >>> + pkt->pts += delta_ts; >>> + if (pkt->dts != AV_NOPTS_VALUE) >>> + pkt->dts += delta_ts; >>> + >>> + /* advance the track timestamp by the packet duration */ >>> + >>> + next_timestamp = av_add_q(track->current_timestamp, >>> + av_mul_q(av_make_q((int)pkt->duration, 1), >>> st->time_base)); >>> + >>> + /* if necessary, clamp the next timestamp to the end of the current >>> resource */ >>> + >>> + if (av_cmp_q(next_timestamp, resource->end_time) > 0) { >>> + >>> + next_timestamp = resource->end_time; >>> + >>> + /* shrink the packet duration */ >>> + >>> + if ((imf_time_to_ts(&pkt->duration, av_sub_q(resource->end_time, >>> track->current_timestamp), st->time_base) == 0)) >>> + av_log(s, AV_LOG_WARNING, "Incoherent time base during packet >>> duration calculation"); >>> + >>> + /* shrink the packet size itself for audio samples */ >>> + /* only AV_CODEC_ID_PCM_S24LE is supported in IMF */ >>> + >>> + if (st->codecpar->codec_id == AV_CODEC_ID_PCM_S24LE) { >>> + int bytes_per_sample = >>> av_get_exact_bits_per_sample(st->codecpar->codec_id) >> 3; >>> + int64_t nbsamples = av_rescale_q(pkt->duration, st->time_base, >>> av_make_q(1, st->codecpar->sample_rate)); >>> + av_shrink_packet(pkt, nbsamples * st->codecpar->channels * >>> bytes_per_sample); >>> + } else { >>> + av_log(s, AV_LOG_WARNING, "Cannot shrink packets for non-PCM >>> essence"); >> >> AV_PKT_DATA_SKIP_SAMPLES > > Ok. Would the "reason for end skip" be 1 (convergence)? >
I guess so given that this is not the end of the logical track. - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".