On date Monday 2014-08-11 15:22:59 +0200, Clément Bœsch encoded: > From: Clément Bœsch <clem...@stupeflix.com> > > The reasoning behind this addition is that various third party > applications are interested in getting some motion information out of a > video "for free" when it is available. > > It was considered to export other information as well (such as the intra > information about the block, or the quantization) but the structure > might have ended up into a half full-generic, half full of codec > specific cruft. If more information is necessary, it should either be > added in the "flags" field of the AVExportedMV structure, or in another > side-data. > > This commit also includes an example exporting them in a CSV stream. > --- > TODO: avcodec version bump & APIChanges entry > --- > .gitignore | 1 + > configure | 2 + > doc/Makefile | 1 + > doc/codecs.texi | 3 + > doc/examples/Makefile | 1 + > doc/examples/extract_mvs.c | 185 > +++++++++++++++++++++++++++++++++++++++++++++ > libavcodec/avcodec.h | 1 + > libavcodec/mpegvideo.c | 102 ++++++++++++++++++++++++- > libavcodec/options_table.h | 1 + > libavutil/frame.h | 6 ++
> libavutil/mvinfo.h | 49 ++++++++++++ You probably need to add this to the list of public headers in libavutil/Makefile. > 11 files changed, 351 insertions(+), 1 deletion(-) > create mode 100644 doc/examples/extract_mvs.c > create mode 100644 libavutil/mvinfo.h > > diff --git a/.gitignore b/.gitignore > index cb370bb..480fbe0 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -39,6 +39,7 @@ > /doc/examples/avio_reading > /doc/examples/decoding_encoding > /doc/examples/demuxing_decoding > +/doc/examples/extract_mvs > /doc/examples/filter_audio > /doc/examples/filtering_audio > /doc/examples/filtering_video > diff --git a/configure b/configure > index 0ac6132..a93fc06 100755 > --- a/configure > +++ b/configure > @@ -1306,6 +1306,7 @@ EXAMPLE_LIST=" > avio_reading_example > decoding_encoding_example > demuxing_decoding_example > + extract_mvs_example > filter_audio_example > filtering_audio_example > filtering_video_example > @@ -2586,6 +2587,7 @@ zoompan_filter_deps="swscale" > avio_reading="avformat avcodec avutil" > avcodec_example_deps="avcodec avutil" > demuxing_decoding_example_deps="avcodec avformat avutil" > +extract_mvs_example_deps="avcodec avformat avutil" > filter_audio_example_deps="avfilter avutil" > filtering_audio_example_deps="avfilter avcodec avformat avutil" > filtering_video_example_deps="avfilter avcodec avformat avutil" > diff --git a/doc/Makefile b/doc/Makefile > index 99f588a..2fb9058 100644 > --- a/doc/Makefile > +++ b/doc/Makefile > @@ -39,6 +39,7 @@ DOCS = $(DOCS-yes) > DOC_EXAMPLES-$(CONFIG_AVIO_READING_EXAMPLE) += avio_reading > DOC_EXAMPLES-$(CONFIG_AVCODEC_EXAMPLE) += avcodec > DOC_EXAMPLES-$(CONFIG_DEMUXING_DECODING_EXAMPLE) += demuxing_decoding > +DOC_EXAMPLES-$(CONFIG_EXTRACT_MVS_EXAMPLE) += extract_mvs > DOC_EXAMPLES-$(CONFIG_FILTER_AUDIO_EXAMPLE) += filter_audio > DOC_EXAMPLES-$(CONFIG_FILTERING_AUDIO_EXAMPLE) += filtering_audio > DOC_EXAMPLES-$(CONFIG_FILTERING_VIDEO_EXAMPLE) += filtering_video > diff --git a/doc/codecs.texi 
b/doc/codecs.texi > index 1160e5d..7aaa229 100644 > --- a/doc/codecs.texi > +++ b/doc/codecs.texi > @@ -797,6 +797,9 @@ Frame data might be split into multiple chunks. > Show all frames before the first keyframe. > @item skiprd > Deprecated, use mpegvideo private options instead. > +@item export_mvs > +Export motion vectors into frame side-data (see @code{AV_FRAME_DATA_MV_INFO}) > +for codecs that support it. See also @file{doc/examples/export_mvs.c}. > @end table > > @item error @var{integer} (@emph{encoding,video}) > diff --git a/doc/examples/Makefile b/doc/examples/Makefile > index 03c7021..07251fe 100644 > --- a/doc/examples/Makefile > +++ b/doc/examples/Makefile > @@ -14,6 +14,7 @@ LDLIBS := $(shell pkg-config --libs $(FFMPEG_LIBS)) > $(LDLIBS) > EXAMPLES= avio_reading \ > decoding_encoding \ > demuxing_decoding \ > + extract_mvs \ > filtering_video \ > filtering_audio \ > metadata \ > diff --git a/doc/examples/extract_mvs.c b/doc/examples/extract_mvs.c > new file mode 100644 > index 0000000..69f76cd > --- /dev/null > +++ b/doc/examples/extract_mvs.c > @@ -0,0 +1,185 @@ > +/* > + * Copyright (c) 2012 Stefano Sabatini > + * Copyright (c) 2014 Clément Bœsch > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > deal > + * in the Software without restriction, including without limitation the > rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include <libavutil/mvinfo.h> > +#include <libavformat/avformat.h> > + > +static AVFormatContext *fmt_ctx = NULL; > +static AVCodecContext *video_dec_ctx = NULL; > +static AVStream *video_stream = NULL; > +static const char *src_filename = NULL; > + > +static int video_stream_idx = -1; > +static AVFrame *frame = NULL; > +static AVPacket pkt; > +static int video_frame_count = 0; > + > +static int decode_packet(int *got_frame, int cached) > +{ > + int decoded = pkt.size; > + > + *got_frame = 0; > + > + if (pkt.stream_index == video_stream_idx) { > + int ret = avcodec_decode_video2(video_dec_ctx, frame, got_frame, > &pkt); > + if (ret < 0) { > + fprintf(stderr, "Error decoding video frame (%s)\n", > av_err2str(ret)); > + return ret; > + } > + > + if (*got_frame) { > + int i; > + AVFrameSideData *sd; > + > + video_frame_count++; > + sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MV_INFO); > + if (sd) { > + const AVExportedMV *mvs = (const AVExportedMV *)sd->data; > + for (i = 0; i < sd->size / sizeof(*mvs); i++) { > + const AVExportedMV *mv = &mvs[i]; > + printf("%d,%2d,%2d,%2d,%4d,%4d,%4d,%4d,0x%016x\n", > + video_frame_count, mv->source, > + mv->w, mv->h, mv->src_x, mv->src_y, > + mv->dst_x, mv->dst_y, mv->flags); > + } > + } > + } > + } > + > + return decoded; > +} > + > +static int open_codec_context(int *stream_idx, > + AVFormatContext *fmt_ctx, enum AVMediaType > type) > +{ > + int ret; > + AVStream *st; > + AVCodecContext *dec_ctx = NULL; > + 
AVCodec *dec = NULL; > + AVDictionary *opts = NULL; > + > + ret = av_find_best_stream(fmt_ctx, type, -1, -1, NULL, 0); > + if (ret < 0) { > + fprintf(stderr, "Could not find %s stream in input file '%s'\n", > + av_get_media_type_string(type), src_filename); > + return ret; > + } else { > + *stream_idx = ret; > + st = fmt_ctx->streams[*stream_idx]; > + > + /* find decoder for the stream */ > + dec_ctx = st->codec; > + dec = avcodec_find_decoder(dec_ctx->codec_id); > + if (!dec) { > + fprintf(stderr, "Failed to find %s codec\n", > + av_get_media_type_string(type)); > + return AVERROR(EINVAL); > + } > + > + /* Init the video decoder */ > + av_dict_set(&opts, "flags2", "+export_mvs", 0); > + if ((ret = avcodec_open2(dec_ctx, dec, &opts)) < 0) { > + fprintf(stderr, "Failed to open %s codec\n", > + av_get_media_type_string(type)); > + return ret; > + } > + } > + > + return 0; > +} > + > +int main(int argc, char **argv) > +{ > + int ret = 0, got_frame; > + > + if (argc != 2) { > + fprintf(stderr, "Usage: %s <video>\n", argv[0]); > + exit(1); > + } > + src_filename = argv[1]; > + > + av_register_all(); > + > + if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) { > + fprintf(stderr, "Could not open source file %s\n", src_filename); > + exit(1); > + } > + > + if (avformat_find_stream_info(fmt_ctx, NULL) < 0) { > + fprintf(stderr, "Could not find stream information\n"); > + exit(1); > + } > + > + if (open_codec_context(&video_stream_idx, fmt_ctx, AVMEDIA_TYPE_VIDEO) > >= 0) { > + video_stream = fmt_ctx->streams[video_stream_idx]; > + video_dec_ctx = video_stream->codec; > + } > + > + av_dump_format(fmt_ctx, 0, src_filename, 0); > + > + if (!video_stream) { > + fprintf(stderr, "Could not find video stream in the input, > aborting\n"); > + ret = 1; > + goto end; > + } > + > + frame = av_frame_alloc(); > + if (!frame) { > + fprintf(stderr, "Could not allocate frame\n"); > + ret = AVERROR(ENOMEM); > + goto end; > + } > + > + 
printf("framenum,source,blockw,blockh,srcx,srcy,dstx,dsty,flags\n"); > + > + /* initialize packet, set data to NULL, let the demuxer fill it */ > + av_init_packet(&pkt); > + pkt.data = NULL; > + pkt.size = 0; > + > + /* read frames from the file */ > + while (av_read_frame(fmt_ctx, &pkt) >= 0) { > + AVPacket orig_pkt = pkt; > + do { > + ret = decode_packet(&got_frame, 0); > + if (ret < 0) > + break; > + pkt.data += ret; > + pkt.size -= ret; > + } while (pkt.size > 0); > + av_free_packet(&orig_pkt); > + } > + > + /* flush cached frames */ > + pkt.data = NULL; > + pkt.size = 0; > + do { > + decode_packet(&got_frame, 1); > + } while (got_frame); > + > +end: > + avcodec_close(video_dec_ctx); > + avformat_close_input(&fmt_ctx); > + av_frame_free(&frame); > + return ret < 0; > +} Alternatively, hack demuxing_decoding.c since much code is shared (this has pluses - less code duplication, smaller maintenance cost, and cons - more complexity for the user). Also, do you think this could be exposed by ffprobe? > diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h > index 3b6a750..efe3dbd 100644 > --- a/libavcodec/avcodec.h > +++ b/libavcodec/avcodec.h > @@ -767,6 +767,7 @@ typedef struct RcOverride{ > > #define CODEC_FLAG2_CHUNKS 0x00008000 ///< Input bitstream might be > truncated at a packet boundaries instead of only at frame boundaries. 
> #define CODEC_FLAG2_SHOW_ALL 0x00400000 ///< Show all frames before the > first keyframe > +#define CODEC_FLAG2_EXPORT_MVS 0x10000000 ///< Export motion vectors > through frame side data > > /* Unsupported options : > * Syntax Arithmetic coding (SAC) > diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c > index 4672359..fe11089 100644 > --- a/libavcodec/mpegvideo.c > +++ b/libavcodec/mpegvideo.c > @@ -31,6 +31,7 @@ > #include "libavutil/avassert.h" > #include "libavutil/imgutils.h" > #include "libavutil/internal.h" > +#include "libavutil/mvinfo.h" > #include "libavutil/timer.h" > #include "avcodec.h" > #include "blockdsp.h" > @@ -596,7 +597,8 @@ static int alloc_picture_tables(MpegEncContext *s, > Picture *pic) > return AVERROR(ENOMEM); > } > > - if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv) { > + if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv || > + (s->avctx->flags2 & CODEC_FLAG2_EXPORT_MVS)) { > int mv_size = 2 * (b8_array_size + 4) * sizeof(int16_t); > int ref_index_size = 4 * mb_array_size; > > @@ -2104,6 +2106,24 @@ static void draw_arrow(uint8_t *buf, int sx, int sy, > int ex, > draw_line(buf, sx, sy, ex, ey, w, h, stride, color); > } > > +static int add_mb(AVExportedMV *mb, uint32_t mb_type, > + int dst_x, int dst_y, > + int src_x, int src_y, > + int direction) > +{ > + if (dst_x == src_x && dst_y == src_y) > + return 0; > + mb->w = IS_8X8(mb_type) || IS_8X16(mb_type) ? 8 : 16; > + mb->h = IS_8X8(mb_type) || IS_16X8(mb_type) ? 8 : 16; > + mb->src_x = src_x; > + mb->src_y = src_y; > + mb->dst_x = dst_x; > + mb->dst_y = dst_y; > + mb->source = direction ? 1 : -1; > + mb->flags = 0; // XXX: does mb_type contain extra information that could > be exported here? > + return 1; > +} > + > /** > * Print debugging info for the given picture. 
> */ > @@ -2112,6 +2132,86 @@ void ff_print_debug_info2(AVCodecContext *avctx, > AVFrame *pict, uint8_t *mbskip_ > int *low_delay, > int mb_width, int mb_height, int mb_stride, int > quarter_sample) > { > + if ((avctx->flags2 & CODEC_FLAG2_EXPORT_MVS) && mbtype_table && > motion_val[0]) { > + const int shift = 1 + quarter_sample; > + const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || > avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1; > + const int mv_stride = (mb_width << mv_sample_log2) + > + (avctx->codec->id == AV_CODEC_ID_H264 ? 0 > : 1); > + int mb_x, mb_y, mbcount = 0; > + > + /* width * height * directions * 4MB (4MB for IS_8x8) */ this comment together with the following "2 * 4" is confusing (especially for a naive reader - as me) > + AVExportedMV *mvs = av_malloc_array(mb_width * mb_height, 2 * 4 * > sizeof(AVExportedMV)); > + if (!mvs) > + return; > + > + for (mb_y = 0; mb_y < mb_height; mb_y++) { > + for (mb_x = 0; mb_x < mb_width; mb_x++) { > + int i, direction, mb_type = mbtype_table[mb_x + mb_y * > mb_stride]; > + for (direction = 0; direction < 2; direction++) { > + if (!USES_LIST(mb_type, direction)) > + continue; > + if (IS_8X8(mb_type)) { > + for (i = 0; i < 4; i++) { > + int sx = mb_x * 16 + 4 + 8 * (i & 1); > + int sy = mb_y * 16 + 4 + 8 * (i >> 1); > + int xy = (mb_x * 2 + (i & 1) + > + (mb_y * 2 + (i >> 1)) * mv_stride) << > (mv_sample_log2 - 1); > + int mx = (motion_val[direction][xy][0] >> shift) > + sx; > + int my = (motion_val[direction][xy][1] >> shift) > + sy; > + mbcount += add_mb(mvs + mbcount, mb_type, sx, > sy, mx, my, direction); > + } > + } else if (IS_16X8(mb_type)) { > + for (i = 0; i < 2; i++) { > + int sx = mb_x * 16 + 8; > + int sy = mb_y * 16 + 4 + 8 * i; > + int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) > << (mv_sample_log2 - 1); > + int mx = (motion_val[direction][xy][0] >> shift); > + int my = (motion_val[direction][xy][1] >> shift); > + > + if (IS_INTERLACED(mb_type)) > + my *= 2; > + > + mbcount += 
add_mb(mvs + mbcount, mb_type, sx, > sy, mx + sx, my + sy, direction); > + } > + } else if (IS_8X16(mb_type)) { > + for (i = 0; i < 2; i++) { > + int sx = mb_x * 16 + 4 + 8 * i; > + int sy = mb_y * 16 + 8; > + int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) > << (mv_sample_log2 - 1); > + int mx = motion_val[direction][xy][0] >> shift; > + int my = motion_val[direction][xy][1] >> shift; > + > + if (IS_INTERLACED(mb_type)) > + my *= 2; > + > + mbcount += add_mb(mvs + mbcount, mb_type, sx, > sy, mx + sx, my + sy, direction); > + } > + } else { > + int sx = mb_x * 16 + 8; > + int sy = mb_y * 16 + 8; > + int xy = (mb_x + mb_y * mv_stride) << > mv_sample_log2; > + int mx = (motion_val[direction][xy][0]>>shift) + > sx; > + int my = (motion_val[direction][xy][1]>>shift) + > sy; > + mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, > mx, my, direction); > + } > + } > + } > + } Uhm, duplicated non-trivial code, probably you can create a dedicated routine to factorize this with the code below in the function. 
> + if (mbcount) { > + AVFrameSideData *sd; > + > + av_log(avctx, AV_LOG_DEBUG, "Adding %d MVs info to frame %d\n", > mbcount, avctx->frame_number); > + sd = av_frame_new_side_data(pict, AV_FRAME_DATA_MV_INFO, mbcount > * sizeof(AVExportedMV)); > + if (!sd) > + return; > + memcpy(sd->data, mvs, mbcount * sizeof(AVExportedMV)); > + } > + > + av_freep(&mvs); > + } > + > + /* TODO: export all the following to make them accessible for users (and > filters) */ > if (avctx->hwaccel || !mbtype_table > || (avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)) > return; > diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h > index 2e9dfa0..7000531 100644 > --- a/libavcodec/options_table.h > +++ b/libavcodec/options_table.h > @@ -88,6 +88,7 @@ static const AVOption avcodec_options[] = { > {"local_header", "place global headers at every keyframe instead of in > extradata", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_LOCAL_HEADER }, > INT_MIN, INT_MAX, V|E, "flags2"}, > {"chunks", "Frame data might be split into multiple chunks", 0, > AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_CHUNKS }, INT_MIN, INT_MAX, V|D, > "flags2"}, > {"showall", "Show all frames before the first keyframe", 0, > AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_SHOW_ALL }, INT_MIN, INT_MAX, V|D, > "flags2"}, > +{"export_mvs", "export motion vectors through frame side data", 0, > AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_EXPORT_MVS}, INT_MIN, INT_MAX, V|D, > "flags2"}, > {"me_method", "set motion estimation method", OFFSET(me_method), > AV_OPT_TYPE_INT, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method"}, > {"zero", "zero motion estimation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = > ME_ZERO }, INT_MIN, INT_MAX, V|E, "me_method" }, > {"full", "full motion estimation (slowest)", 0, AV_OPT_TYPE_CONST, {.i64 = > ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" }, > diff --git a/libavutil/frame.h b/libavutil/frame.h > index dbbdd29..bd3be03 100644 > --- a/libavutil/frame.h > +++ b/libavutil/frame.h > @@ -87,6 
+87,12 @@ enum AVFrameSideDataType { > * in ETSI TS 101 154 using AVActiveFormatDescription enum. > */ > AV_FRAME_DATA_AFD, > + /** > + * Motion vectors exported by some codecs (on demand through > + * -flags2 export_mvs). Nit, since this is library documentation: * Motion vectors exported by some codecs (on demand through the * export_mvs flag set in the libavcodec AVCodecContext flags2 * option). > + * The data is the AVExportedMV struct defined in libavutil/mvinfo.h. > + */ > + AV_FRAME_DATA_MV_INFO, Nit+: I'd prefer to color it as AV_FRAME_DATA_MV or AV_FRAME_DATA_MOTION_VECTOR, since "INFO" is really generic (and is never used in the other enum named constants). > }; > > enum AVActiveFormatDescription { > diff --git a/libavutil/mvinfo.h b/libavutil/mvinfo.h > new file mode 100644 > index 0000000..735f1b9 > --- /dev/null > +++ b/libavutil/mvinfo.h > @@ -0,0 +1,49 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef AVUTIL_MVINFO_H > +#define AVUTIL_MVINFO_H > + > +#include <stdint.h> > + > +typedef struct AVExportedMV { > + /** > + * Where the current comes from; negative value for past, positive value > future. > + * XXX: set exact relative ref frame reference instead of a +/- 1 > "direction". 
> + */ > + int32_t source; > + /** > + * Width and height of the block. > + */ > + uint8_t w, h; > + /** > + * Absolute source position. > + */ > + uint16_t src_x, src_y; > + /** > + * Absolute destination position. > + */ > + uint16_t dst_x, dst_y; > + /** > + * Extra flag information. > + * Currently unused. > + */ > + uint64_t flags; > +} AVExportedMV; > + > +#endif /* AVUTIL_MVINFO_H */ Nit++: AVMVInfo or simply AVMotionVector ("Exported" and "Info" are too generic). -- FFmpeg = Fostering and Forgiving Mysterious Proud Easy Generator _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel