[FFmpeg-cvslog] avcodec/utvideodec: add SIMD for restore_rgb_planes

2017-06-27 Thread Paul B Mahol
ffmpeg | branch: master | Paul B Mahol  | Mon Jun 26 21:31:12 
2017 +0200| [4ed7c2bbc3d04d5410433fd7038f076538e4a944] | committer: Paul B Mahol

avcodec/utvideodec: add SIMD for restore_rgb_planes

Signed-off-by: Paul B Mahol 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4ed7c2bbc3d04d5410433fd7038f076538e4a944
---

 libavcodec/Makefile  |   2 +-
 libavcodec/utvideo.h |   2 +
 libavcodec/utvideodec.c  |  53 +++-
 libavcodec/utvideodsp.c  |  82 +++
 libavcodec/utvideodsp.h  |  39 +++
 libavcodec/x86/Makefile  |   2 +
 libavcodec/x86/utvideodsp.asm| 103 +++
 libavcodec/x86/utvideodsp_init.c |  43 
 8 files changed, 279 insertions(+), 47 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index f0cba8843d..b440a00746 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -583,7 +583,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o 
ttaencdsp.o ttadata.o
 OBJS-$(CONFIG_TWINVQ_DECODER)  += twinvqdec.o twinvq.o
 OBJS-$(CONFIG_TXD_DECODER) += txd.o
 OBJS-$(CONFIG_ULTI_DECODER)+= ulti.o
-OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o
+OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o utvideodsp.o
 OBJS-$(CONFIG_UTVIDEO_ENCODER) += utvideoenc.o utvideo.o
 OBJS-$(CONFIG_V210_DECODER)+= v210dec.o
 OBJS-$(CONFIG_V210_ENCODER)+= v210enc.o
diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h
index 9559c831fe..a8117851a7 100644
--- a/libavcodec/utvideo.h
+++ b/libavcodec/utvideo.h
@@ -30,6 +30,7 @@
 #include "libavutil/common.h"
 #include "avcodec.h"
 #include "bswapdsp.h"
+#include "utvideodsp.h"
 #include "lossless_videodsp.h"
 #include "lossless_videoencdsp.h"
 
@@ -66,6 +67,7 @@ extern const int ff_ut_pred_order[5];
 typedef struct UtvideoContext {
 const AVClass *class;
 AVCodecContext *avctx;
+UTVideoDSPContext utdsp;
 BswapDSPContext bdsp;
 LLVidDSPContext llviddsp;
 LLVidEncDSPContext llvidencdsp;
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index 0c6f89e83a..44841aaa65 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -333,50 +333,6 @@ fail:
 return AVERROR_INVALIDDATA;
 }
 
-static void restore_rgb_planes(AVFrame *frame, int width, int height)
-{
-uint8_t *src_r = (uint8_t *)frame->data[2];
-uint8_t *src_g = (uint8_t *)frame->data[0];
-uint8_t *src_b = (uint8_t *)frame->data[1];
-uint8_t r, g, b;
-int i, j;
-
-for (j = 0; j < height; j++) {
-for (i = 0; i < width; i++) {
-r = src_r[i];
-g = src_g[i];
-b = src_b[i];
-src_r[i] = r + g - 0x80;
-src_b[i] = b + g - 0x80;
-}
-src_r += frame->linesize[2];
-src_g += frame->linesize[0];
-src_b += frame->linesize[1];
-}
-}
-
-static void restore_rgb_planes10(AVFrame *frame, int width, int height)
-{
-uint16_t *src_r = (uint16_t *)frame->data[2];
-uint16_t *src_g = (uint16_t *)frame->data[0];
-uint16_t *src_b = (uint16_t *)frame->data[1];
-int r, g, b;
-int i, j;
-
-for (j = 0; j < height; j++) {
-for (i = 0; i < width; i++) {
-r = src_r[i];
-g = src_g[i];
-b = src_b[i];
-src_r[i] = (r + g - 0x200) & 0x3FF;
-src_b[i] = (b + g - 0x200) & 0x3FF;
-}
-src_r += frame->linesize[2] / 2;
-src_g += frame->linesize[0] / 2;
-src_b += frame->linesize[1] / 2;
-}
-}
-
 #undef A
 #undef B
 #undef C
@@ -696,7 +652,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, 
int *got_frame,
 }
 }
 }
-restore_rgb_planes(frame.f, avctx->width, avctx->height);
+c->utdsp.restore_rgb_planes(frame.f->data[2], frame.f->data[0], 
frame.f->data[1],
+frame.f->linesize[2], 
frame.f->linesize[0], frame.f->linesize[1],
+avctx->width, avctx->height);
 break;
 case AV_PIX_FMT_GBRAP10:
 case AV_PIX_FMT_GBRP10:
@@ -709,7 +667,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, 
int *got_frame,
 if (ret)
 return ret;
 }
-restore_rgb_planes10(frame.f, avctx->width, avctx->height);
+c->utdsp.restore_rgb_planes10((uint16_t *)frame.f->data[2], (uint16_t 
*)frame.f->data[0], (uint16_t *)frame.f->data[1],
+  frame.f->linesize[2] / 2, 
frame.f->linesize[0] / 2, frame.f->linesize[1] / 2,
+  avctx->width, avctx->height);
 break;
 case AV_PIX_FMT_YUV420P:
 for (i = 0; i < 3; i++) {
@@ -830,6 +790,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
 c->avctx = avctx;
 
+ff_utvideo

[FFmpeg-cvslog] avformat: Fix Pro-MPEG non-square matrix

2017-06-27 Thread Andreas Håkon
ffmpeg | branch: master | Andreas Håkon  | Fri 
Jan 13 13:07:57 2017 +0100| [a29c7127297af7f72384cb2a96571853d16e6f82] | 
committer: Michael Niedermayer

avformat: Fix Pro-MPEG non-square matrix

Reviewed-by:vta...@mobibase.com
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a29c7127297af7f72384cb2a96571853d16e6f82
---

 libavformat/prompeg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/prompeg.c b/libavformat/prompeg.c
index cc1baa4ac0..9770a916a2 100644
--- a/libavformat/prompeg.c
+++ b/libavformat/prompeg.c
@@ -432,7 +432,7 @@ static int prompeg_write(URLContext *h, const uint8_t *buf, 
int size) {
 
 // FEC (column) send block-aligned
 if (!s->first && s->packet_idx % s->d == 0) {
-col_out_idx = s->packet_idx / s->l;
+col_out_idx = s->packet_idx / s->d;
 if ((ret = prompeg_write_fec(h, s->fec_col[col_out_idx], 
PROMPEG_FEC_COL)) < 0)
 goto end;
 written += ret;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec/ffv1enc: Allow less than 2 rows of slices for low vertical resolution

2017-06-27 Thread Michael Niedermayer
ffmpeg | branch: master | Michael Niedermayer  | Mon 
Jun 26 16:14:35 2017 +0200| [430d4f2bb52e6d1d7d375186cb5c710008a20603] | 
committer: Michael Niedermayer

avcodec/ffv1enc: Allow less than 2 rows of slices for low vertical resolution

Fixes: Ticket5548

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=430d4f2bb52e6d1d7d375186cb5c710008a20603
---

 libavcodec/ffv1enc.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index e59d540737..39b52d5a48 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -850,6 +850,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
 if (s->version > 1) {
 int plane_count = 1 + 2*s->chroma_planes + s->transparency;
 s->num_v_slices = (avctx->width > 352 || avctx->height > 288 || 
!avctx->slices) ? 2 : 1;
+
+if (avctx->height < 5)
+s->num_v_slices = 1;
+
 for (; s->num_v_slices < 32; s->num_v_slices++) {
 for (s->num_h_slices = s->num_v_slices; s->num_h_slices < 
2*s->num_v_slices; s->num_h_slices++) {
 int maxw = (avctx->width  + s->num_h_slices - 1) / 
s->num_h_slices;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec/ffv1enc: compute the max number of slices and limit by that

2017-06-27 Thread Michael Niedermayer
ffmpeg | branch: master | Michael Niedermayer  | Tue 
Jun 27 13:09:58 2017 +0200| [0f8d3d8a462c0152ac489dbb013f6df027edd6c4] | 
committer: Michael Niedermayer

avcodec/ffv1enc: compute the max number of slices and limit by that

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0f8d3d8a462c0152ac489dbb013f6df027edd6c4
---

 libavcodec/ffv1enc.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 39b52d5a48..c9a885ebfa 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -849,16 +849,17 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
 if (s->version > 1) {
 int plane_count = 1 + 2*s->chroma_planes + s->transparency;
+int max_h_slices = AV_CEIL_RSHIFT(avctx->width , s->chroma_h_shift);
+int max_v_slices = AV_CEIL_RSHIFT(avctx->height, s->chroma_v_shift);
 s->num_v_slices = (avctx->width > 352 || avctx->height > 288 || 
!avctx->slices) ? 2 : 1;
 
-if (avctx->height < 5)
-s->num_v_slices = 1;
+s->num_v_slices = FFMIN(s->num_v_slices, max_v_slices);
 
 for (; s->num_v_slices < 32; s->num_v_slices++) {
 for (s->num_h_slices = s->num_v_slices; s->num_h_slices < 
2*s->num_v_slices; s->num_h_slices++) {
 int maxw = (avctx->width  + s->num_h_slices - 1) / 
s->num_h_slices;
 int maxh = (avctx->height + s->num_v_slices - 1) / 
s->num_v_slices;
-if (s->num_h_slices > avctx->width || s->num_v_slices > 
avctx->height)
+if (s->num_h_slices > max_h_slices || s->num_v_slices > 
max_v_slices)
 continue;
 if (maxw * maxh * (int64_t)(s->bits_per_raw_sample+1) * 
plane_count > 8<<24)
 continue;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec/proresenc_kostya: add 4444XQ profile

2017-06-27 Thread Paul B Mahol
ffmpeg | branch: master | Paul B Mahol  | Mon Jun 26 16:30:43 
2017 +0200| [bbaca6e867999699c026f0de1267f7a5ae06684b] | committer: Paul B Mahol

avcodec/proresenc_kostya: add 4444XQ profile

Signed-off-by: Paul B Mahol 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bbaca6e867999699c026f0de1267f7a5ae06684b
---

 doc/encoders.texi |  1 +
 libavcodec/proresenc_kostya.c | 18 +++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index a8bee092cf..9f03c0c608 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -2381,6 +2381,7 @@ Select the ProRes profile to encode
 @item standard
 @item hq
 @item 
+@item xq
 @end table
 
 @item quant_mat @var{integer}
diff --git a/libavcodec/proresenc_kostya.c b/libavcodec/proresenc_kostya.c
index 09bb6110d3..149dc81b3c 100644
--- a/libavcodec/proresenc_kostya.c
+++ b/libavcodec/proresenc_kostya.c
@@ -46,6 +46,7 @@ enum {
 PRORES_PROFILE_STANDARD,
 PRORES_PROFILE_HQ,
 PRORES_PROFILE_,
+PRORES_PROFILE_XQ,
 };
 
 enum {
@@ -124,7 +125,7 @@ static const struct prores_profile {
 int max_quant;
 int br_tab[NUM_MB_LIMITS];
 int quant;
-} prores_profile_info[5] = {
+} prores_profile_info[6] = {
 {
 .full_name = "proxy",
 .tag   = MKTAG('a', 'p', 'c', 'o'),
@@ -164,6 +165,14 @@ static const struct prores_profile {
 .max_quant = 6,
 .br_tab= { 2350, 1828, 1600, 1425 },
 .quant = QUANT_MAT_HQ,
+},
+{
+.full_name = "XQ",
+.tag   = MKTAG('a', 'p', '4', 'x'),
+.min_quant = 1,
+.max_quant = 6,
+.br_tab= { 3525, 2742, 2400, 2137 },
+.quant = QUANT_MAT_HQ,
 }
 };
 
@@ -1155,7 +1164,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
: "HQ profile to keep best quality");
 }
 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
-if (ctx->profile != PRORES_PROFILE_) {
+if (ctx->profile != PRORES_PROFILE_ &&
+ctx->profile != PRORES_PROFILE_XQ) {
 // force alpha and warn
 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
"encode alpha. Override with -profile if needed.\n");
@@ -1298,7 +1308,7 @@ static const AVOption options[] = {
 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
 { "profile",   NULL, OFFSET(profile), AV_OPT_TYPE_INT,
 { .i64 = PRORES_PROFILE_AUTO },
-PRORES_PROFILE_AUTO, PRORES_PROFILE_, VE, "profile" },
+PRORES_PROFILE_AUTO, PRORES_PROFILE_XQ, VE, "profile" },
 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO 
},
 0, 0, VE, "profile" },
 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
PRORES_PROFILE_PROXY },
@@ -1311,6 +1321,8 @@ static const AVOption options[] = {
 0, 0, VE, "profile" },
 { "",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
PRORES_PROFILE_ },
 0, 0, VE, "profile" },
+{ "xq",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
PRORES_PROFILE_XQ },
+0, 0, VE, "profile" },
 { "vendor", "vendor ID", OFFSET(vendor),
 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Interplay MVE: Implement MVE SEND_BUFFER operation

2017-06-27 Thread Hein-Pieter van Braam
ffmpeg | branch: master | Hein-Pieter van Braam  | Sun Jun 25 
21:59:00 2017 +0200| [ba2c385006e3100d6cd506f61c53186ba054a06d] | committer: 
Paul B Mahol

Interplay MVE: Implement MVE SEND_BUFFER operation

Interplay MVE movies have a SEND_BUFFER operation. Only after this
command does the current decoding buffer get displayed. This is required
for the other frame formats. They are fixed-size and can't always encode
a full frame worth of pixeldata.

This code prevents half-finished frames from being emitted.

Signed-off-by: Hein-Pieter van Braam 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ba2c385006e3100d6cd506f61c53186ba054a06d
---

 libavcodec/interplayvideo.c | 15 +--
 libavformat/ipmovie.c   | 16 +++-
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/libavcodec/interplayvideo.c b/libavcodec/interplayvideo.c
index df3314d4b7..7c699265dd 100644
--- a/libavcodec/interplayvideo.c
+++ b/libavcodec/interplayvideo.c
@@ -990,17 +990,20 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
 IpvideoContext *s = avctx->priv_data;
 AVFrame *frame = data;
 int ret;
+int send_buffer;
 
 if (av_packet_get_side_data(avpkt, AV_PKT_DATA_PARAM_CHANGE, NULL)) {
 av_frame_unref(s->last_frame);
 av_frame_unref(s->second_last_frame);
 }
 
-if (buf_size < 2)
+if (buf_size < 3)
 return AVERROR_INVALIDDATA;
 
+send_buffer = AV_RL8(avpkt->data);
+
 /* decoding map contains 4 bits of information per 8x8 block */
-s->decoding_map_size = AV_RL16(avpkt->data);
+s->decoding_map_size = AV_RL16(avpkt->data + 1);
 
 /* compressed buffer needs to be large enough to at least hold an entire
  * decoding map */
@@ -1008,9 +1011,9 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
 return buf_size;
 
 
-s->decoding_map = buf + 2;
-bytestream2_init(&s->stream_ptr, buf + 2 + s->decoding_map_size,
- buf_size - s->decoding_map_size);
+s->decoding_map = buf + 3;
+bytestream2_init(&s->stream_ptr, buf + 3 + s->decoding_map_size,
+ buf_size - s->decoding_map_size - 3);
 
 if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
 return ret;
@@ -1028,7 +1031,7 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
 
 ipvideo_decode_opcodes(s, frame);
 
-*got_frame = 1;
+*got_frame = send_buffer;
 
 /* shuffle frames */
 av_frame_unref(s->second_last_frame);
diff --git a/libavformat/ipmovie.c b/libavformat/ipmovie.c
index 29eeaf6b8b..0705d3381b 100644
--- a/libavformat/ipmovie.c
+++ b/libavformat/ipmovie.c
@@ -91,6 +91,7 @@ typedef struct IPMVEContext {
 uint32_t palette[256];
 int  has_palette;
 int  changed;
+uint8_t  send_buffer;
 
 unsigned int audio_bits;
 unsigned int audio_channels;
@@ -154,9 +155,9 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext 
*pb,
 
 } else if (s->decode_map_chunk_offset) {
 
-/* send both the decode map and the video data together */
+/* send the decode map, the video data, and the send_buffer flag 
together */
 
-if (av_new_packet(pkt, 2 + s->decode_map_chunk_size + 
s->video_chunk_size))
+if (av_new_packet(pkt, 3 + s->decode_map_chunk_size + 
s->video_chunk_size))
 return CHUNK_NOMEM;
 
 if (s->has_palette) {
@@ -178,8 +179,11 @@ static int load_ipmovie_packet(IPMVEContext *s, 
AVIOContext *pb,
 avio_seek(pb, s->decode_map_chunk_offset, SEEK_SET);
 s->decode_map_chunk_offset = 0;
 
-AV_WL16(pkt->data, s->decode_map_chunk_size);
-if (avio_read(pb, pkt->data + 2, s->decode_map_chunk_size) !=
+AV_WL8(pkt->data, s->send_buffer);
+s->send_buffer = 0;
+
+AV_WL16(pkt->data + 1, s->decode_map_chunk_size);
+if (avio_read(pb, pkt->data + 3, s->decode_map_chunk_size) !=
 s->decode_map_chunk_size) {
 av_packet_unref(pkt);
 return CHUNK_EOF;
@@ -188,7 +192,7 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext 
*pb,
 avio_seek(pb, s->video_chunk_offset, SEEK_SET);
 s->video_chunk_offset = 0;
 
-if (avio_read(pb, pkt->data + 2 + s->decode_map_chunk_size,
+if (avio_read(pb, pkt->data + 3 + s->decode_map_chunk_size,
 s->video_chunk_size) != s->video_chunk_size) {
 av_packet_unref(pkt);
 return CHUNK_EOF;
@@ -444,6 +448,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, 
AVIOContext *pb,
 case OPCODE_SEND_BUFFER:
 av_log(s->avf, AV_LOG_TRACE, "send buffer\n");
 avio_skip(pb, opcode_size);
+s->send_buffer = 1;
 break;
 
 case OPCODE_AUDIO_FRAME:
@@ -590,6 +595,7 @@ static int ipmovie_read_header(AVFormatContext *s)
 ipmovie->video_pts = ipmovie->audio_frame_count = 0;
 ipmovie->audio_chunk_offset = ipmovie->video

[FFmpeg-cvslog] Interplay MVE: Refactor IP packet format

2017-06-27 Thread Hein-Pieter van Braam
ffmpeg | branch: master | Hein-Pieter van Braam  | Sun Jun 25 
21:59:01 2017 +0200| [8f87bfb4b7ddeb27c318aa45144d7fc45930039b] | committer: 
Paul B Mahol

Interplay MVE: Refactor IP packet format

Interplay MVE can contain up to three different frame formats. They
require different streams of information to render a frame. This patch
changes the IP packet format to prepare for the extra frame formats.

Signed-off-by: Hein-Pieter van Braam 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8f87bfb4b7ddeb27c318aa45144d7fc45930039b
---

 libavcodec/interplayvideo.c | 33 +---
 libavformat/ipmovie.c   | 46 -
 2 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/libavcodec/interplayvideo.c b/libavcodec/interplayvideo.c
index 7c699265dd..5dfb0d6bff 100644
--- a/libavcodec/interplayvideo.c
+++ b/libavcodec/interplayvideo.c
@@ -991,29 +991,40 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
 AVFrame *frame = data;
 int ret;
 int send_buffer;
+int frame_format;
+int video_data_size;
 
 if (av_packet_get_side_data(avpkt, AV_PKT_DATA_PARAM_CHANGE, NULL)) {
 av_frame_unref(s->last_frame);
 av_frame_unref(s->second_last_frame);
 }
 
-if (buf_size < 3)
+if (buf_size < 6)
 return AVERROR_INVALIDDATA;
 
-send_buffer = AV_RL8(avpkt->data);
+frame_format = AV_RL8(buf);
+send_buffer  = AV_RL8(buf + 1);
+video_data_size  = AV_RL16(buf + 2);
+s->decoding_map_size = AV_RL16(buf + 4);
 
-/* decoding map contains 4 bits of information per 8x8 block */
-s->decoding_map_size = AV_RL16(avpkt->data + 1);
+if (frame_format != 0x11)
+av_log(avctx, AV_LOG_ERROR, "Frame type 0x%02X unsupported\n", 
frame_format);
 
-/* compressed buffer needs to be large enough to at least hold an entire
- * decoding map */
-if (buf_size < s->decoding_map_size + 2)
-return buf_size;
+if (! s->decoding_map_size) {
+av_log(avctx, AV_LOG_ERROR, "Empty decoding map\n");
+return AVERROR_INVALIDDATA;
+}
 
+bytestream2_init(&s->stream_ptr, buf + 6, video_data_size);
 
-s->decoding_map = buf + 3;
-bytestream2_init(&s->stream_ptr, buf + 3 + s->decoding_map_size,
- buf_size - s->decoding_map_size - 3);
+/* decoding map contains 4 bits of information per 8x8 block */
+s->decoding_map = buf + 6 + video_data_size;
+
+/* ensure we can't overread the packet */
+if (buf_size < 6 + s->decoding_map_size + video_data_size) {
+av_log(avctx, AV_LOG_ERROR, "Invalid IP packet size\n");
+return AVERROR_INVALIDDATA;
+}
 
 if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
 return ret;
diff --git a/libavformat/ipmovie.c b/libavformat/ipmovie.c
index 0705d3381b..a9ffca4872 100644
--- a/libavformat/ipmovie.c
+++ b/libavformat/ipmovie.c
@@ -69,7 +69,7 @@
 #define OPCODE_UNKNOWN_0E  0x0E
 #define OPCODE_SET_DECODING_MAP0x0F
 #define OPCODE_UNKNOWN_10  0x10
-#define OPCODE_VIDEO_DATA  0x11
+#define OPCODE_VIDEO_DATA_11   0x11
 #define OPCODE_UNKNOWN_12  0x12
 #define OPCODE_UNKNOWN_13  0x13
 #define OPCODE_UNKNOWN_14  0x14
@@ -92,6 +92,7 @@ typedef struct IPMVEContext {
 int  has_palette;
 int  changed;
 uint8_t  send_buffer;
+uint8_t  frame_format;
 
 unsigned int audio_bits;
 unsigned int audio_channels;
@@ -153,11 +154,11 @@ static int load_ipmovie_packet(IPMVEContext *s, 
AVIOContext *pb,
 
 chunk_type = CHUNK_VIDEO;
 
-} else if (s->decode_map_chunk_offset) {
+} else if (s->frame_format) {
 
-/* send the decode map, the video data, and the send_buffer flag 
together */
+/* send the frame format, decode map, the video data, and the 
send_buffer flag together */
 
-if (av_new_packet(pkt, 3 + s->decode_map_chunk_size + 
s->video_chunk_size))
+if (av_new_packet(pkt, 6 + s->decode_map_chunk_size + 
s->video_chunk_size))
 return CHUNK_NOMEM;
 
 if (s->has_palette) {
@@ -175,29 +176,38 @@ static int load_ipmovie_packet(IPMVEContext *s, 
AVIOContext *pb,
 ff_add_param_change(pkt, 0, 0, 0, s->video_width, s->video_height);
 s->changed = 0;
 }
-pkt->pos= s->decode_map_chunk_offset;
-avio_seek(pb, s->decode_map_chunk_offset, SEEK_SET);
-s->decode_map_chunk_offset = 0;
 
-AV_WL8(pkt->data, s->send_buffer);
+AV_WL8(pkt->data, s->frame_format);
+AV_WL8(pkt->data + 1, s->send_buffer);
+AV_WL16(pkt->data + 2, s->video_chunk_size);
+AV_WL16(pkt->data + 4, s->decode_map_chunk_size);
+
+s->frame_format = 0;
 s->send_buffer = 0;
 
-AV_WL16(pkt->data + 1, s->decode_map_chunk_size);
-if (avio_read(pb, pkt->data + 

[FFmpeg-cvslog] Interplay MVE: Implement frame format 0x06

2017-06-27 Thread Hein-Pieter van Braam
ffmpeg | branch: master | Hein-Pieter van Braam  | Sun Jun 25 
21:59:02 2017 +0200| [19f6fd199e46c5a56f09a768ece4246b48bd86dd] | committer: 
Paul B Mahol

Interplay MVE: Implement frame format 0x06

This implements the 0x06 frame format for Interplay MVE movies. The
format is relatively simple. The video data consists of two parts:

16 bits per 8x8 block movement data
a number of 8x8 blocks of pixel data

For each 8x8 block of pixel data the movement data is consulted. There
are 3 possible meanings of the movement data:
* zero : copy the 8x8 block from the pixel data
* negative : copy the 8x8 block from the previous frame from an offset
 determined by the actual value of the entry -0xC000.
* positive : copy the 8x8 block from the current frame from an offset
 determined by the actual value of the entry -0x4000

Decoding happens in two passes: in the first pass only new pixel data is
copied; during the second pass data is copied from the previous and
current frames.

The codec expects the frame currently being decoded to still hold the
data from 2 frames ago when decoding starts.

Signed-off-by: Hein-Pieter van Braam 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=19f6fd199e46c5a56f09a768ece4246b48bd86dd
---

 libavcodec/interplayvideo.c | 125 
 libavformat/ipmovie.c   |  15 --
 2 files changed, 126 insertions(+), 14 deletions(-)

diff --git a/libavcodec/interplayvideo.c b/libavcodec/interplayvideo.c
index 5dfb0d6bff..431eeb12c7 100644
--- a/libavcodec/interplayvideo.c
+++ b/libavcodec/interplayvideo.c
@@ -903,7 +903,81 @@ static int (* const 
ipvideo_decode_block16[])(IpvideoContext *s, AVFrame *frame)
 ipvideo_decode_block_opcode_0xE_16, ipvideo_decode_block_opcode_0x1,
 };
 
-static void ipvideo_decode_opcodes(IpvideoContext *s, AVFrame *frame)
+static void ipvideo_format_06_firstpass(IpvideoContext *s, AVFrame *frame, 
short opcode)
+{
+int line;
+
+if (!opcode) {
+for (line = 0; line < 8; ++line) {
+bytestream2_get_buffer(&s->stream_ptr, s->pixel_ptr, 8);
+s->pixel_ptr += s->stride;
+}
+} else {
+/* Don't try to copy second_last_frame data on the first frames */
+if (s->avctx->frame_number > 2)
+copy_from(s, s->second_last_frame, frame, 0, 0);
+}
+}
+
+static void ipvideo_format_06_secondpass(IpvideoContext *s, AVFrame *frame, 
short opcode)
+{
+int off_x, off_y;
+
+if (opcode < 0) {
+off_x = ((unsigned short)opcode - 0xC000) % frame->linesize[0];
+off_y = ((unsigned short)opcode - 0xC000) / frame->linesize[0];
+copy_from(s, s->last_frame, frame, off_x, off_y);
+} else if (opcode > 0) {
+off_x = ((unsigned short)opcode - 0x4000) % frame->linesize[0];
+off_y = ((unsigned short)opcode - 0x4000) / frame->linesize[0];
+copy_from(s, frame, frame, off_x, off_y);
+}
+}
+
+static void (* const ipvideo_format_06_passes[])(IpvideoContext *s, AVFrame 
*frame, short op) = {
+ipvideo_format_06_firstpass, ipvideo_format_06_secondpass,
+};
+
+static void ipvideo_decode_format_06_opcodes(IpvideoContext *s, AVFrame *frame)
+{
+int pass, x, y;
+short opcode;
+GetByteContext decoding_map_ptr;
+
+/* this is PAL8, so make the palette available */
+memcpy(frame->data[1], s->pal, AVPALETTE_SIZE);
+s->stride = frame->linesize[0];
+
+s->line_inc = s->stride - 8;
+s->upper_motion_limit_offset = (s->avctx->height - 8) * frame->linesize[0]
+  + (s->avctx->width - 8) * (1 + s->is_16bpp);
+
+bytestream2_init(&decoding_map_ptr, s->decoding_map, s->decoding_map_size);
+
+for (pass = 0; pass < 2; ++pass) {
+bytestream2_seek(&decoding_map_ptr, 0, SEEK_SET);
+for (y = 0; y < s->avctx->height; y += 8) {
+for (x = 0; x < s->avctx->width; x += 8) {
+opcode = bytestream2_get_le16(&decoding_map_ptr);
+
+ff_tlog(s->avctx,
+"  block @ (%3d, %3d): opcode 0x%X, data ptr offset 
%d\n",
+x, y, opcode, bytestream2_tell(&s->stream_ptr));
+
+s->pixel_ptr = frame->data[0] + x + y * frame->linesize[0];
+ipvideo_format_06_passes[pass](s, frame, opcode);
+}
+}
+}
+
+if (bytestream2_get_bytes_left(&s->stream_ptr) > 1) {
+av_log(s->avctx, AV_LOG_DEBUG,
+   "decode finished with %d bytes left over\n",
+   bytestream2_get_bytes_left(&s->stream_ptr));
+}
+}
+
+static void ipvideo_decode_format_11_opcodes(IpvideoContext *s, AVFrame *frame)
 {
 int x, y;
 unsigned char opcode;
@@ -1007,18 +1081,40 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
 video_data_size  = AV_RL16(buf + 2);
 s->decoding_map_size = AV_RL16(buf + 4);
 
-if (frame_format != 0x11)
-av_log(avctx, AV_LOG_ERROR, "Frame type 0x%02X

[FFmpeg-cvslog] Interplay MVE: Implement frame format 0x10

2017-06-27 Thread Hein-Pieter van Braam
ffmpeg | branch: master | Hein-Pieter van Braam  | Sun Jun 25 
21:59:03 2017 +0200| [8f96da060a26f6cc2adb950269dae37feba2664e] | committer: 
Paul B Mahol

Interplay MVE: Implement frame format 0x10

This implements the 0x10 frame format for Interplay MVE movies. The
format is a variation on the 0x06 format with some changes. In addition
to the decoding map there's also a skip map. This skip map is used to
determine what 8x8 blocks can change in a particular frame.

This format expects to be able to copy an 8x8 block from before the last
time it was changed. This can be an arbitrary time in the past. In order
to implement this, the decoder allocates two additional AVFrames where
actual decoding happens. At the end of decoding a frame, changed blocks
are copied to a finished frame based on the skip map.

The skip map's encoding is a little convoluted; refer to the code
for details.

Values in the decoding map are the same as in format 0x06.

Signed-off-by: Hein-Pieter van Braam 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8f96da060a26f6cc2adb950269dae37feba2664e
---

 libavcodec/interplayvideo.c | 182 ++--
 libavformat/ipmovie.c   |  68 +
 2 files changed, 227 insertions(+), 23 deletions(-)

diff --git a/libavcodec/interplayvideo.c b/libavcodec/interplayvideo.c
index 431eeb12c7..421de26cb1 100644
--- a/libavcodec/interplayvideo.c
+++ b/libavcodec/interplayvideo.c
@@ -55,8 +55,15 @@ typedef struct IpvideoContext {
 HpelDSPContext hdsp;
 AVFrame *second_last_frame;
 AVFrame *last_frame;
+
+/* For format 0x10 */
+AVFrame *cur_decode_frame;
+AVFrame *prev_decode_frame;
+
 const unsigned char *decoding_map;
 int decoding_map_size;
+const unsigned char *skip_map;
+int skip_map_size;
 
 int is_16bpp;
 GetByteContext stream_ptr, mv_ptr;
@@ -977,6 +984,114 @@ static void 
ipvideo_decode_format_06_opcodes(IpvideoContext *s, AVFrame *frame)
 }
 }
 
+static void ipvideo_format_10_firstpass(IpvideoContext *s, AVFrame *frame, 
short opcode)
+{
+int line;
+
+if (!opcode) {
+for (line = 0; line < 8; ++line) {
+bytestream2_get_buffer(&s->stream_ptr, s->pixel_ptr, 8);
+s->pixel_ptr += s->stride;
+}
+}
+}
+
+static void ipvideo_format_10_secondpass(IpvideoContext *s, AVFrame *frame, 
short opcode)
+{
+int off_x, off_y;
+
+if (opcode < 0) {
+off_x = ((unsigned short)opcode - 0xC000) % 
s->cur_decode_frame->linesize[0];
+off_y = ((unsigned short)opcode - 0xC000) / 
s->cur_decode_frame->linesize[0];
+copy_from(s, s->prev_decode_frame, s->cur_decode_frame, off_x, off_y);
+} else if (opcode > 0) {
+off_x = ((unsigned short)opcode - 0x4000) % 
s->cur_decode_frame->linesize[0];
+off_y = ((unsigned short)opcode - 0x4000) / 
s->cur_decode_frame->linesize[0];
+copy_from(s, s->cur_decode_frame, s->cur_decode_frame, off_x, off_y);
+}
+}
+
+static void (* const ipvideo_format_10_passes[])(IpvideoContext *s, AVFrame 
*frame, short op) = {
+ipvideo_format_10_firstpass, ipvideo_format_10_secondpass,
+};
+
+static void ipvideo_decode_format_10_opcodes(IpvideoContext *s, AVFrame *frame)
+{
+int pass, x, y, changed_block;
+short opcode, skip;
+GetByteContext decoding_map_ptr;
+GetByteContext skip_map_ptr;
+
+bytestream2_skip(&s->stream_ptr, 14); /* data starts 14 bytes in */
+
+/* this is PAL8, so make the palette available */
+memcpy(frame->data[1], s->pal, AVPALETTE_SIZE);
+s->stride = frame->linesize[0];
+
+s->line_inc = s->stride - 8;
+s->upper_motion_limit_offset = (s->avctx->height - 8) * frame->linesize[0]
+  + (s->avctx->width - 8) * (1 + s->is_16bpp);
+
+bytestream2_init(&decoding_map_ptr, s->decoding_map, s->decoding_map_size);
+bytestream2_init(&skip_map_ptr, s->skip_map, s->skip_map_size);
+
+for (pass = 0; pass < 2; ++pass) {
+bytestream2_seek(&decoding_map_ptr, 0, SEEK_SET);
+bytestream2_seek(&skip_map_ptr, 0, SEEK_SET);
+skip = bytestream2_get_le16(&skip_map_ptr);
+
+for (y = 0; y < s->avctx->height; y += 8) {
+for (x = 0; x < s->avctx->width; x += 8) {
+s->pixel_ptr = s->cur_decode_frame->data[0] + x + y * 
s->cur_decode_frame->linesize[0];
+
+while (skip <= 0)  {
+if (skip != -0x8000 && skip) {
+opcode = bytestream2_get_le16(&decoding_map_ptr);
+ipvideo_format_10_passes[pass](s, frame, opcode);
+break;
+}
+skip = bytestream2_get_le16(&skip_map_ptr);
+}
+skip *= 2;
+}
+}
+}
+
+bytestream2_seek(&skip_map_ptr, 0, SEEK_SET);
+skip = bytestream2_get_le16(&skip_map_ptr);
+for (y = 0; y < s->avctx->height; y += 8) {
+for (x = 0

[FFmpeg-cvslog] avcodec/interplayvideo: properly check if there is enough bytes left

2017-06-27 Thread Paul B Mahol
ffmpeg | branch: master | Paul B Mahol  | Tue Jun 27 15:46:08 
2017 +0200| [feab761b73c37311a23a6cbbcee1ddf56439d5a4] | committer: Paul B Mahol

avcodec/interplayvideo: properly check if there is enough bytes left

Signed-off-by: Paul B Mahol 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=feab761b73c37311a23a6cbbcee1ddf56439d5a4
---

 libavcodec/interplayvideo.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/libavcodec/interplayvideo.c b/libavcodec/interplayvideo.c
index 421de26cb1..2ac2f991a6 100644
--- a/libavcodec/interplayvideo.c
+++ b/libavcodec/interplayvideo.c
@@ -1233,6 +1233,10 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
 s->decoding_map_size = ((s->avctx->width / 8) * (s->avctx->height 
/ 8)) * 2;
 s->decoding_map = buf + 8 + 14; /* 14 bits of op data */
 video_data_size -= s->decoding_map_size + 14;
+
+if (buf_size < 8 + s->decoding_map_size + 14 + video_data_size)
+return AVERROR_INVALIDDATA;
+
 bytestream2_init(&s->stream_ptr, buf + 8 + s->decoding_map_size + 
14, video_data_size);
 
 break;
@@ -1253,6 +1257,9 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
 return AVERROR_INVALIDDATA;
 }
 
+if (buf_size < 8 + video_data_size + s->decoding_map_size + 
s->skip_map_size)
+return AVERROR_INVALIDDATA;
+
 bytestream2_init(&s->stream_ptr, buf + 8, video_data_size);
 s->decoding_map = buf + 8 + video_data_size;
 s->skip_map = buf + 8 + video_data_size + s->decoding_map_size;
@@ -1270,6 +1277,9 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
 return AVERROR_INVALIDDATA;
 }
 
+if (buf_size < 8 + video_data_size + s->decoding_map_size)
+return AVERROR_INVALIDDATA;
+
 bytestream2_init(&s->stream_ptr, buf + 8, video_data_size);
 s->decoding_map = buf + 8 + video_data_size;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Interplay MVE: Changelog entry for changes

2017-06-27 Thread Hein-Pieter van Braam
ffmpeg | branch: master | Hein-Pieter van Braam  | Sun Jun 25 
21:59:04 2017 +0200| [c4cbaec6e3af24eb9f32c0864838b56339b7997f] | committer: 
Paul B Mahol

Interplay MVE: Changelog entry for changes

Signed-off-by: Hein-Pieter van Braam 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c4cbaec6e3af24eb9f32c0864838b56339b7997f
---

 Changelog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Changelog b/Changelog
index 4f46edaddb..24d2255183 100644
--- a/Changelog
+++ b/Changelog
@@ -24,6 +24,7 @@ version :
 - roberts video filter
 - The x86 assembler default switched from yasm to nasm, pass
   --x86asmexe=yasm to configure to restore the old behavior.
+- additional frame format support for Interplay MVE movies
 
 version 3.3:
 - CrystalHD decoder moved to new decode API

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] x86inc: don't use read-only data sections on COFF targets

2017-06-27 Thread James Almer
ffmpeg | branch: master | James Almer  | Mon Jun 26 00:29:16 
2017 -0300| [4d62ee674699645c7b7105213e7d339665144069] | committer: James Almer

x86inc: don't use read-only data sections on COFF targets

Yasm:
src/libavfilter/x86/af_volume.asm:24: warning: Standard COFF does not support 
read-only data sections
src/libavfilter/x86/af_volume.asm:24: warning: Unrecognized qualifier `align'

Nasm:
src/libavfilter/x86/af_volume.asm:24: error: standard COFF does not support 
section alignment specification
src/libavutil/x86/x86inc.asm:92: ... from macro `SECTION_RODATA' defined here

Tested-by: Clément Bœsch 
Signed-off-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4d62ee674699645c7b7105213e7d339665144069
---

 libavutil/x86/x86inc.asm | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index fa826e6d85..c4ec29bd9d 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -88,6 +88,8 @@
 %macro SECTION_RODATA 0-1 16
 %ifidn __OUTPUT_FORMAT__,aout
 section .text
+%elifidn __OUTPUT_FORMAT__,coff
+section .text
 %else
 SECTION .rodata align=%1
 %endif

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] lavu: add new D3D11 pixfmt and hwcontext

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Tue Jun  6 18:51:07 
2017 +0200| [3303511f33dcc1b708fc18072cd93bca62196676] | committer: wm4

lavu: add new D3D11 pixfmt and hwcontext

To be used with the new d3d11 hwaccel decode API.

With the new hwaccel API, we don't want surfaces to depend on the
decoder (other than the required dimension and format). The old D3D11VA
pixfmt uses ID3D11VideoDecoderOutputView pointers, which include the
decoder configuration, and thus is incompatible with the new hwaccel
API. This patch introduces AV_PIX_FMT_D3D11, which uses ID3D11Texture2D
and an index. It's simpler and compatible with the new hwaccel API.

The introduced hwcontext supports only the new pixfmt.

Frame upload code untested.

Significantly based on work by Steve Lhomme, but with
heavy changes/rewrites.

Merges Libav commit fff90422d181744cd75dbf011687ee7095f02875.

Signed-off-by: Diego Biurrun 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3303511f33dcc1b708fc18072cd93bca62196676
---

 doc/APIchanges |   3 +
 libavutil/Makefile |   3 +
 libavutil/hwcontext.c  |   4 +
 libavutil/hwcontext.h  |   1 +
 libavutil/hwcontext_d3d11va.c  | 490 +
 libavutil/hwcontext_d3d11va.h  | 160 ++
 libavutil/hwcontext_internal.h |   1 +
 libavutil/pixdesc.c|   4 +
 libavutil/pixfmt.h |  14 +-
 libavutil/version.h|   2 +-
 10 files changed, 680 insertions(+), 2 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index fc013fd513..857d44402d 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@ libavutil: 2015-08-28
 
 API changes, most recent first:
 
+2017-xx-xx - xxx - lavu 56.67.100 - hwcontext.h
+  Add AV_HWDEVICE_TYPE_D3D11VA and AV_PIX_FMT_D3D11.
+
 2017-06-24 - xxx - lavf 57.75.100 - avio.h
   Add AVIO_DATA_MARKER_FLUSH_POINT to signal preferred flush points to aviobuf.
 
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 4fe81fdd07..b4464b0d76 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -33,6 +33,7 @@ HEADERS = adler32.h   
  \
   hmac.h\
   hwcontext.h   \
   hwcontext_cuda.h  \
+  hwcontext_d3d11va.h   \
   hwcontext_dxva2.h \
   hwcontext_qsv.h   \
   hwcontext_vaapi.h \
@@ -156,6 +157,7 @@ OBJS = adler32.o
\
 OBJS-$(!HAVE_ATOMICS_NATIVE)+= atomic.o \
 
 OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o
+OBJS-$(CONFIG_D3D11VA)  += hwcontext_d3d11va.o
 OBJS-$(CONFIG_DXVA2)+= hwcontext_dxva2.o
 OBJS-$(CONFIG_QSV)   += hwcontext_qsv.o
 OBJS-$(CONFIG_LZO)  += lzo.o
@@ -171,6 +173,7 @@ SLIBOBJS-$(HAVE_GNU_WINDRES)+= avutilres.o
 
 SKIPHEADERS-$(HAVE_CUDA_H) += hwcontext_cuda.h
 SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda_internal.h
+SKIPHEADERS-$(CONFIG_D3D11VA)  += hwcontext_d3d11va.h
 SKIPHEADERS-$(CONFIG_DXVA2)+= hwcontext_dxva2.h
 SKIPHEADERS-$(CONFIG_QSV)   += hwcontext_qsv.h
 SKIPHEADERS-$(CONFIG_VAAPI)+= hwcontext_vaapi.h
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 5a22194716..2a755a6878 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -32,6 +32,9 @@ static const HWContextType *const hw_table[] = {
 #if CONFIG_CUDA
 &ff_hwcontext_type_cuda,
 #endif
+#if CONFIG_D3D11VA
+&ff_hwcontext_type_d3d11va,
+#endif
 #if CONFIG_DXVA2
 &ff_hwcontext_type_dxva2,
 #endif
@@ -53,6 +56,7 @@ static const HWContextType *const hw_table[] = {
 static const char *const hw_type_names[] = {
 [AV_HWDEVICE_TYPE_CUDA]   = "cuda",
 [AV_HWDEVICE_TYPE_DXVA2]  = "dxva2",
+[AV_HWDEVICE_TYPE_D3D11VA] = "d3d11va",
 [AV_HWDEVICE_TYPE_QSV]= "qsv",
 [AV_HWDEVICE_TYPE_VAAPI]  = "vaapi",
 [AV_HWDEVICE_TYPE_VDPAU]  = "vdpau",
diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index edf12cc631..afb0d80d59 100644
--- a/libavutil/hwcontext.h
+++ b/libavutil/hwcontext.h
@@ -32,6 +32,7 @@ enum AVHWDeviceType {
 AV_HWDEVICE_TYPE_QSV,
 AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
 AV_HWDEVICE_TYPE_NONE,
+AV_HWDEVICE_TYPE_D3D11VA,
 };
 
 typedef struct AVHWDeviceInternal AVHWDeviceInternal;
diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c
new file mode 100644
index 00..3940502868
--- /dev/null
+++ b/libavutil/hwcontext_d3d11va.c
@@ -0,0 +1,490 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav i

[FFmpeg-cvslog] lavc: set avctx->hwaccel before init

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Tue Jun  6 18:51:08 
2017 +0200| [865360ba633b09f1292d52ba6d07f16b6bcef6df] | committer: wm4

lavc: set avctx->hwaccel before init

So a hwaccel can access avctx->hwaccel in init for whatever reason. This
is for the new d3d hwaccel API. We could create separate entrypoints for
each of the 3 hwaccel types (dxva2, d3d11va, new d3d11va), but this
seems nicer.

Merges Libav commit bd747b9226414007f0207fa201976af7217e3b77.

Signed-off-by: Diego Biurrun 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=865360ba633b09f1292d52ba6d07f16b6bcef6df
---

 libavcodec/decode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index f7dea930ed..dccce01402 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -1248,16 +1248,16 @@ static int setup_hwaccel(AVCodecContext *avctx,
 return AVERROR(ENOMEM);
 }
 
+avctx->hwaccel = hwa;
 if (hwa->init) {
 ret = hwa->init(avctx);
 if (ret < 0) {
 av_freep(&avctx->internal->hwaccel_priv_data);
+avctx->hwaccel = NULL;
 return ret;
 }
 }
 
-avctx->hwaccel = hwa;
-
 return 0;
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] dxva: preparations for new hwaccel API

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Tue Jun  6 18:51:09 
2017 +0200| [ab28108a361196134704071b7b34c42fc7d747c7] | committer: wm4

dxva: preparations for new hwaccel API

The actual hwaccel code will need to access an internal context instead
of avctx->hwaccel_context, so add a new DXVA_CONTEXT() macro, that will
dispatch between the "old" external and the new internal context.

Also, the new API requires a new D3D11 pixfmt, so all places which check
for the pixfmt need to be adjusted. Introduce a ff_dxva2_is_d3d11()
function, which does the check.

Merges Libav commit 4dec101acc393fbfe9a8ce0237b9efbae3f20139.
Adds changes to vp9 over the Libav patch.

Signed-off-by: Diego Biurrun 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ab28108a361196134704071b7b34c42fc7d747c7
---

 libavcodec/dxva2.c  | 32 
 libavcodec/dxva2_h264.c | 14 +++---
 libavcodec/dxva2_hevc.c | 10 +-
 libavcodec/dxva2_internal.h | 22 +-
 libavcodec/dxva2_mpeg2.c| 10 +-
 libavcodec/dxva2_vc1.c  | 10 +-
 libavcodec/dxva2_vp9.c  | 10 +-
 7 files changed, 60 insertions(+), 48 deletions(-)

diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index 0036b5bec1..6b214c1877 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -71,7 +71,7 @@ int ff_dxva2_commit_buffer(AVCodecContext *avctx,
 HRESULT hr = 0;
 
 #if CONFIG_D3D11VA
-if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+if (ff_dxva2_is_d3d11(avctx))
 hr = 
ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
  D3D11VA_CONTEXT(ctx)->decoder,
  type,
@@ -91,7 +91,7 @@ int ff_dxva2_commit_buffer(AVCodecContext *avctx,
 memcpy(dxva_data, data, size);
 
 #if CONFIG_D3D11VA
-if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+if (ff_dxva2_is_d3d11(avctx)) {
 D3D11_VIDEO_DECODER_BUFFER_DESC *dsc11 = dsc;
 memset(dsc11, 0, sizeof(*dsc11));
 dsc11->BufferType   = type;
@@ -116,7 +116,7 @@ int ff_dxva2_commit_buffer(AVCodecContext *avctx,
 }
 
 #if CONFIG_D3D11VA
-if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+if (ff_dxva2_is_d3d11(avctx))
 hr = 
ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context, 
D3D11VA_CONTEXT(ctx)->decoder, type);
 #endif
 #if CONFIG_DXVA2
@@ -139,7 +139,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
   DECODER_BUFFER_DESC *bs,
   DECODER_BUFFER_DESC *slice))
 {
-AVDXVAContext *ctx = avctx->hwaccel_context;
+AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
 unsigned   buffer_count = 0;
 #if CONFIG_D3D11VA
 D3D11_VIDEO_DECODER_BUFFER_DESC buffer11[4];
@@ -154,7 +154,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 
 do {
 #if CONFIG_D3D11VA
-if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+if (ff_dxva2_is_d3d11(avctx)) {
 if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
 WaitForSingleObjectEx(D3D11VA_CONTEXT(ctx)->context_mutex, 
INFINITE, FALSE);
 hr = 
ID3D11VideoContext_DecoderBeginFrame(D3D11VA_CONTEXT(ctx)->video_context, 
D3D11VA_CONTEXT(ctx)->decoder,
@@ -171,7 +171,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 if (hr != E_PENDING || ++runs > 50)
 break;
 #if CONFIG_D3D11VA
-if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+if (ff_dxva2_is_d3d11(avctx))
 if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
 ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
 #endif
@@ -181,7 +181,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 if (FAILED(hr)) {
 av_log(avctx, AV_LOG_ERROR, "Failed to begin frame: 0x%x\n", hr);
 #if CONFIG_D3D11VA
-if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+if (ff_dxva2_is_d3d11(avctx))
 if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
 ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
 #endif
@@ -189,7 +189,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 }
 
 #if CONFIG_D3D11VA
-if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+if (ff_dxva2_is_d3d11(avctx)) {
 buffer = &buffer11[buffer_count];
 type = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS;
 }
@@ -212,7 +212,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 
 if (qm_size > 0) {
 #if CONFIG_D3D11VA
-if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+if (ff_dxva2_is_d3d11(avctx)) {
 buffer = &buffer11[buffer_count];
 type = D3D11_VIDEO_DECODER_BUFFER_INVE

[FFmpeg-cvslog] d3d11va: Link directly to dxgi.dll and d3d11.dll functions if LoadLibrary is unavailable

2017-06-27 Thread Martin Storsjö
ffmpeg | branch: master | Martin Storsjö  | Fri Jun  9 
12:05:00 2017 +0300| [3125a4a8a8fc7dde938bff6a668f53b26bf47860] | committer: wm4

d3d11va: Link directly to dxgi.dll and d3d11.dll functions if LoadLibrary is 
unavailable

When targeting the UWP API subset, the LoadLibrary function is not
available (and the fallback, LoadPackagedLibrary, can't be used to
load system DLLs). In these cases, link directly to the functions
in the DLLs instead of trying to load them dynamically at runtime.

Merges Libav commit fd1ffa1f10e940165035ccb79d4a6523da196062.

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3125a4a8a8fc7dde938bff6a668f53b26bf47860
---

 configure |  4 +++
 libavutil/hwcontext_d3d11va.c | 80 +--
 2 files changed, 51 insertions(+), 33 deletions(-)

diff --git a/configure b/configure
index aebd5d5da5..bf48472219 100755
--- a/configure
+++ b/configure
@@ -6120,6 +6120,10 @@ fi
 
 check_func_headers "windows.h" CreateDIBSection "$gdigrab_indev_extralibs"
 
+# d3d11va requires linking directly to dxgi and d3d11 if not building for
+# the desktop api partition
+enabled LoadLibrary || d3d11va_extralibs="-ldxgi -ld3d11"
+
 enabled vaapi &&
 check_lib vaapi va/va.h vaInitialize -lva
 
diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c
index 3940502868..5432dd8b51 100644
--- a/libavutil/hwcontext_d3d11va.c
+++ b/libavutil/hwcontext_d3d11va.c
@@ -18,6 +18,10 @@
 
 #include 
 
+// Include thread.h before redefining _WIN32_WINNT, to get
+// the right implementation for AVOnce
+#include "thread.h"
+
 #if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0600
 #undef _WIN32_WINNT
 #define _WIN32_WINNT 0x0600
@@ -39,6 +43,34 @@
 
 typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void 
**ppFactory);
 
+static AVOnce functions_loaded = AV_ONCE_INIT;
+
+static PFN_CREATE_DXGI_FACTORY mCreateDXGIFactory;
+static PFN_D3D11_CREATE_DEVICE mD3D11CreateDevice;
+
+static av_cold void load_functions(void)
+{
+#if HAVE_LOADLIBRARY
+// We let these "leak" - this is fine, as unloading has no great benefit, 
and
+// Windows will mark a DLL as loaded forever if its internal refcount 
overflows
+// from too many LoadLibrary calls.
+HANDLE d3dlib, dxgilib;
+
+d3dlib  = LoadLibrary("d3d11.dll");
+dxgilib = LoadLibrary("dxgi.dll");
+if (!d3dlib || !dxgilib)
+return;
+
+mD3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE) GetProcAddress(d3dlib, 
"D3D11CreateDevice");
+mCreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY) GetProcAddress(dxgilib, 
"CreateDXGIFactory");
+#else
+// In UWP (which lacks LoadLibrary), CreateDXGIFactory isn't available,
+// only CreateDXGIFactory1
+mD3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE) D3D11CreateDevice;
+mCreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY) CreateDXGIFactory1;
+#endif
+}
+
 typedef struct D3D11VAFramesContext {
 int nb_surfaces_used;
 
@@ -407,50 +439,32 @@ static int d3d11va_device_create(AVHWDeviceContext *ctx, 
const char *device,
  AVDictionary *opts, int flags)
 {
 AVD3D11VADeviceContext *device_hwctx = ctx->hwctx;
-HANDLE d3dlib;
 
 HRESULT hr;
-PFN_D3D11_CREATE_DEVICE createD3D;
 IDXGIAdapter   *pAdapter = NULL;
 ID3D10Multithread  *pMultithread;
 UINT creationFlags = D3D11_CREATE_DEVICE_VIDEO_SUPPORT;
+int ret;
 
-if (device) {
-PFN_CREATE_DXGI_FACTORY mCreateDXGIFactory;
-HMODULE dxgilib = LoadLibrary("dxgi.dll");
-if (!dxgilib)
-return AVERROR_UNKNOWN;
-
-mCreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY) GetProcAddress(dxgilib, 
"CreateDXGIFactory");
-if (mCreateDXGIFactory) {
-IDXGIFactory2 *pDXGIFactory;
-hr = mCreateDXGIFactory(&IID_IDXGIFactory2, (void 
**)&pDXGIFactory);
-if (SUCCEEDED(hr)) {
-int adapter = atoi(device);
-if (FAILED(IDXGIFactory2_EnumAdapters(pDXGIFactory, adapter, 
&pAdapter)))
-pAdapter = NULL;
-IDXGIFactory2_Release(pDXGIFactory);
-}
-}
-FreeLibrary(dxgilib);
-}
-
-// We let this "leak" - this is fine, as unloading has no great benefit, 
and
-// Windows will mark a DLL as loaded forever if its internal refcount 
overflows
-// from too many LoadLibrary calls.
-d3dlib = LoadLibrary("d3d11.dll");
-if (!d3dlib) {
-av_log(ctx, AV_LOG_ERROR, "Failed to load D3D11 library\n");
+if ((ret = ff_thread_once(&functions_loaded, load_functions)) != 0)
+return AVERROR_UNKNOWN;
+if (!mD3D11CreateDevice || !mCreateDXGIFactory) {
+av_log(ctx, AV_LOG_ERROR, "Failed to load D3D11 library or its 
functions\n");
 return AVERROR_UNKNOWN;
 }
 
-createD3D = (PFN_D3D11_CREATE_DEVICE) GetProcAddress(d3dlib, 
"D3D11CreateDevice");
-if (!createD3D) {
-av_log(

[FFmpeg-cvslog] dxva: add declarative profile checks

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Thu Jun 22 14:52:52 
2017 +0200| [e2afcc33e0bcba92ab6c767f09f17a67911a4928] | committer: wm4

dxva: add declarative profile checks

Make supported codec profiles part of each dxva_modes entry. Every DXVA2
mode is representative for a codec with a subset of supported profiles,
so reflecting that in dxva_modes seems appropriate.

In practice, this will more strictly check MPEG2 profiles, will stop
relying on the surface format checks for selecting the correct HEVC
profile, and remove the verbose messages for mismatching H264/HEVC
profiles. Instead of the latter, it will now print the more nebulous "No
decoder device for codec found" verbose message.

This also respects AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH. Move the
Main10 HEVC entry before the normal one to make this work better.

Originally inspired by VLC's code.

Merges Libav commit 70e5e7c02232d1e51c362d5572c567a9e2a8dcc2.

Signed-off-by: Luca Barbato 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e2afcc33e0bcba92ab6c767f09f17a67911a4928
---

 libavcodec/dxva2.c | 64 +-
 1 file changed, 44 insertions(+), 20 deletions(-)

diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index efb5328dcd..f90a7729a3 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -50,18 +50,34 @@ DEFINE_GUID(ff_IID_IDirectXVideoDecoderService, 
0xfc51a551,0xd5e7,0x11d9,0xaf,0x
 typedef struct dxva_mode {
 const GUID *guid;
 enum AVCodecID codec;
+// List of supported profiles, terminated by a FF_PROFILE_UNKNOWN entry.
+// If NULL, don't check profile.
+const int  *profiles;
 } dxva_mode;
 
+static const int prof_mpeg2_main[]   = {FF_PROFILE_MPEG2_SIMPLE,
+FF_PROFILE_MPEG2_MAIN,
+FF_PROFILE_UNKNOWN};
+static const int prof_h264_high[]= {FF_PROFILE_H264_CONSTRAINED_BASELINE,
+FF_PROFILE_H264_MAIN,
+FF_PROFILE_H264_HIGH,
+FF_PROFILE_UNKNOWN};
+static const int prof_hevc_main[]= {FF_PROFILE_HEVC_MAIN,
+FF_PROFILE_UNKNOWN};
+static const int prof_hevc_main10[]  = {FF_PROFILE_HEVC_MAIN,
+FF_PROFILE_HEVC_MAIN_10,
+FF_PROFILE_UNKNOWN};
+
 static const dxva_mode dxva_modes[] = {
 /* MPEG-2 */
-{ &ff_DXVA2_ModeMPEG2_VLD,   AV_CODEC_ID_MPEG2VIDEO },
-{ &ff_DXVA2_ModeMPEG2and1_VLD,   AV_CODEC_ID_MPEG2VIDEO },
+{ &ff_DXVA2_ModeMPEG2_VLD,   AV_CODEC_ID_MPEG2VIDEO, prof_mpeg2_main },
+{ &ff_DXVA2_ModeMPEG2and1_VLD,   AV_CODEC_ID_MPEG2VIDEO, prof_mpeg2_main },
 
 /* H.264 */
-{ &ff_DXVA2_ModeH264_F,  AV_CODEC_ID_H264 },
-{ &ff_DXVA2_ModeH264_E,  AV_CODEC_ID_H264 },
+{ &ff_DXVA2_ModeH264_F,  AV_CODEC_ID_H264, prof_h264_high },
+{ &ff_DXVA2_ModeH264_E,  AV_CODEC_ID_H264, prof_h264_high },
 /* Intel specific H.264 mode */
-{ &ff_DXVADDI_Intel_ModeH264_E,  AV_CODEC_ID_H264 },
+{ &ff_DXVADDI_Intel_ModeH264_E,  AV_CODEC_ID_H264, prof_h264_high },
 
 /* VC-1 / WMV3 */
 { &ff_DXVA2_ModeVC1_D2010,   AV_CODEC_ID_VC1 },
@@ -70,8 +86,8 @@ static const dxva_mode dxva_modes[] = {
 { &ff_DXVA2_ModeVC1_D,   AV_CODEC_ID_WMV3 },
 
 /* HEVC/H.265 */
-{ &ff_DXVA2_ModeHEVC_VLD_Main,   AV_CODEC_ID_HEVC },
-{ &ff_DXVA2_ModeHEVC_VLD_Main10, AV_CODEC_ID_HEVC },
+{ &ff_DXVA2_ModeHEVC_VLD_Main10, AV_CODEC_ID_HEVC, prof_hevc_main10 },
+{ &ff_DXVA2_ModeHEVC_VLD_Main,   AV_CODEC_ID_HEVC, prof_hevc_main },
 
 /* VP8/9 */
 { &ff_DXVA2_ModeVP9_VLD_Profile0,AV_CODEC_ID_VP9 },
@@ -164,6 +180,26 @@ static int dxva2_validate_output(void *decoder_service, 
GUID guid, const void *s
 }
 #endif
 
+static int dxva_check_codec_compatibility(AVCodecContext *avctx, const 
dxva_mode *mode)
+{
+if (mode->codec != avctx->codec_id)
+return 0;
+
+if (mode->profiles && !(avctx->hwaccel_flags & 
AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH)) {
+int i, found = 0;
+for (i = 0; mode->profiles[i] != FF_PROFILE_UNKNOWN; i++) {
+if (avctx->profile == mode->profiles[i]) {
+found = 1;
+break;
+}
+}
+if (!found)
+return 0;
+}
+
+return 1;
+}
+
 static int dxva_get_decoder_guid(AVCodecContext *avctx, void *service, void 
*surface_format,
  unsigned guid_count, const GUID *guid_list, 
GUID *decoder_guid)
 {
@@ -174,7 +210,7 @@ static int dxva_get_decoder_guid(AVCodecContext *avctx, 
void *service, void *sur
 for (i = 0; dxva_modes[i].guid; i++) {
 const dxva_mode *mode = &dxva_modes[i];
 int validate;
-if (mode->codec != avctx->codec_id)
+if (!dxva_check_codec_compatibility(

[FFmpeg-cvslog] dxva: fix some warnings

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Thu Jun 22 14:52:53 
2017 +0200| [39f201a0ec7913f7509a01fb0fa6634a24e52203] | committer: wm4

dxva: fix some warnings

Some existed since forever, some are new.

The cast in get_surface() is silly, but unless we change the av_log
function signature, or all callers of ff_dxva2_get_surface_index(), it's
needed to remove the const warning.

Merges Libav commit 752ddb45569ffe278393cd853b70f18ae017219e.

Signed-off-by: Luca Barbato 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=39f201a0ec7913f7509a01fb0fa6634a24e52203
---

 libavcodec/dxva2.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index f90a7729a3..e9a6605b5f 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -363,7 +363,6 @@ static int d3d11va_get_decoder_configuration(AVCodecContext 
*avctx,
  const D3D11_VIDEO_DECODER_DESC 
*desc,
  D3D11_VIDEO_DECODER_CONFIG 
*config)
 {
-FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
 unsigned cfg_count = 0;
 D3D11_VIDEO_DECODER_CONFIG *cfg_list = NULL;
 HRESULT hr;
@@ -629,7 +628,6 @@ int ff_dxva2_decode_init(AVCodecContext *avctx)
 if (sctx->pix_fmt == AV_PIX_FMT_D3D11) {
 AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
 AVD3D11VAContext *d3d11_ctx = &sctx->ctx.d3d11va;
-HRESULT hr;
 
 ff_dxva2_lock(avctx);
 ret = d3d11va_create_decoder(avctx);
@@ -696,7 +694,7 @@ int ff_dxva2_decode_uninit(AVCodecContext *avctx)
 return 0;
 }
 
-static void *get_surface(AVCodecContext *avctx, const AVFrame *frame)
+static void *get_surface(const AVCodecContext *avctx, const AVFrame *frame)
 {
 #if CONFIG_D3D11VA
 if (frame->format == AV_PIX_FMT_D3D11) {
@@ -704,7 +702,7 @@ static void *get_surface(AVCodecContext *avctx, const 
AVFrame *frame)
 intptr_t index = (intptr_t)frame->data[1];
 if (index < 0 || index >= sctx->nb_d3d11_views ||
 sctx->d3d11_texture != (ID3D11Texture2D *)frame->data[0]) {
-av_log(avctx, AV_LOG_ERROR, "get_buffer frame is invalid!\n");
+av_log((void *)avctx, AV_LOG_ERROR, "get_buffer frame is 
invalid!\n");
 return NULL;
 }
 return sctx->d3d11_views[index];
@@ -765,7 +763,7 @@ int ff_dxva2_commit_buffer(AVCodecContext *avctx,
 #endif
 if (FAILED(hr)) {
 av_log(avctx, AV_LOG_ERROR, "Failed to get a buffer for %u: 0x%x\n",
-   type, hr);
+   type, (unsigned)hr);
 return -1;
 }
 if (size <= dxva_size) {
@@ -807,7 +805,7 @@ int ff_dxva2_commit_buffer(AVCodecContext *avctx,
 if (FAILED(hr)) {
 av_log(avctx, AV_LOG_ERROR,
"Failed to release buffer type %u: 0x%x\n",
-   type, hr);
+   type, (unsigned)hr);
 result = -1;
 }
 return result;
@@ -877,7 +875,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 } while(1);
 
 if (FAILED(hr)) {
-av_log(avctx, AV_LOG_ERROR, "Failed to begin frame: 0x%x\n", hr);
+av_log(avctx, AV_LOG_ERROR, "Failed to begin frame: 0x%x\n", 
(unsigned)hr);
 ff_dxva2_unlock(avctx);
 return -1;
 }
@@ -972,7 +970,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 }
 #endif
 if (FAILED(hr)) {
-av_log(avctx, AV_LOG_ERROR, "Failed to execute: 0x%x\n", hr);
+av_log(avctx, AV_LOG_ERROR, "Failed to execute: 0x%x\n", (unsigned)hr);
 result = -1;
 }
 
@@ -987,7 +985,7 @@ end:
 #endif
 ff_dxva2_unlock(avctx);
 if (FAILED(hr)) {
-av_log(avctx, AV_LOG_ERROR, "Failed to end frame: 0x%x\n", hr);
+av_log(avctx, AV_LOG_ERROR, "Failed to end frame: 0x%x\n", 
(unsigned)hr);
 result = -1;
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] dxva: move d3d11 locking/unlocking to functions

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Tue Jun  6 18:51:10 
2017 +0200| [5659f7404731415c7e1cfdf4d8b0afeb6b1132de] | committer: wm4

dxva: move d3d11 locking/unlocking to functions

I want to make it non-mandatory to set a mutex in the D3D11 device
context, and replacing it with user callbacks seems like the best
solution. This is preparation for it. Also makes the code slightly more
readable.

Merges Libav commit 831cfe10b40414915fe7b6088158421fe02e2b2d.

Signed-off-by: Diego Biurrun 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5659f7404731415c7e1cfdf4d8b0afeb6b1132de
---

 libavcodec/dxva2.c | 46 --
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index 6b214c1877..9138216745 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -29,6 +29,28 @@
 #include "avcodec.h"
 #include "dxva2_internal.h"
 
+static void ff_dxva2_lock(AVCodecContext *avctx)
+{
+#if CONFIG_D3D11VA
+if (ff_dxva2_is_d3d11(avctx)) {
+AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
+if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
+WaitForSingleObjectEx(D3D11VA_CONTEXT(ctx)->context_mutex, 
INFINITE, FALSE);
+}
+#endif
+}
+
+static void ff_dxva2_unlock(AVCodecContext *avctx)
+{
+#if CONFIG_D3D11VA
+if (ff_dxva2_is_d3d11(avctx)) {
+AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
+if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
+ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
+}
+#endif
+}
+
 static void *get_surface(const AVFrame *frame)
 {
 return frame->data[3];
@@ -153,14 +175,12 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 unsigned type;
 
 do {
+ff_dxva2_lock(avctx);
 #if CONFIG_D3D11VA
-if (ff_dxva2_is_d3d11(avctx)) {
-if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
-WaitForSingleObjectEx(D3D11VA_CONTEXT(ctx)->context_mutex, 
INFINITE, FALSE);
+if (ff_dxva2_is_d3d11(avctx))
 hr = 
ID3D11VideoContext_DecoderBeginFrame(D3D11VA_CONTEXT(ctx)->video_context, 
D3D11VA_CONTEXT(ctx)->decoder,
   get_surface(frame),
   0, NULL);
-}
 #endif
 #if CONFIG_DXVA2
 if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
@@ -170,21 +190,13 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 #endif
 if (hr != E_PENDING || ++runs > 50)
 break;
-#if CONFIG_D3D11VA
-if (ff_dxva2_is_d3d11(avctx))
-if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
-ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
-#endif
+ff_dxva2_unlock(avctx);
 av_usleep(2000);
 } while(1);
 
 if (FAILED(hr)) {
 av_log(avctx, AV_LOG_ERROR, "Failed to begin frame: 0x%x\n", hr);
-#if CONFIG_D3D11VA
-if (ff_dxva2_is_d3d11(avctx))
-if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
-ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
-#endif
+ff_dxva2_unlock(avctx);
 return -1;
 }
 
@@ -284,16 +296,14 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 
 end:
 #if CONFIG_D3D11VA
-if (ff_dxva2_is_d3d11(avctx)) {
+if (ff_dxva2_is_d3d11(avctx))
 hr = 
ID3D11VideoContext_DecoderEndFrame(D3D11VA_CONTEXT(ctx)->video_context, 
D3D11VA_CONTEXT(ctx)->decoder);
-if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
-ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
-}
 #endif
 #if CONFIG_DXVA2
 if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
 hr = IDirectXVideoDecoder_EndFrame(DXVA2_CONTEXT(ctx)->decoder, NULL);
 #endif
+ff_dxva2_unlock(avctx);
 if (FAILED(hr)) {
 av_log(avctx, AV_LOG_ERROR, "Failed to end frame: 0x%x\n", hr);
 result = -1;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] dxva: add support for new dxva2 and d3d11 hwaccel APIs

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Wed Jun  7 17:11:17 
2017 +0200| [70143a3954e1c4412efb2bf1a3a818adea2d3abf] | committer: wm4

dxva: add support for new dxva2 and d3d11 hwaccel APIs

This also adds support to avconv (which is trivial due to the new
hwaccel API being generic enough).

The new decoder setup code in dxva2.c is significantly based on work by
Steve Lhomme, but with heavy changes/rewrites.

Merges Libav commit f9e7a2f95a7194a8736cc1416a03a1a0155a3e9f.
Also adds untested VP9 support.
The check for DXVA2 COBJs is removed. Just update your MinGW to
something newer than a 5 year old release.

Signed-off-by: Diego Biurrun 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=70143a3954e1c4412efb2bf1a3a818adea2d3abf
---

 Changelog   |   1 +
 Makefile|   1 -
 configure   |  30 +-
 doc/APIchanges  |   6 +
 ffmpeg.h|   2 +-
 ffmpeg_dxva2.c  | 444 -
 ffmpeg_opt.c|  10 +-
 libavcodec/allcodecs.c  |   6 +
 libavcodec/dxva2.c  | 669 +++-
 libavcodec/dxva2_h264.c |  22 ++
 libavcodec/dxva2_hevc.c |  22 ++
 libavcodec/dxva2_internal.h |  43 ++-
 libavcodec/dxva2_mpeg2.c|  22 ++
 libavcodec/dxva2_vc1.c  |  44 +++
 libavcodec/dxva2_vp9.c  |  22 ++
 libavcodec/h264_slice.c |   3 +-
 libavcodec/hevcdec.c|   4 +-
 libavcodec/mpeg12dec.c  |   1 +
 libavcodec/vc1dec.c |   1 +
 libavcodec/version.h|   2 +-
 libavcodec/vp9.c|   3 +-
 libavutil/hwcontext_dxva2.h |   3 +
 22 files changed, 885 insertions(+), 476 deletions(-)

diff --git a/Changelog b/Changelog
index 24d2255183..a8726c6736 100644
--- a/Changelog
+++ b/Changelog
@@ -25,6 +25,7 @@ version :
 - The x86 assembler default switched from yasm to nasm, pass
   --x86asmexe=yasm to configure to restore the old behavior.
 - additional frame format support for Interplay MVE movies
+- support for decoding through D3D11VA in ffmpeg
 
 version 3.3:
 - CrystalHD decoder moved to new decode API
diff --git a/Makefile b/Makefile
index aef18185d4..29870d7710 100644
--- a/Makefile
+++ b/Makefile
@@ -38,7 +38,6 @@ ifndef CONFIG_VIDEOTOOLBOX
 OBJS-ffmpeg-$(CONFIG_VDA) += ffmpeg_videotoolbox.o
 endif
 OBJS-ffmpeg-$(CONFIG_CUVID)   += ffmpeg_cuvid.o
-OBJS-ffmpeg-$(HAVE_DXVA2_LIB) += ffmpeg_dxva2.o
 OBJS-ffserver += ffserver_config.o
 
 TESTTOOLS   = audiogen videogen rotozoom tiny_psnr tiny_ssim base64 audiomatch
diff --git a/configure b/configure
index 6ca919be4a..aebd5d5da5 100755
--- a/configure
+++ b/configure
@@ -2056,8 +2056,6 @@ HAVE_LIST="
 alsa
 atomics_native
 dos_paths
-dxva2_lib
-dxva2api_cobj
 jack
 libc_msvcrt
 makeinfo
@@ -2594,9 +2592,8 @@ crystalhd_deps="libcrystalhd_libcrystalhd_if_h"
 cuda_deps_any="dlopen LoadLibrary"
 cuvid_deps="cuda"
 d3d11va_deps="d3d11_h dxva_h ID3D11VideoDecoder ID3D11VideoContext"
-dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode"
+dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32"
 dxva2_extralibs="-luser32"
-dxva2_lib_deps="dxva2"
 vda_framework_deps="VideoDecodeAcceleration_VDADecoder_h blocks_extension"
 vda_framework_extralibs="-framework VideoDecodeAcceleration"
 vda_deps="vda_framework pthreads"
@@ -2613,6 +2610,8 @@ h264_cuvid_hwaccel_deps="cuda cuvid"
 h264_cuvid_hwaccel_select="h264_cuvid_decoder"
 h264_d3d11va_hwaccel_deps="d3d11va"
 h264_d3d11va_hwaccel_select="h264_decoder"
+h264_d3d11va2_hwaccel_deps="d3d11va"
+h264_d3d11va2_hwaccel_select="h264_decoder"
 h264_dxva2_hwaccel_deps="dxva2"
 h264_dxva2_hwaccel_select="h264_decoder"
 h264_mediacodec_hwaccel_deps="mediacodec"
@@ -2633,6 +2632,8 @@ hevc_cuvid_hwaccel_select="hevc_cuvid_decoder"
 hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
 hevc_d3d11va_hwaccel_select="hevc_decoder"
 hevc_mediacodec_hwaccel_deps="mediacodec"
+hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
+hevc_d3d11va2_hwaccel_select="hevc_decoder"
 hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC"
 hevc_dxva2_hwaccel_select="hevc_decoder"
 hevc_qsv_hwaccel_deps="libmfx"
@@ -2656,6 +2657,8 @@ mpeg2_cuvid_hwaccel_deps="cuda cuvid"
 mpeg2_cuvid_hwaccel_select="mpeg2_cuvid_decoder"
 mpeg2_d3d11va_hwaccel_deps="d3d11va"
 mpeg2_d3d11va_hwaccel_select="mpeg2video_decoder"
+mpeg2_d3d11va2_hwaccel_deps="d3d11va"
+mpeg2_d3d11va2_hwaccel_select="mpeg2video_decoder"
 mpeg2_dxva2_hwaccel_deps="dxva2"
 mpeg2_dxva2_hwaccel_select="mpeg2video_decoder"
 mpeg2_mediacodec_hwaccel_deps="mediacodec"
@@ -2684,6 +2687,8 @@ vc1_cuvid_hwaccel_deps="cuda cuvid"
 vc1_cuvid_hwaccel_select="vc1_cuvid_decoder"
 vc1_d3d11va_hwaccel_deps="d3d11va"
 vc1_d3d11va_hwaccel_select="vc1_decoder"
+vc1_d3d11va2_hwaccel_deps="d3d11va"
+vc1_d3d11va2_hwaccel_select="vc1_decoder"
 vc1_dxva2_hwaccel_deps="dxva2"
 vc1_dxva2_hwaccel_select="vc1_decoder"
 vc1_mmal_hwaccel_deps="mmal"
@@ -2701,12 +2706,15 @@ vp8_mediacod

[FFmpeg-cvslog] hwcontext_d3d11va: fix crash on frames_init failure

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Thu Jun 22 14:52:54 
2017 +0200| [1509d739a036b9838e12f28dac9f09ac37bc3928] | committer: wm4

hwcontext_d3d11va: fix crash on frames_init failure

It appears in this case, frames_uninit is called twice (once by
av_hwframe_ctx_init(), and again by unreffing the frames ctx ref).

Merges Libav commit 086321c612185469ebb85a1887527c8915b488b7.

Signed-off-by: Luca Barbato 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1509d739a036b9838e12f28dac9f09ac37bc3928
---

 libavutil/hwcontext_d3d11va.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c
index 5432dd8b51..0b36a563f1 100644
--- a/libavutil/hwcontext_d3d11va.c
+++ b/libavutil/hwcontext_d3d11va.c
@@ -104,9 +104,11 @@ static void d3d11va_frames_uninit(AVHWFramesContext *ctx)
 
 if (frames_hwctx->texture)
 ID3D11Texture2D_Release(frames_hwctx->texture);
+frames_hwctx->texture = NULL;
 
 if (s->staging_texture)
 ID3D11Texture2D_Release(s->staging_texture);
+s->staging_texture = NULL;
 }
 
 static void free_texture(void *opaque, uint8_t *data)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] hwcontext_d3d11va: allocate staging texture lazily

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Thu Jun 22 14:52:55 
2017 +0200| [6f5ff3269b12776a32b12887128ba034ad86e08b] | committer: wm4

hwcontext_d3d11va: allocate staging texture lazily

Makes dealing with formats that can not be used for staging textures
easier (DXGI_FORMAT_420_OPAQUE). It also saves memory if the staging
texture is never needed, so this is a good thing.

Merges Libav commit 98d73e4174333b37d961b79e1182be5a02156c02.

Signed-off-by: Luca Barbato 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6f5ff3269b12776a32b12887128ba034ad86e08b
---

 libavutil/hwcontext_d3d11va.c | 46 ---
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c
index 0b36a563f1..523a6d2dc6 100644
--- a/libavutil/hwcontext_d3d11va.c
+++ b/libavutil/hwcontext_d3d11va.c
@@ -239,17 +239,6 @@ static int d3d11va_frames_init(AVHWFramesContext *ctx)
 }
 }
 
-texDesc.ArraySize   = 1;
-texDesc.Usage   = D3D11_USAGE_STAGING;
-texDesc.BindFlags   = 0;
-texDesc.CPUAccessFlags  = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
-texDesc.MiscFlags   = 0;
-hr = ID3D11Device_CreateTexture2D(device_hwctx->device, &texDesc, NULL, 
&s->staging_texture);
-if (FAILED(hr)) {
-av_log(ctx, AV_LOG_ERROR, "Could not create the staging texture 
(%lx)\n", (long)hr);
-return AVERROR_UNKNOWN;
-}
-
 ctx->internal->pool_internal = 
av_buffer_pool_init2(sizeof(AVD3D11FrameDescriptor),
 ctx, 
d3d11va_pool_alloc, NULL);
 if (!ctx->internal->pool_internal)
@@ -295,6 +284,31 @@ static int d3d11va_transfer_get_formats(AVHWFramesContext 
*ctx,
 return 0;
 }
 
+static int d3d11va_create_staging_texture(AVHWFramesContext *ctx)
+{
+AVD3D11VADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+D3D11VAFramesContext  *s = ctx->internal->priv;
+HRESULT hr;
+D3D11_TEXTURE2D_DESC texDesc = {
+.Width  = ctx->width,
+.Height = ctx->height,
+.MipLevels  = 1,
+.Format = s->format,
+.SampleDesc = { .Count = 1 },
+.ArraySize  = 1,
+.Usage  = D3D11_USAGE_STAGING,
+.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE,
+};
+
+hr = ID3D11Device_CreateTexture2D(device_hwctx->device, &texDesc, NULL, 
&s->staging_texture);
+if (FAILED(hr)) {
+av_log(ctx, AV_LOG_ERROR, "Could not create the staging texture 
(%lx)\n", (long)hr);
+return AVERROR_UNKNOWN;
+}
+
+return 0;
+}
+
 static void fill_texture_ptrs(uint8_t *data[4], int linesize[4],
   AVHWFramesContext *ctx,
   D3D11_TEXTURE2D_DESC *desc,
@@ -320,7 +334,7 @@ static int d3d11va_transfer_data(AVHWFramesContext *ctx, 
AVFrame *dst,
 // (The interface types are compatible.)
 ID3D11Resource *texture = (ID3D11Resource *)(ID3D11Texture2D 
*)frame->data[0];
 int index = (intptr_t)frame->data[1];
-ID3D11Resource *staging = (ID3D11Resource *)s->staging_texture;
+ID3D11Resource *staging;
 int w = FFMIN(dst->width,  src->width);
 int h = FFMIN(dst->height, src->height);
 uint8_t *map_data[4];
@@ -334,6 +348,14 @@ static int d3d11va_transfer_data(AVHWFramesContext *ctx, 
AVFrame *dst,
 
 device_hwctx->lock(device_hwctx->lock_ctx);
 
+if (!s->staging_texture) {
+int res = d3d11va_create_staging_texture(ctx);
+if (res < 0)
+return res;
+}
+
+staging = (ID3D11Resource *)s->staging_texture;
+
 ID3D11Texture2D_GetDesc(s->staging_texture, &desc);
 
 if (download) {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] dxva: support DXGI_FORMAT_420_OPAQUE decoding

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Thu Jun 22 14:52:56 
2017 +0200| [8d7fdba7b8670db9dd5023dde6fe3a06674933c4] | committer: wm4

dxva: support DXGI_FORMAT_420_OPAQUE decoding

Some devices (some phones, apparently) will support only this opaque
format. Of course this won't work with CLI, because copying data
directly is not supported.

Automatic frame allocation (setting AVCodecContext.hw_device_ctx) does
not support this mode, even if it's the only supported mode. But since
opaque surfaces are generally less useful, that's probably ok.

Merges Libav commit 5030e3856c2126fb829edb828f5aae011d178eb4.

Signed-off-by: Luca Barbato 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8d7fdba7b8670db9dd5023dde6fe3a06674933c4
---

 libavcodec/dxva2.c| 13 +++--
 libavutil/hwcontext_d3d11va.c | 12 ++--
 libavutil/hwcontext_d3d11va.h |  4 
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index e9a6605b5f..76e8249bef 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -393,19 +393,28 @@ static int 
d3d11va_get_decoder_configuration(AVCodecContext *avctx,
 return ret;
 }
 
+static DXGI_FORMAT d3d11va_map_sw_to_hw_format(enum AVPixelFormat pix_fmt)
+{
+switch (pix_fmt) {
+case AV_PIX_FMT_NV12:   return DXGI_FORMAT_NV12;
+case AV_PIX_FMT_P010:   return DXGI_FORMAT_P010;
+case AV_PIX_FMT_YUV420P:return DXGI_FORMAT_420_OPAQUE;
+default:return DXGI_FORMAT_UNKNOWN;
+}
+}
+
 static int d3d11va_create_decoder(AVCodecContext *avctx)
 {
 FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
 GUID *guid_list;
 unsigned guid_count, i;
 GUID decoder_guid;
-DXGI_FORMAT surface_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ?
- DXGI_FORMAT_P010 : DXGI_FORMAT_NV12;
 D3D11_VIDEO_DECODER_DESC desc = { 0 };
 D3D11_VIDEO_DECODER_CONFIG config;
 AVHWFramesContext *frames_ctx = (AVHWFramesContext 
*)avctx->hw_frames_ctx->data;
 AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
 AVD3D11VAFramesContext *frames_hwctx = frames_ctx->hwctx;
+DXGI_FORMAT surface_format = 
d3d11va_map_sw_to_hw_format(frames_ctx->sw_format);
 D3D11_TEXTURE2D_DESC texdesc;
 HRESULT hr;
 int ret;
diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c
index 523a6d2dc6..376c76e5cf 100644
--- a/libavutil/hwcontext_d3d11va.c
+++ b/libavutil/hwcontext_d3d11va.c
@@ -83,8 +83,11 @@ static const struct {
 DXGI_FORMAT d3d_format;
 enum AVPixelFormat pix_fmt;
 } supported_formats[] = {
-{ DXGI_FORMAT_NV12, AV_PIX_FMT_NV12 },
-{ DXGI_FORMAT_P010, AV_PIX_FMT_P010 },
+{ DXGI_FORMAT_NV12, AV_PIX_FMT_NV12 },
+{ DXGI_FORMAT_P010, AV_PIX_FMT_P010 },
+// Special opaque formats. The pix_fmt is merely a place holder, as the
+// opaque format cannot be accessed directly.
+{ DXGI_FORMAT_420_OPAQUE,   AV_PIX_FMT_YUV420P },
 };
 
 static void d3d11va_default_lock(void *ctx)
@@ -270,6 +273,7 @@ static int d3d11va_transfer_get_formats(AVHWFramesContext 
*ctx,
 enum AVHWFrameTransferDirection dir,
 enum AVPixelFormat **formats)
 {
+D3D11VAFramesContext *s = ctx->internal->priv;
 enum AVPixelFormat *fmts;
 
 fmts = av_malloc_array(2, sizeof(*fmts));
@@ -279,6 +283,10 @@ static int d3d11va_transfer_get_formats(AVHWFramesContext 
*ctx,
 fmts[0] = ctx->sw_format;
 fmts[1] = AV_PIX_FMT_NONE;
 
+// Don't signal support for opaque formats. Actual access would fail.
+if (s->format == DXGI_FORMAT_420_OPAQUE)
+fmts[0] = AV_PIX_FMT_NONE;
+
 *formats = fmts;
 
 return 0;
diff --git a/libavutil/hwcontext_d3d11va.h b/libavutil/hwcontext_d3d11va.h
index 676349d7b8..d41451580e 100644
--- a/libavutil/hwcontext_d3d11va.h
+++ b/libavutil/hwcontext_d3d11va.h
@@ -26,6 +26,10 @@
  * The default pool implementation will be fixed-size if initial_pool_size is
  * set (and allocate elements from an array texture). Otherwise it will 
allocate
  * individual textures. Be aware that decoding requires a single array texture.
+ *
+ * Using sw_format==AV_PIX_FMT_YUV420P has special semantics, and maps to
+ * DXGI_FORMAT_420_OPAQUE. av_hwframe_transfer_data() is not supported for
+ * this format. Refer to MSDN for details.
  */
 
 #include 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] dxva: verbose-log decoder GUID list

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Thu Jun 22 14:52:59 
2017 +0200| [f0bcedaf37ed4fdb082c88826b8bd3a5dd26382a] | committer: wm4

dxva: verbose-log decoder GUID list

Helpful for debugging.

Merges Libav commit 068eaa534e7ddb2155e2830818d5c3f1069c68d8.

Signed-off-by: Luca Barbato 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f0bcedaf37ed4fdb082c88826b8bd3a5dd26382a
---

 libavcodec/dxva2.c | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index 76e8249bef..542607ab3e 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -200,12 +200,59 @@ static int dxva_check_codec_compatibility(AVCodecContext 
*avctx, const dxva_mode
 return 1;
 }
 
+static void dxva_list_guids_debug(AVCodecContext *avctx, void *service,
+ unsigned guid_count, const GUID *guid_list)
+{
+FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+int i;
+
+av_log(avctx, AV_LOG_VERBOSE, "Decoder GUIDs reported as supported:\n");
+
+for (i = 0; i < guid_count; i++) {
+const GUID *guid = &guid_list[i];
+
+av_log(avctx, AV_LOG_VERBOSE,
+ "{%8.8x-%4.4x-%4.4x-%2.2x%2.2x-%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x}",
+ (unsigned) guid->Data1, guid->Data2, guid->Data3,
+ guid->Data4[0], guid->Data4[1],
+ guid->Data4[2], guid->Data4[3],
+ guid->Data4[4], guid->Data4[5],
+ guid->Data4[6], guid->Data4[7]);
+
+#if CONFIG_D3D11VA
+if (sctx->pix_fmt == AV_PIX_FMT_D3D11) {
+DXGI_FORMAT format;
+// We don't know the maximum valid DXGI_FORMAT, so use 200 as
+// arbitrary upper bound (that could become outdated).
+for (format = 0; format < 200; format++) {
+if (d3d11va_validate_output(service, *guid, &format))
+av_log(avctx, AV_LOG_VERBOSE, " %d", (int)format);
+}
+}
+#endif
+#if CONFIG_DXVA2
+if (sctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
+const D3DFORMAT formats[] = {MKTAG('N', 'V', '1', '2'),
+ MKTAG('P', '0', '1', '0')};
+int i;
+for (i = 0; i < FF_ARRAY_ELEMS(formats); i++) {
+if (dxva2_validate_output(service, *guid, &formats[i]))
+av_log(avctx, AV_LOG_VERBOSE, " %d", i);
+}
+}
+#endif
+av_log(avctx, AV_LOG_VERBOSE, "\n");
+}
+}
+
 static int dxva_get_decoder_guid(AVCodecContext *avctx, void *service, void 
*surface_format,
  unsigned guid_count, const GUID *guid_list, 
GUID *decoder_guid)
 {
 FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
 unsigned i, j;
 
+dxva_list_guids_debug(avctx, service, guid_count, guid_list);
+
 *decoder_guid = ff_GUID_NULL;
 for (i = 0; dxva_modes[i].guid; i++) {
 const dxva_mode *mode = &dxva_modes[i];

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] hwcontext_d3d11va: add option to enable debug mode

2017-06-27 Thread wm4
ffmpeg | branch: master | wm4  | Thu Jun 22 14:52:57 
2017 +0200| [289d387330d8a0a07ff98c9e09215627a70ace2d] | committer: wm4

hwcontext_d3d11va: add option to enable debug mode

Basically copied from VLC (LGPL):

http://git.videolan.org/?p=vlc.git;a=blob;f=modules/video_output/win32/direct3d11.c;h=e9fcb83dcabfe778f26e63d19f218caf06a7c3ae;hb=HEAD#l1482
http://git.videolan.org/?p=vlc.git;a=blob;f=modules/codec/avcodec/d3d11va.c;h=85e7d25caebc059a9770da2ef4bb8fe90816d76d;hb=HEAD#l599

Merges Libav commit cfc9e7c94eafa33e7f109099664ec4fb57ac5ca3.

Signed-off-by: Luca Barbato 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=289d387330d8a0a07ff98c9e09215627a70ace2d
---

 configure |  2 ++
 libavutil/hwcontext_d3d11va.c | 32 
 libavutil/hwcontext_d3d11va.h |  4 
 3 files changed, 38 insertions(+)

diff --git a/configure b/configure
index bf48472219..282114d268 100755
--- a/configure
+++ b/configure
@@ -1859,6 +1859,7 @@ HEADERS_LIST="
 direct_h
 dirent_h
 dlfcn_h
+dxgidebug_h
 dxva_h
 ES2_gl_h
 gsm_h
@@ -5683,6 +5684,7 @@ check_header d3d11.h
 check_header direct.h
 check_header dirent.h
 check_header dlfcn.h
+check_header dxgidebug.h
 check_header dxva.h
 check_header dxva2api.h -D_WIN32_WINNT=0x0600
 check_header io.h
diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c
index 376c76e5cf..75f78d8669 100644
--- a/libavutil/hwcontext_d3d11va.c
+++ b/libavutil/hwcontext_d3d11va.c
@@ -16,6 +16,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "config.h"
+
 #include 
 
 // Include thread.h before redefining _WIN32_WINNT, to get
@@ -32,6 +34,10 @@
 #include 
 #include 
 
+#if HAVE_DXGIDEBUG_H
+#include 
+#endif
+
 #include "avassert.h"
 #include "common.h"
 #include "hwcontext.h"
@@ -476,8 +482,18 @@ static int d3d11va_device_create(AVHWDeviceContext *ctx, 
const char *device,
 IDXGIAdapter   *pAdapter = NULL;
 ID3D10Multithread  *pMultithread;
 UINT creationFlags = D3D11_CREATE_DEVICE_VIDEO_SUPPORT;
+int is_debug   = !!av_dict_get(opts, "debug", NULL, 0);
 int ret;
 
+// (On UWP we can't check this.)
+#if HAVE_LOADLIBRARY
+if (!LoadLibrary("d3d11_1sdklayers.dll"))
+is_debug = 0;
+#endif
+
+if (is_debug)
+creationFlags |= D3D11_CREATE_DEVICE_DEBUG;
+
 if ((ret = ff_thread_once(&functions_loaded, load_functions)) != 0)
 return AVERROR_UNKNOWN;
 if (!mD3D11CreateDevice || !mCreateDXGIFactory) {
@@ -511,6 +527,22 @@ static int d3d11va_device_create(AVHWDeviceContext *ctx, 
const char *device,
 ID3D10Multithread_Release(pMultithread);
 }
 
+#if HAVE_LOADLIBRARY && HAVE_DXGIDEBUG_H
+if (is_debug) {
+HANDLE dxgidebug_dll = LoadLibrary("dxgidebug.dll");
+if (dxgidebug_dll) {
+HRESULT (WINAPI  * pf_DXGIGetDebugInterface)(const GUID *riid, 
void **ppDebug)
+= (void *)GetProcAddress(dxgidebug_dll, 
"DXGIGetDebugInterface");
+if (pf_DXGIGetDebugInterface) {
+IDXGIDebug *dxgi_debug = NULL;
+hr = pf_DXGIGetDebugInterface(&IID_IDXGIDebug, 
(void**)&dxgi_debug);
+if (SUCCEEDED(hr) && dxgi_debug)
+IDXGIDebug_ReportLiveObjects(dxgi_debug, DXGI_DEBUG_ALL, 
DXGI_DEBUG_RLO_ALL);
+}
+}
+}
+#endif
+
 return 0;
 }
 
diff --git a/libavutil/hwcontext_d3d11va.h b/libavutil/hwcontext_d3d11va.h
index d41451580e..0ed3033452 100644
--- a/libavutil/hwcontext_d3d11va.h
+++ b/libavutil/hwcontext_d3d11va.h
@@ -30,6 +30,10 @@
  * Using sw_format==AV_PIX_FMT_YUV420P has special semantics, and maps to
  * DXGI_FORMAT_420_OPAQUE. av_hwframe_transfer_data() is not supported for
  * this format. Refer to MSDN for details.
+ *
+ * av_hwdevice_ctx_create() for this device type supports a key named "debug"
+ * for the AVDictionary entry. If this is set to any value, the device creation
+ * code will try to load various supported D3D debugging layers.
  */
 
 #include 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] hwframe: Allow hwaccel frame allocators to align surface sizes

2017-06-27 Thread Anton Khirnov
ffmpeg | branch: master | Anton Khirnov  | Thu Jun 22 
20:05:12 2017 +0200| [d14179e3d49eb0f2533db16151e01abb0018b165] | committer: wm4

hwframe: Allow hwaccel frame allocators to align surface sizes

Hardware accelerated decoding generally uses AVHWFramesContext for pool
allocation of hardware surfaces. These are set up to allocate surfaces
aligned to hardware and hwaccel API requirements. Due to the
architecture, av_hwframe_get_buffer() will return AVFrames with
the dimensions set to the aligned sizes.

This causes some decoders (like hevc) to return these aligned sizes as
the final frame size, instead of cropping them to the video's actual
dimensions. To make sure this doesn't happen, crop the frame to the
size the decoder expects when ff_get_buffer() is called.

Merges Libav commit 3fdf50f9e864c88da2139cf066832944de81acaa.

Signed-off-by: Luca Barbato 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d14179e3d49eb0f2533db16151e01abb0018b165
---

 libavcodec/decode.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index dccce01402..052f93d82f 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -1528,8 +1528,12 @@ int avcodec_default_get_buffer2(AVCodecContext *avctx, 
AVFrame *frame, int flags
 {
 int ret;
 
-if (avctx->hw_frames_ctx)
-return av_hwframe_get_buffer(avctx->hw_frames_ctx, frame, 0);
+if (avctx->hw_frames_ctx) {
+ret = av_hwframe_get_buffer(avctx->hw_frames_ctx, frame, 0);
+frame->width  = avctx->coded_width;
+frame->height = avctx->coded_height;
+return ret;
+}
 
 if ((ret = update_frame_pool(avctx, frame)) < 0)
 return ret;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] x86/vf_blend: add sse and ssse3 extremity functions

2017-06-27 Thread James Almer
ffmpeg | branch: master | James Almer  | Tue Jun 27 12:27:10 
2017 -0300| [fa50d9360ba36ba2ee8f85f2c59e8d6af20e833a] | committer: James Almer

x86/vf_blend: add sse and ssse3 extremity functions

Reviewed-by: Paul B Mahol 
Signed-off-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fa50d9360ba36ba2ee8f85f2c59e8d6af20e833a
---

 libavfilter/x86/vf_blend.asm| 25 +
 libavfilter/x86/vf_blend_init.c |  4 
 tests/checkasm/vf_blend.c   |  1 +
 3 files changed, 30 insertions(+)

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 33b1ad1496..25f6f5affc 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -286,6 +286,31 @@ BLEND_INIT difference, 3
 jl .loop
 BLEND_END
 
+BLEND_INIT extremity, 8
+pxor   m2, m2
+mova   m4, [pw_255]
+.nextrow:
+movxq, widthq
+
+.loop:
+movum0, [topq + xq]
+movum1, [bottomq + xq]
+punpckhbw   m5, m0, m2
+punpcklbw   m0, m2
+punpckhbw   m6, m1, m2
+punpcklbw   m1, m2
+psubw   m3, m4, m0
+psubw   m7, m4, m5
+psubw   m3, m1
+psubw   m7, m6
+ABS1m3, m1
+ABS1m7, m6
+packuswbm3, m7
+mova   [dstq + xq], m3
+add xq, mmsize
+jl .loop
+BLEND_END
+
 BLEND_INIT negation, 5
 pxor   m2, m2
 mova   m4, [pw_255]
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 96fe3d8baa..71f9b0a685 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -47,6 +47,8 @@ BLEND_FUNC(subtract, sse2)
 BLEND_FUNC(xor, sse2)
 BLEND_FUNC(difference, sse2)
 BLEND_FUNC(difference, ssse3)
+BLEND_FUNC(extremity, sse2)
+BLEND_FUNC(extremity, ssse3)
 BLEND_FUNC(negation, sse2)
 BLEND_FUNC(negation, ssse3)
 
@@ -72,12 +74,14 @@ av_cold void ff_blend_init_x86(FilterParams *param, int 
is_16bit)
 case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
 case BLEND_XOR:  param->blend = ff_blend_xor_sse2;  break;
 case BLEND_DIFFERENCE: param->blend = ff_blend_difference_sse2; break;
+case BLEND_EXTREMITY:  param->blend = ff_blend_extremity_sse2; break;
 case BLEND_NEGATION:   param->blend = ff_blend_negation_sse2;   break;
 }
 }
 if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
 switch (param->mode) {
 case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
+case BLEND_EXTREMITY:  param->blend = ff_blend_extremity_ssse3; break;
 case BLEND_NEGATION:   param->blend = ff_blend_negation_ssse3;   break;
 }
 }
diff --git a/tests/checkasm/vf_blend.c b/tests/checkasm/vf_blend.c
index aa568c0de0..4e018ac69e 100644
--- a/tests/checkasm/vf_blend.c
+++ b/tests/checkasm/vf_blend.c
@@ -117,6 +117,7 @@ void checkasm_check_blend(void)
 check_and_report(subtract, BLEND_SUBTRACT)
 check_and_report(xor, BLEND_XOR)
 check_and_report(difference, BLEND_DIFFERENCE)
+check_and_report(extremity, BLEND_EXTREMITY)
 check_and_report(negation, BLEND_NEGATION)
 
 report("8bit");

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] x86/vf_blend: optimize difference and negation functions

2017-06-27 Thread James Almer
ffmpeg | branch: master | James Almer  | Tue Jun 27 12:42:58 
2017 -0300| [0daa1cf0731830288b8cc875ca1ee641cfe422b2] | committer: James Almer

x86/vf_blend: optimize difference and negation functions

Process more pixels per loop.

Reviewed-by: Paul B Mahol 
Signed-off-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0daa1cf0731830288b8cc875ca1ee641cfe422b2
---

 libavfilter/x86/vf_blend.asm | 40 
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 25f6f5affc..d5e512e6e0 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -268,21 +268,25 @@ BLEND_INIT phoenix, 4
 BLEND_END
 
 %macro BLEND_ABS 0
-BLEND_INIT difference, 3
+BLEND_INIT difference, 5
 pxor   m2, m2
 .nextrow:
 movxq, widthq
 
 .loop:
-movhm0, [topq + xq]
-movhm1, [bottomq + xq]
+movum0, [topq + xq]
+movum1, [bottomq + xq]
+punpckhbw   m3, m0, m2
 punpcklbw   m0, m2
+punpckhbw   m4, m1, m2
 punpcklbw   m1, m2
 psubw   m0, m1
+psubw   m3, m4
 ABS1m0, m1
-packuswbm0, m0
-movh   [dstq + xq], m0
-add xq, mmsize / 2
+ABS1m3, m4
+packuswbm0, m3
+mova   [dstq + xq], m0
+add xq, mmsize
 jl .loop
 BLEND_END
 
@@ -311,26 +315,30 @@ BLEND_INIT extremity, 8
 jl .loop
 BLEND_END
 
-BLEND_INIT negation, 5
+BLEND_INIT negation, 8
 pxor   m2, m2
 mova   m4, [pw_255]
 .nextrow:
 movxq, widthq
 
 .loop:
-movhm0, [topq + xq]
-movhm1, [bottomq + xq]
+movum0, [topq + xq]
+movum1, [bottomq + xq]
+punpckhbw   m5, m0, m2
 punpcklbw   m0, m2
+punpckhbw   m6, m1, m2
 punpcklbw   m1, m2
-movam3, m4
-psubw   m3, m0
+psubw   m3, m4, m0
+psubw   m7, m4, m5
 psubw   m3, m1
+psubw   m7, m6
 ABS1m3, m1
-movam0, m4
-psubw   m0, m3
-packuswbm0, m0
-movh   [dstq + xq], m0
-add xq, mmsize / 2
+ABS1m7, m1
+psubw   m0, m4, m3
+psubw   m1, m4, m7
+packuswbm0, m1
+mova   [dstq + xq], m0
+add xq, mmsize
 jl .loop
 BLEND_END
 %endmacro

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec/vp9: add 64-bit ipred_dr_32x32_16 avx2 implementation

2017-06-27 Thread Ilia Valiakhmetov
ffmpeg | branch: master | Ilia Valiakhmetov  | Tue Jun 27 
15:06:19 2017 -0400| [35a5d9715dd82fd00f1d1401ec6be2d3e2eea81c] | committer: 
Ronald S. Bultje

avcodec/vp9: add 64-bit ipred_dr_32x32_16 avx2 implementation

vp9_diag_downright_32x32_12bpp_c: 429.7
vp9_diag_downright_32x32_12bpp_sse2: 158.9
vp9_diag_downright_32x32_12bpp_ssse3: 144.6
vp9_diag_downright_32x32_12bpp_avx: 141.0
vp9_diag_downright_32x32_12bpp_avx2: 73.8

Almost 50% faster than avx implementation

Signed-off-by: Ronald S. Bultje 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=35a5d9715dd82fd00f1d1401ec6be2d3e2eea81c
---

 libavcodec/x86/vp9dsp_init_16bpp.c|   6 +-
 libavcodec/x86/vp9intrapred_16bpp.asm | 103 +-
 2 files changed, 106 insertions(+), 3 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c 
b/libavcodec/x86/vp9dsp_init_16bpp.c
index cd931cb65f..4faee7b34c 100644
--- a/libavcodec/x86/vp9dsp_init_16bpp.c
+++ b/libavcodec/x86/vp9dsp_init_16bpp.c
@@ -52,8 +52,9 @@ decl_ipred_fns(dc,  16, mmxext, sse2);
 decl_ipred_fns(dc_top,  16, mmxext, sse2);
 decl_ipred_fns(dc_left, 16, mmxext, sse2);
 decl_ipred_fn(dl,   16, 16, avx2);
-decl_ipred_fn(dr,   16, 16, avx2);
 decl_ipred_fn(dl,   32, 16, avx2);
+decl_ipred_fn(dr,   16, 16, avx2);
+decl_ipred_fn(dr,   32, 16, avx2);
 
 #define decl_ipred_dir_funcs(type) \
 decl_ipred_fns(type, 16, sse2,  sse2); \
@@ -137,8 +138,9 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
 init_fpel_func(1, 1,  64, avg, _16, avx2);
 init_fpel_func(0, 1, 128, avg, _16, avx2);
 init_ipred_func(dl, DIAG_DOWN_LEFT, 16, 16, avx2);
-init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
 init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2);
+init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
+init_ipred_func(dr, DIAG_DOWN_RIGHT, 32, 16, avx2);
 }
 
 #endif /* HAVE_X86ASM */
diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm 
b/libavcodec/x86/vp9intrapred_16bpp.asm
index 6d4400ba8b..32b698243a 100644
--- a/libavcodec/x86/vp9intrapred_16bpp.asm
+++ b/libavcodec/x86/vp9intrapred_16bpp.asm
@@ -1221,8 +1221,109 @@ cglobal vp9_ipred_dr_16x16_16, 4, 5, 6, dst, stride, l, 
a
 mova  [dstq+strideq*0], m4 ; 0
 mova [dst3q+strideq*4], m5 ; 7
 RET
-%endif
 
+%if ARCH_X86_64
+cglobal vp9_ipred_dr_32x32_16, 4, 7, 10, dst, stride, l, a
+movam0, [lq+mmsize*0+0]; l[0-15]
+movam1, [lq+mmsize*1+0]; l[16-31]
+movum2, [aq+mmsize*0-2]; *abcdefghijklmno
+movam3, [aq+mmsize*0+0]; abcdefghijklmnop
+movam4, [aq+mmsize*1+0]; qrstuvwxyz012345
+vperm2i128  m5, m0, m1, q0201  ; lmnopqrstuvwxyz0
+vpalignrm6, m5, m0, 2  ; mnopqrstuvwxyz01
+vpalignrm7, m5, m0, 4  ; nopqrstuvwxyz012
+LOWPASS  0,  6,  7 ; L[0-15]
+vperm2i128  m7, m1, m2, q0201  ; stuvwxyz*abcdefg
+vpalignrm5, m7, m1, 2  ; lmnopqrstuvwxyz*
+vpalignrm6, m7, m1, 4  ; mnopqrstuvwxyz*a
+LOWPASS  1,  5,  6 ; L[16-31]#
+vperm2i128  m5, m3, m4, q0201  ; ijklmnopqrstuvwx
+vpalignrm6, m5, m3, 2  ; bcdefghijklmnopq
+LOWPASS  2,  3,  6 ; A[0-15]
+movum3, [aq+mmsize*1-2]; pqrstuvwxyz01234
+vperm2i128  m6, m4, m4, q2001  ; yz012345
+vpalignrm7, m6, m4, 2  ; rstuvwxyz012345.
+LOWPASS  3,  4,  7 ; A[16-31].
+vperm2i128  m4, m1, m2, q0201  ; TUVWXYZ#ABCDEFGH
+vperm2i128  m5, m0, m1, q0201  ; L[7-15]L[16-23]
+vperm2i128  m8, m2, m3, q0201  ; IJKLMNOPQRSTUVWX
+DEFINE_ARGS dst8, stride, stride3, stride7, stride5, dst24, cnt
+lea   stride3q, [strideq*3]
+lea   stride5q, [stride3q+strideq*2]
+lea   stride7q, [strideq*4+stride3q]
+lea dst24q, [dst8q+stride3q*8]
+lea  dst8q, [dst8q+strideq*8]
+mov   cntd, 2
+
+.loop:
+mova  [dst24q+stride7q+0 ], m0 ; 31 23 15 7
+mova  [dst24q+stride7q+32], m1
+mova[dst8q+stride7q+0], m1
+mova   [dst8q+stride7q+32], m2
+vpalignrm6, m4, m1, 2
+vpalignrm7, m5, m0, 2
+vpalignrm9, m8, m2, 2
+mova [dst24q+stride3q*2+0], m7 ; 30 22 14 6
+mova [dst24q+stride3q*2+32], m6
+mova  [dst8q+stride3q*2+0], m6
+mova [dst8q+strid

[FFmpeg-cvslog] libswresample/swresample: remove obsolete code

2017-06-27 Thread Michael Niedermayer
ffmpeg | branch: master | Michael Niedermayer  | Tue 
Jun 27 22:22:50 2017 +0200| [c578c9c229f010f5249255c13ee0c71c415e0ac8] | 
committer: Michael Niedermayer

libswresample/swresample: remove obsolete code

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c578c9c229f010f5249255c13ee0c71c415e0ac8
---

 libswresample/swresample.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/libswresample/swresample.h b/libswresample/swresample.h
index a8db5c2490..c7b84fbcac 100644
--- a/libswresample/swresample.h
+++ b/libswresample/swresample.h
@@ -126,10 +126,6 @@
 
 #include "libswresample/version.h"
 
-#if LIBSWRESAMPLE_VERSION_MAJOR < 1
-#define SWR_CH_MAX 32   ///< Maximum number of channels
-#endif
-
 /**
  * @name Option constants
  * These constants are used for the @ref avoptions interface for lswr.

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec/hevcdec: do basic validity check on delta_chroma_weight and offset

2017-06-27 Thread Michael Niedermayer
ffmpeg | branch: master | Michael Niedermayer  | Tue 
Jun 27 14:11:00 2017 +0200| [2c874548d663225a61b9c25a8b2ce490d26b65fa] | 
committer: Michael Niedermayer

avcodec/hevcdec: do basic validity check on delta_chroma_weight and offset

Fixes: runtime error: signed integer overflow: 2147483520 + 128 cannot be 
represented in type 'int'
Fixes: 2385/clusterfuzz-testcase-minimized-6594333576790016

Found-by: continuous fuzzing process 
https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2c874548d663225a61b9c25a8b2ce490d26b65fa
---

 libavcodec/hevcdec.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index f6bc5efa96..095564b368 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -139,7 +139,7 @@ fail:
 return AVERROR(ENOMEM);
 }
 
-static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
+static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
 {
 int i = 0;
 int j = 0;
@@ -182,6 +182,12 @@ static void pred_weight_table(HEVCContext *s, 
GetBitContext *gb)
 for (j = 0; j < 2; j++) {
 int delta_chroma_weight_l0 = get_se_golomb(gb);
 int delta_chroma_offset_l0 = get_se_golomb(gb);
+
+if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
+|| delta_chroma_offset_l0 < -(1<<17) || 
delta_chroma_offset_l0 > (1<<17)) {
+return AVERROR_INVALIDDATA;
+}
+
 s->sh.chroma_weight_l0[i][j] = (1 << 
s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 
- ((128 * s->sh.chroma_weight_l0[i][j])

 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
@@ -218,6 +224,12 @@ static void pred_weight_table(HEVCContext *s, 
GetBitContext *gb)
 for (j = 0; j < 2; j++) {
 int delta_chroma_weight_l1 = get_se_golomb(gb);
 int delta_chroma_offset_l1 = get_se_golomb(gb);
+
+if (   (int8_t)delta_chroma_weight_l1 != 
delta_chroma_weight_l1
+|| delta_chroma_offset_l1 < -(1<<17) || 
delta_chroma_offset_l1 > (1<<17)) {
+return AVERROR_INVALIDDATA;
+}
+
 s->sh.chroma_weight_l1[i][j] = (1 << 
s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 s->sh.chroma_offset_l1[i][j] = 
av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])

 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
@@ -230,6 +242,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext 
*gb)
 }
 }
 }
+return 0;
 }
 
 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
@@ -692,7 +705,9 @@ static int hls_slice_header(HEVCContext *s)
 
 if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == 
HEVC_SLICE_P) ||
 (s->ps.pps->weighted_bipred_flag && sh->slice_type == 
HEVC_SLICE_B)) {
-pred_weight_table(s, gb);
+int ret = pred_weight_table(s, gb);
+if (ret < 0)
+return ret;
 }
 
 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] hwcontext_d3d11va: use correct license header

2017-06-27 Thread Hendrik Leppkes
ffmpeg | branch: master | Hendrik Leppkes  | Wed Jun 28 
00:19:55 2017 +0200| [15b00aea418b9f1c480f497278aee5cba3d98e84] | committer: 
Hendrik Leppkes

hwcontext_d3d11va: use correct license header

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=15b00aea418b9f1c480f497278aee5cba3d98e84
---

 libavutil/hwcontext_d3d11va.c | 8 
 libavutil/hwcontext_d3d11va.h | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c
index 75f78d8669..9a86d33b38 100644
--- a/libavutil/hwcontext_d3d11va.c
+++ b/libavutil/hwcontext_d3d11va.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavutil/hwcontext_d3d11va.h b/libavutil/hwcontext_d3d11va.h
index 0ed3033452..98db7ce343 100644
--- a/libavutil/hwcontext_d3d11va.h
+++ b/libavutil/hwcontext_d3d11va.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec/x86/vp9dsp_init_16bpp: Fix linking to missing ff_vp9_ipred_dr_32x32_16_avx2() on 32bit

2017-06-27 Thread Michael Niedermayer
ffmpeg | branch: master | Michael Niedermayer  | Wed Jun 28 00:16:18 2017 +0200| [516c213f089d797cbe98648c5bf957ae30ced162] | committer: Michael Niedermayer

avcodec/x86/vp9dsp_init_16bpp: Fix linking to missing ff_vp9_ipred_dr_32x32_16_avx2() on 32bit

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=516c213f089d797cbe98648c5bf957ae30ced162
---

 libavcodec/x86/vp9dsp_init_16bpp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c b/libavcodec/x86/vp9dsp_init_16bpp.c
index 4faee7b34c..60d10a12a3 100644
--- a/libavcodec/x86/vp9dsp_init_16bpp.c
+++ b/libavcodec/x86/vp9dsp_init_16bpp.c
@@ -140,7 +140,9 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
 init_ipred_func(dl, DIAG_DOWN_LEFT, 16, 16, avx2);
 init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2);
 init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
+#if ARCH_X86_64
 init_ipred_func(dr, DIAG_DOWN_RIGHT, 32, 16, avx2);
+#endif
 }
 
 #endif /* HAVE_X86ASM */

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] x86/vf_blend: use ABS2 macro

2017-06-27 Thread James Almer
ffmpeg | branch: master | James Almer  | Tue Jun 27 20:45:55 2017 -0300| [d2ef9e6e7f9ef71aae15e9493189515a857928b1] | committer: James Almer

x86/vf_blend: use ABS2 macro

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d2ef9e6e7f9ef71aae15e9493189515a857928b1
---

 libavfilter/x86/vf_blend.asm | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index d5e512e6e0..cef479d995 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -282,8 +282,7 @@ BLEND_INIT difference, 5
 punpcklbw   m1, m2
 psubw   m0, m1
 psubw   m3, m4
-ABS1m0, m1
-ABS1m3, m4
+ABS2m0, m3, m1, m4
 packuswbm0, m3
 mova   [dstq + xq], m0
 add xq, mmsize
@@ -307,8 +306,7 @@ BLEND_INIT extremity, 8
 psubw   m7, m4, m5
 psubw   m3, m1
 psubw   m7, m6
-ABS1m3, m1
-ABS1m7, m6
+ABS2m3, m7, m1, m6
 packuswbm3, m7
 mova   [dstq + xq], m3
 add xq, mmsize
@@ -332,8 +330,7 @@ BLEND_INIT negation, 8
 psubw   m7, m4, m5
 psubw   m3, m1
 psubw   m7, m6
-ABS1m3, m1
-ABS1m7, m1
+ABS2m3, m7, m1, m6
 psubw   m0, m4, m3
 psubw   m1, m4, m7
 packuswbm0, m1

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog