[FFmpeg-devel] [PATCH] avcodec/mips: Improve avc bi-weighted mc msa functions

2017-10-09 Thread kaustubh.raste
From: Kaustubh Raste 

Replace the generic functions with block-size-specific functions.

Signed-off-by: Kaustubh Raste 
---
 libavcodec/mips/h264dsp_msa.c   |  469 +++
 libavutil/mips/generic_macros_msa.h |4 +
 2 files changed, 311 insertions(+), 162 deletions(-)

diff --git a/libavcodec/mips/h264dsp_msa.c b/libavcodec/mips/h264dsp_msa.c
index 5b06bd9..e50f5ca 100644
--- a/libavcodec/mips/h264dsp_msa.c
+++ b/libavcodec/mips/h264dsp_msa.c
@@ -223,217 +223,242 @@ static void avc_wgt_8x16_msa(uint8_t *data, int32_t 
stride, int32_t log2_denom,
 }
 }
 
-static void avc_biwgt_4x2_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
+static void avc_biwgt_4x2_msa(uint8_t *src, uint8_t *dst, int32_t stride,
   int32_t log2_denom, int32_t src_weight,
   int32_t dst_weight, int32_t offset_in)
 {
-uint32_t load0, load1, out0, out1;
-v16i8 src_wgt, dst_wgt, wgt;
-v16i8 src0, src1, dst0, dst1;
-v8i16 temp0, temp1, denom, offset, add_val;
-int32_t val = 128 * (src_weight + dst_weight);
+uint32_t tp0, tp1;
+v16i8 src_wgt, dst_wgt, wgt, vec0;
+v16u8 src0 = { 0 }, dst0 = { 0 };
+v8i16 tmp0, denom, offset, max255 = __msa_ldi_h(255);
 
-offset_in = ((offset_in + 1) | 1) << log2_denom;
+offset_in = (unsigned) ((offset_in + 1) | 1) << log2_denom;
+offset_in += (128 * (src_weight + dst_weight));
 
 src_wgt = __msa_fill_b(src_weight);
 dst_wgt = __msa_fill_b(dst_weight);
 offset = __msa_fill_h(offset_in);
 denom = __msa_fill_h(log2_denom + 1);
-add_val = __msa_fill_h(val);
-offset += add_val;
 
 wgt = __msa_ilvev_b(dst_wgt, src_wgt);
 
-load0 = LW(src);
-src += src_stride;
-load1 = LW(src);
-
-src0 = (v16i8) __msa_fill_w(load0);
-src1 = (v16i8) __msa_fill_w(load1);
-
-load0 = LW(dst);
-load1 = LW(dst + dst_stride);
-
-dst0 = (v16i8) __msa_fill_w(load0);
-dst1 = (v16i8) __msa_fill_w(load1);
+LW2(src, stride, tp0, tp1);
+INSERT_W2_UB(tp0, tp1, src0);
+LW2(dst, stride, tp0, tp1);
+INSERT_W2_UB(tp0, tp1, dst0);
+XORI_B2_128_UB(src0, dst0);
+vec0 = (v16i8) __msa_ilvr_b((v16i8) dst0, (v16i8) src0);
+tmp0 = __msa_dpadd_s_h(offset, wgt, vec0);
+tmp0 >>= denom;
+tmp0 = __msa_maxi_s_h(tmp0, 0);
+tmp0 = __msa_min_s_h(max255, tmp0);
+dst0 = (v16u8) __msa_pckev_b((v16i8) tmp0, (v16i8) tmp0);
+ST4x2_UB(dst0, dst, stride);
+}
 
-XORI_B4_128_SB(src0, src1, dst0, dst1);
-ILVR_B2_SH(dst0, src0, dst1, src1, temp0, temp1);
+static void avc_biwgt_4x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+  int32_t log2_denom, int32_t src_weight,
+  int32_t dst_weight, int32_t offset_in)
+{
+uint32_t tp0, tp1, tp2, tp3;
+v16i8 src_wgt, dst_wgt, wgt, vec0, vec1;
+v16u8 src0, dst0;
+v8i16 tmp0, tmp1, denom, offset;
 
-temp0 = __msa_dpadd_s_h(offset, wgt, (v16i8) temp0);
-temp1 = __msa_dpadd_s_h(offset, wgt, (v16i8) temp1);
+offset_in = (unsigned) ((offset_in + 1) | 1) << log2_denom;
+offset_in += (128 * (src_weight + dst_weight));
 
-temp0 >>= denom;
-temp1 >>= denom;
+src_wgt = __msa_fill_b(src_weight);
+dst_wgt = __msa_fill_b(dst_weight);
+offset = __msa_fill_h(offset_in);
+denom = __msa_fill_h(log2_denom + 1);
 
-CLIP_SH2_0_255(temp0, temp1);
-PCKEV_B2_SB(temp0, temp0, temp1, temp1, dst0, dst1);
+wgt = __msa_ilvev_b(dst_wgt, src_wgt);
 
-out0 = __msa_copy_u_w((v4i32) dst0, 0);
-out1 = __msa_copy_u_w((v4i32) dst1, 0);
-SW(out0, dst);
-dst += dst_stride;
-SW(out1, dst);
+LW4(src, stride, tp0, tp1, tp2, tp3);
+INSERT_W4_UB(tp0, tp1, tp2, tp3, src0);
+LW4(dst, stride, tp0, tp1, tp2, tp3);
+INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+XORI_B2_128_UB(src0, dst0);
+ILVRL_B2_SB(dst0, src0, vec0, vec1);
+tmp0 = __msa_dpadd_s_h(offset, wgt, vec0);
+tmp1 = __msa_dpadd_s_h(offset, wgt, vec1);
+tmp0 >>= denom;
+tmp1 >>= denom;
+CLIP_SH2_0_255(tmp0, tmp1);
+dst0 = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
 }
 
-static void avc_biwgt_4x4multiple_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
-  int32_t height, int32_t log2_denom,
-  int32_t src_weight, int32_t dst_weight,
-  int32_t offset_in)
+static void avc_biwgt_4x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+  int32_t log2_denom, int32_t src_weight,
+  int32_t dst_weight, int32_t offset_in)
 {
-uint8_t cnt;
-uint32_t load0, load1, load2, load3;
-v16i8 src_wgt, dst_wgt, wgt;
-v16i8 src0, src1, src2, src3;
-v16i8 dst0, dst1, 

[FFmpeg-devel] [V3 1/4] ffmpeg: remove hwaccel_lax_profile_check opt.

2017-10-09 Thread Jun Zhao
V3: Remove the hwaccel_lax_profile_check option and add a new per-stream
hwaccel_flags option.
From 2b1585fd6e6e68c81761ace0a8503385067086e0 Mon Sep 17 00:00:00 2001
From: Jun Zhao 
Date: Mon, 9 Oct 2017 02:13:14 -0400
Subject: [V3 1/4] ffmpeg: remove hwaccel_lax_profile_check opt.

Remove hwaccel_lax_profile_check; the per-stream hwaccel_flags option
will be used instead.

Signed-off-by: Jun Zhao 
---
 fftools/ffmpeg.h | 1 -
 fftools/ffmpeg_opt.c | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index f6c76bcc55..888f77223a 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -624,7 +624,6 @@ extern const AVIOInterruptCB int_cb;
 
 extern const OptionDef options[];
 extern const HWAccel hwaccels[];
-extern int hwaccel_lax_profile_check;
 extern AVBufferRef *hw_device_ctx;
 #if CONFIG_QSV
 extern char *qsv_device;
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index 100fa76e46..500920326b 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -100,7 +100,6 @@ const HWAccel hwaccels[] = {
 #endif
 { 0 },
 };
-int hwaccel_lax_profile_check = 0;
 AVBufferRef *hw_device_ctx;
 HWDevice *filter_hw_device;
 
@@ -3640,8 +3639,6 @@ const OptionDef options[] = {
 { "autorotate",   HAS_ARG | OPT_BOOL | OPT_SPEC |
   OPT_EXPERT | OPT_INPUT,  
  { .off = OFFSET(autorotate) },
 "automatically insert correct rotate filters" },
-{ "hwaccel_lax_profile_check", OPT_BOOL | OPT_EXPERT,  
  { &hwaccel_lax_profile_check},
-"attempt to decode anyway if HW accelerated decoder's supported 
profiles do not exactly match the stream" },
 
 /* audio options */
 { "aframes",OPT_AUDIO | HAS_ARG  | OPT_PERFILE | OPT_OUTPUT,   
{ .func_arg = opt_audio_frames },
-- 
2.11.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [V3 3/4] doc/codecs: document the -hwaccel_flags option.

2017-10-09 Thread Jun Zhao

From 2c5609811a1bad27fed802c674c9fb095edbb59b Mon Sep 17 00:00:00 2001
From: Jun Zhao 
Date: Mon, 9 Oct 2017 02:41:37 -0400
Subject: [V3 3/4] doc/codecs: document the -hwaccel_flags option.

Signed-off-by: Jun Zhao 
---
 doc/codecs.texi | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/doc/codecs.texi b/doc/codecs.texi
index 40f64fe4c8..2d23a11b7f 100644
--- a/doc/codecs.texi
+++ b/doc/codecs.texi
@@ -1279,6 +1279,20 @@ ffprobe -dump_separator "
 Maximum number of pixels per image. This value can be used to avoid out of
 memory failures due to large images.
 
+@item  -hwaccel_flags  @var{flags} (@emph{decoding,video})
+Possible values:
+@table @samp
+@item ignore_level
+ignore level even if the codec level used is unknown or higher than the maximum
+supported level reported by the hardware driver
+@item allow_high_depth
+allow to output YUV pixel formats with a different chroma sampling than 4:2:0
+and/or other than 8 bits per component
+@item allow_profile_mismatch
+attempt to decode anyway if HW accelerated decoder's supported profiles do not
+exactly match the stream
+@end table
+
 @item apply_cropping @var{bool} (@emph{decoding,video})
 Enable cropping if cropping parameters are multiples of the required
 alignment for the left and top parameters. If the alignment is not met the
-- 
2.11.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [V3 2/4] lavc: enable hwaccel_flags option

2017-10-09 Thread Jun Zhao

From ba5f522929f5232132121f339c288fb6464fdd39 Mon Sep 17 00:00:00 2001
From: Jun Zhao 
Date: Mon, 9 Oct 2017 02:22:38 -0400
Subject: [V3 2/4] lavc: enable hwaccel_flags option

Enable the per-stream hwaccel_flags option.

Signed-off-by: Jun Zhao 
---
 libavcodec/options_table.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 12712fb541..2ac37c3ff1 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -576,6 +576,10 @@ static const AVOption avcodec_options[] = {
 {"pixel_format", "set pixel format", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, 
{.i64=AV_PIX_FMT_NONE}, -1, INT_MAX, 0 },
 {"video_size", "set video size", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, 
{.str=NULL}, 0, INT_MAX, 0 },
 {"max_pixels", "Maximum number of pixels", OFFSET(max_pixels), 
AV_OPT_TYPE_INT64, {.i64 = INT_MAX }, 0, INT_MAX, A|V|S|D|E },
+{"hwaccel_flags", NULL, OFFSET(hwaccel_flags), AV_OPT_TYPE_FLAGS, {.i64 = 
AV_HWACCEL_FLAG_IGNORE_LEVEL }, 0, UINT_MAX, V|D, "hwaccel_flags"},
+{"ignore_level", "ignore level even if the codec level used is unknown or 
higher than the maximum supported level reported by the hardware driver", 0, 
AV_OPT_TYPE_CONST, { .i64 = AV_HWACCEL_FLAG_IGNORE_LEVEL }, INT_MIN, INT_MAX, V 
| D, "hwaccel_flags" },
+{"allow_high_depth", "allow to output YUV pixel formats with a different 
chroma sampling than 4:2:0 and/or other than 8 bits per component", 0, 
AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_HIGH_DEPTH }, INT_MIN, 
INT_MAX, V | D, "hwaccel_flags"},
+{"allow_profile_mismatch", "attempt to decode anyway if HW accelerated 
decoder's supported profiles do not exactly match the stream", 0, 
AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH }, INT_MIN, 
INT_MAX, V | D, "hwaccel_flags"},
 {NULL},
 };
 
-- 
2.11.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [V3 4/4] lavc/vaapi_decode: fix profile search when disable exact profile match.

2017-10-09 Thread Jun Zhao

From ef75f07943ff51c63bf7735a90c38a11559cad33 Mon Sep 17 00:00:00 2001
From: Jun Zhao 
Date: Thu, 21 Sep 2017 02:44:42 -0400
Subject: [V3 4/4] lavc/vaapi_decode: fix profile search when disable exact
 profile match.

When exact profile matching is disabled, use the alt_profile for the
VAAPI HWAccel decoder.

Signed-off-by: Jun Zhao 
---
 libavcodec/vaapi_decode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
index 5a555b2bd3..27ef33837c 100644
--- a/libavcodec/vaapi_decode.c
+++ b/libavcodec/vaapi_decode.c
@@ -281,7 +281,7 @@ static int vaapi_decode_make_config(AVCodecContext *avctx)
 VAStatus vas;
 int err, i, j;
 const AVCodecDescriptor *codec_desc;
-VAProfile profile, *profile_list = NULL;
+VAProfile profile, va_profile, *profile_list = NULL;
 int profile_count, exact_match, alt_profile;
 const AVPixFmtDescriptor *sw_desc, *desc;
 
@@ -328,6 +328,7 @@ static int vaapi_decode_make_config(AVCodecContext *avctx)
 if (exact_match)
 break;
 alt_profile = vaapi_profile_map[i].codec_profile;
+va_profile = vaapi_profile_map[i].va_profile;
 }
 }
 av_freep(&profile_list);
@@ -347,6 +348,7 @@ static int vaapi_decode_make_config(AVCodecContext *avctx)
 av_log(avctx, AV_LOG_WARNING, "Using possibly-"
"incompatible profile %d instead.\n",
alt_profile);
+profile = va_profile;
 } else {
 av_log(avctx, AV_LOG_VERBOSE, "Codec %s profile %d not "
"supported for hardware decode.\n",
-- 
2.11.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/mips: preload data in hevc sao edge 135 degree filter msa functions

2017-10-09 Thread Manojkumar Bhosale
LGTM

-Original Message-
From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf Of 
kaustubh.ra...@imgtec.com
Sent: Monday, October 9, 2017 11:31 AM
To: ffmpeg-devel@ffmpeg.org
Cc: Kaustubh Raste
Subject: [FFmpeg-devel] [PATCH] avcodec/mips: preload data in hevc sao edge 135 
degree filter msa functions

From: Kaustubh Raste 

Signed-off-by: Kaustubh Raste 
---
 libavcodec/mips/hevc_lpf_sao_msa.c |  194 
 1 file changed, 132 insertions(+), 62 deletions(-)

diff --git a/libavcodec/mips/hevc_lpf_sao_msa.c 
b/libavcodec/mips/hevc_lpf_sao_msa.c
index c192265..5b5537a 100644
--- a/libavcodec/mips/hevc_lpf_sao_msa.c
+++ b/libavcodec/mips/hevc_lpf_sao_msa.c
@@ -2226,23 +2226,24 @@ static void 
hevc_sao_edge_filter_135degree_4width_msa(uint8_t *dst,
   int32_t height)  {
 uint8_t *src_orig;
-int32_t h_cnt;
 uint32_t dst_val0, dst_val1;
-v8i16 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0 };
+v16i8 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 
+ };
 v16u8 const1 = (v16u8) __msa_ldi_b(1);
-v16i8 zero = { 0 };
+v16i8 offset, sao_offset = LD_SB(sao_offset_val);
 v16i8 src_zero0, src_zero1, dst0;
 v16u8 cmp_minus10, diff_minus10, cmp_minus11, diff_minus11;
 v16u8 src_minus10, src10, src_minus11, src11;
-v8i16 offset_mask0, offset_mask1, sao_offset, src00, src01;
+v8i16 offset_mask0, offset_mask1;
 
-sao_offset = LD_SH(sao_offset_val);
+sao_offset = __msa_pckev_b(sao_offset, sao_offset);
 src_orig = src - 1;
 
+/* load in advance */
 LD_UB2(src_orig - src_stride, src_stride, src_minus10, src_minus11);
+LD_UB2(src_orig + src_stride, src_stride, src10, src11);
 
-for (h_cnt = (height >> 1); h_cnt--;) {
-LD_UB2(src_orig + src_stride, src_stride, src10, src11);
+for (height -= 2; height; height -= 2) {
+src_orig += (src_stride << 1);
 
 SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
 SLDI_B2_0_UB(src_minus10, src_minus11, src_minus10, src_minus11, 2); 
@@ -2265,19 +2266,22 @@ static void 
hevc_sao_edge_filter_135degree_4width_msa(uint8_t *dst,
 offset_mask0 = (v8i16) (__msa_hadd_u_h(diff_minus10, diff_minus10) + 
2);
 offset_mask1 = (v8i16) (__msa_hadd_u_h(diff_minus11, diff_minus11) + 
2);
 
-VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask0,
-   offset_mask0, offset_mask0, offset_mask0);
-VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask1,
-   offset_mask1, offset_mask1, offset_mask1);
-ILVEV_B2_SH(src_zero0, zero, src_zero1, zero, src00, src01);
-ADD2(offset_mask0, src00, offset_mask1, src01, offset_mask0,
- offset_mask1);
-CLIP_SH2_0_255(offset_mask0, offset_mask1);
-dst0 = __msa_pckev_b((v16i8) offset_mask1, (v16i8) offset_mask0);
+offset = __msa_pckev_b((v16i8) offset_mask1, (v16i8) offset_mask0);
+dst0 = __msa_pckev_b((v16i8) src_zero1, (v16i8) src_zero0);
+
+VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset, offset, offset,
+   offset, offset);
+
+dst0 = (v16i8) __msa_xori_b((v16u8) dst0, 128);
+dst0 = __msa_adds_s_b(dst0, offset);
+dst0 = (v16i8) __msa_xori_b((v16u8) dst0, 128);
 
 src_minus10 = src10;
 src_minus11 = src11;
 
+/* load in advance */
+LD_UB2(src_orig + src_stride, src_stride, src10, src11);
+
 dst_val0 = __msa_copy_u_w((v4i32) dst0, 0);
 dst_val1 = __msa_copy_u_w((v4i32) dst0, 2);
 
@@ -2286,8 +2290,46 @@ static void 
hevc_sao_edge_filter_135degree_4width_msa(uint8_t *dst,
 SW(dst_val1, dst);
 
 dst += dst_stride;
-src_orig += (src_stride << 1);
 }
+
+SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
+SLDI_B2_0_UB(src_minus10, src_minus11, src_minus10, src_minus11, 
+ 2);
+
+ILVR_B2_UB(src10, src_minus10, src11, src_minus11, src_minus10,
+   src_minus11);
+ILVR_B2_SB(src_zero0, src_zero0, src_zero1, src_zero1, src_zero0,
+   src_zero1);
+
+cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+
+cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+
+offset_mask0 = (v8i16) (__msa_hadd_u_h(diff_minus10, diff_minus10) + 2);
+offset_mask1 = (v8i16) (__msa_hadd_u_h(diff_minus11, diff_minus11) 
+ + 2);
+
+offset = __msa_pckev_b((v16i8) offset_mask1, (v16i8) offset_mask0);
+dst0 = __msa_pckev_b((v16i8) src_zero1, (v16i8) src_zero0);
+
+VSHF_B2_SB(edge_idx, edg

[FFmpeg-devel] [PATCH] avcodec/mips: Improve avc chroma hv mc msa functions

2017-10-09 Thread kaustubh.raste
From: Kaustubh Raste 

Replace the generic functions with block-size-specific functions.

Signed-off-by: Kaustubh Raste 
---
 libavcodec/mips/h264chroma_msa.c |  309 --
 1 file changed, 166 insertions(+), 143 deletions(-)

diff --git a/libavcodec/mips/h264chroma_msa.c b/libavcodec/mips/h264chroma_msa.c
index 16e2fe4..b8fcf6d 100644
--- a/libavcodec/mips/h264chroma_msa.c
+++ b/libavcodec/mips/h264chroma_msa.c
@@ -526,8 +526,7 @@ static void avc_chroma_vt_8w_msa(uint8_t *src, uint8_t 
*dst, int32_t stride,
 }
 }
 
-static void avc_chroma_hv_2x2_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
+static void avc_chroma_hv_2x2_msa(uint8_t *src, uint8_t *dst, int32_t stride,
   uint32_t coef_hor0, uint32_t coef_hor1,
   uint32_t coef_ver0, uint32_t coef_ver1)
 {
@@ -544,7 +543,7 @@ static void avc_chroma_hv_2x2_msa(uint8_t *src, int32_t 
src_stride,
 
 mask = LD_SB(&chroma_mask_arr[48]);
 
-LD_UB3(src, src_stride, src0, src1, src2);
+LD_UB3(src, stride, src0, src1, src2);
 VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
 DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
 MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
@@ -558,12 +557,11 @@ static void avc_chroma_hv_2x2_msa(uint8_t *src, int32_t 
src_stride,
 out1 = __msa_copy_u_h(res_vert, 1);
 
 SH(out0, dst);
-dst += dst_stride;
+dst += stride;
 SH(out1, dst);
 }
 
-static void avc_chroma_hv_2x4_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
+static void avc_chroma_hv_2x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
   uint32_t coef_hor0, uint32_t coef_hor1,
   uint32_t coef_ver0, uint32_t coef_ver1)
 {
@@ -580,7 +578,7 @@ static void avc_chroma_hv_2x4_msa(uint8_t *src, int32_t 
src_stride,
 
 mask = LD_SB(&chroma_mask_arr[48]);
 
-LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+LD_UB5(src, stride, src0, src1, src2, src3, src4);
 
 VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
 VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3);
@@ -591,83 +589,27 @@ static void avc_chroma_hv_2x4_msa(uint8_t *src, int32_t 
src_stride,
 res_vt0 += res_vt1;
 res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
 res_vt0 = __msa_sat_u_h(res_vt0, 7);
-res = (v8i16) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
-
-ST2x4_UB(res, 0, dst, dst_stride);
-}
-
-static void avc_chroma_hv_2x8_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
-  uint32_t coef_hor0, uint32_t coef_hor1,
-  uint32_t coef_ver0, uint32_t coef_ver1)
-{
-v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-v16u8 tmp0, tmp1, tmp2, tmp3;
-v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
-v8i16 res;
-v16i8 mask;
-v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
-v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
-v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
-v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
-v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
-
-mask = LD_SB(&chroma_mask_arr[48]);
-
-LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
-src += (5 * src_stride);
-LD_UB4(src, src_stride, src5, src6, src7, src8);
-
-VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
-VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3);
-ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
-VSHF_B2_UB(src4, src5, src6, src7, mask, mask, tmp0, tmp1);
-VSHF_B2_UB(src5, src6, src7, src8, mask, mask, tmp2, tmp3);
-ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src4, src5);
-DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
-MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
-
-res_vt0 += res_vt1;
-res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
-res_vt0 = __msa_sat_u_h(res_vt0, 7);
 
 res = (v8i16) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
 
-ST2x4_UB(res, 0, dst, dst_stride);
-dst += (4 * dst_stride);
-
-DOTP_UB2_UH(src4, src5, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
-MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
-
-res_vt0 += res_vt1;
-res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
-res_vt0 = __msa_sat_u_h(res_vt0, 7);
-
-res = (v8i16) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
-
-ST2x4_UB(res, 0, dst, dst_stride);
+ST2x4_UB(res, 0, dst, stride);
 }
 
-static void avc_chroma_hv_2w_msa(uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
+static void 

Re: [FFmpeg-devel] [PATCH] avcodec/mips: Improve avc bi-weighted mc msa functions

2017-10-09 Thread Manojkumar Bhosale
LGTM

-Original Message-
From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf Of 
kaustubh.ra...@imgtec.com
Sent: Monday, October 9, 2017 12:49 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Kaustubh Raste
Subject: [FFmpeg-devel] [PATCH] avcodec/mips: Improve avc bi-weighted mc msa 
functions

From: Kaustubh Raste 

Replace the generic functions with block-size-specific functions.

Signed-off-by: Kaustubh Raste 
---
 libavcodec/mips/h264dsp_msa.c   |  469 +++
 libavutil/mips/generic_macros_msa.h |4 +
 2 files changed, 311 insertions(+), 162 deletions(-)

diff --git a/libavcodec/mips/h264dsp_msa.c b/libavcodec/mips/h264dsp_msa.c 
index 5b06bd9..e50f5ca 100644
--- a/libavcodec/mips/h264dsp_msa.c
+++ b/libavcodec/mips/h264dsp_msa.c
@@ -223,217 +223,242 @@ static void avc_wgt_8x16_msa(uint8_t *data, int32_t 
stride, int32_t log2_denom,
 }
 }
 
-static void avc_biwgt_4x2_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
+static void avc_biwgt_4x2_msa(uint8_t *src, uint8_t *dst, int32_t 
+stride,
   int32_t log2_denom, int32_t src_weight,
   int32_t dst_weight, int32_t offset_in)  {
-uint32_t load0, load1, out0, out1;
-v16i8 src_wgt, dst_wgt, wgt;
-v16i8 src0, src1, dst0, dst1;
-v8i16 temp0, temp1, denom, offset, add_val;
-int32_t val = 128 * (src_weight + dst_weight);
+uint32_t tp0, tp1;
+v16i8 src_wgt, dst_wgt, wgt, vec0;
+v16u8 src0 = { 0 }, dst0 = { 0 };
+v8i16 tmp0, denom, offset, max255 = __msa_ldi_h(255);
 
-offset_in = ((offset_in + 1) | 1) << log2_denom;
+offset_in = (unsigned) ((offset_in + 1) | 1) << log2_denom;
+offset_in += (128 * (src_weight + dst_weight));
 
 src_wgt = __msa_fill_b(src_weight);
 dst_wgt = __msa_fill_b(dst_weight);
 offset = __msa_fill_h(offset_in);
 denom = __msa_fill_h(log2_denom + 1);
-add_val = __msa_fill_h(val);
-offset += add_val;
 
 wgt = __msa_ilvev_b(dst_wgt, src_wgt);
 
-load0 = LW(src);
-src += src_stride;
-load1 = LW(src);
-
-src0 = (v16i8) __msa_fill_w(load0);
-src1 = (v16i8) __msa_fill_w(load1);
-
-load0 = LW(dst);
-load1 = LW(dst + dst_stride);
-
-dst0 = (v16i8) __msa_fill_w(load0);
-dst1 = (v16i8) __msa_fill_w(load1);
+LW2(src, stride, tp0, tp1);
+INSERT_W2_UB(tp0, tp1, src0);
+LW2(dst, stride, tp0, tp1);
+INSERT_W2_UB(tp0, tp1, dst0);
+XORI_B2_128_UB(src0, dst0);
+vec0 = (v16i8) __msa_ilvr_b((v16i8) dst0, (v16i8) src0);
+tmp0 = __msa_dpadd_s_h(offset, wgt, vec0);
+tmp0 >>= denom;
+tmp0 = __msa_maxi_s_h(tmp0, 0);
+tmp0 = __msa_min_s_h(max255, tmp0);
+dst0 = (v16u8) __msa_pckev_b((v16i8) tmp0, (v16i8) tmp0);
+ST4x2_UB(dst0, dst, stride);
+}
 
-XORI_B4_128_SB(src0, src1, dst0, dst1);
-ILVR_B2_SH(dst0, src0, dst1, src1, temp0, temp1);
+static void avc_biwgt_4x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+  int32_t log2_denom, int32_t src_weight,
+  int32_t dst_weight, int32_t offset_in) {
+uint32_t tp0, tp1, tp2, tp3;
+v16i8 src_wgt, dst_wgt, wgt, vec0, vec1;
+v16u8 src0, dst0;
+v8i16 tmp0, tmp1, denom, offset;
 
-temp0 = __msa_dpadd_s_h(offset, wgt, (v16i8) temp0);
-temp1 = __msa_dpadd_s_h(offset, wgt, (v16i8) temp1);
+offset_in = (unsigned) ((offset_in + 1) | 1) << log2_denom;
+offset_in += (128 * (src_weight + dst_weight));
 
-temp0 >>= denom;
-temp1 >>= denom;
+src_wgt = __msa_fill_b(src_weight);
+dst_wgt = __msa_fill_b(dst_weight);
+offset = __msa_fill_h(offset_in);
+denom = __msa_fill_h(log2_denom + 1);
 
-CLIP_SH2_0_255(temp0, temp1);
-PCKEV_B2_SB(temp0, temp0, temp1, temp1, dst0, dst1);
+wgt = __msa_ilvev_b(dst_wgt, src_wgt);
 
-out0 = __msa_copy_u_w((v4i32) dst0, 0);
-out1 = __msa_copy_u_w((v4i32) dst1, 0);
-SW(out0, dst);
-dst += dst_stride;
-SW(out1, dst);
+LW4(src, stride, tp0, tp1, tp2, tp3);
+INSERT_W4_UB(tp0, tp1, tp2, tp3, src0);
+LW4(dst, stride, tp0, tp1, tp2, tp3);
+INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+XORI_B2_128_UB(src0, dst0);
+ILVRL_B2_SB(dst0, src0, vec0, vec1);
+tmp0 = __msa_dpadd_s_h(offset, wgt, vec0);
+tmp1 = __msa_dpadd_s_h(offset, wgt, vec1);
+tmp0 >>= denom;
+tmp1 >>= denom;
+CLIP_SH2_0_255(tmp0, tmp1);
+dst0 = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
 }
 
-static void avc_biwgt_4x4multiple_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
-  int32_t height, int32_t log2_denom,
-  int32_t src_weight, int32_t dst_weight,
-  int32_t offset_in)
+static void avc_biwgt_4x8_msa(uint8_t *src, uint8_t *dst, in

Re: [FFmpeg-devel] [PATCH] avcodec/mips: Improve avc chroma hv mc msa functions

2017-10-09 Thread Manojkumar Bhosale
LGTM

-Original Message-
From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf Of 
kaustubh.ra...@imgtec.com
Sent: Monday, October 9, 2017 2:16 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Kaustubh Raste
Subject: [FFmpeg-devel] [PATCH] avcodec/mips: Improve avc chroma hv mc msa 
functions

From: Kaustubh Raste 

Replace the generic functions with block-size-specific functions.

Signed-off-by: Kaustubh Raste 
---
 libavcodec/mips/h264chroma_msa.c |  309 --
 1 file changed, 166 insertions(+), 143 deletions(-)

diff --git a/libavcodec/mips/h264chroma_msa.c b/libavcodec/mips/h264chroma_msa.c
index 16e2fe4..b8fcf6d 100644
--- a/libavcodec/mips/h264chroma_msa.c
+++ b/libavcodec/mips/h264chroma_msa.c
@@ -526,8 +526,7 @@ static void avc_chroma_vt_8w_msa(uint8_t *src, uint8_t 
*dst, int32_t stride,
 }
 }
 
-static void avc_chroma_hv_2x2_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
+static void avc_chroma_hv_2x2_msa(uint8_t *src, uint8_t *dst, int32_t 
+stride,
   uint32_t coef_hor0, uint32_t coef_hor1,
   uint32_t coef_ver0, uint32_t coef_ver1)  { 
@@ -544,7 +543,7 @@ static void avc_chroma_hv_2x2_msa(uint8_t *src, int32_t 
src_stride,
 
 mask = LD_SB(&chroma_mask_arr[48]);
 
-LD_UB3(src, src_stride, src0, src1, src2);
+LD_UB3(src, stride, src0, src1, src2);
 VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
 DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
 MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1); @@ 
-558,12 +557,11 @@ static void avc_chroma_hv_2x2_msa(uint8_t *src, int32_t 
src_stride,
 out1 = __msa_copy_u_h(res_vert, 1);
 
 SH(out0, dst);
-dst += dst_stride;
+dst += stride;
 SH(out1, dst);
 }
 
-static void avc_chroma_hv_2x4_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
+static void avc_chroma_hv_2x4_msa(uint8_t *src, uint8_t *dst, int32_t 
+stride,
   uint32_t coef_hor0, uint32_t coef_hor1,
   uint32_t coef_ver0, uint32_t coef_ver1)  { 
@@ -580,7 +578,7 @@ static void avc_chroma_hv_2x4_msa(uint8_t *src, int32_t 
src_stride,
 
 mask = LD_SB(&chroma_mask_arr[48]);
 
-LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+LD_UB5(src, stride, src0, src1, src2, src3, src4);
 
 VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
 VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3); @@ -591,83 
+589,27 @@ static void avc_chroma_hv_2x4_msa(uint8_t *src, int32_t src_stride,
 res_vt0 += res_vt1;
 res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
 res_vt0 = __msa_sat_u_h(res_vt0, 7);
-res = (v8i16) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
-
-ST2x4_UB(res, 0, dst, dst_stride);
-}
-
-static void avc_chroma_hv_2x8_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
-  uint32_t coef_hor0, uint32_t coef_hor1,
-  uint32_t coef_ver0, uint32_t coef_ver1)
-{
-v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-v16u8 tmp0, tmp1, tmp2, tmp3;
-v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
-v8i16 res;
-v16i8 mask;
-v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
-v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
-v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
-v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
-v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
-
-mask = LD_SB(&chroma_mask_arr[48]);
-
-LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
-src += (5 * src_stride);
-LD_UB4(src, src_stride, src5, src6, src7, src8);
-
-VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
-VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3);
-ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
-VSHF_B2_UB(src4, src5, src6, src7, mask, mask, tmp0, tmp1);
-VSHF_B2_UB(src5, src6, src7, src8, mask, mask, tmp2, tmp3);
-ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src4, src5);
-DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
-MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
-
-res_vt0 += res_vt1;
-res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
-res_vt0 = __msa_sat_u_h(res_vt0, 7);
 
 res = (v8i16) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
 
-ST2x4_UB(res, 0, dst, dst_stride);
-dst += (4 * dst_stride);
-
-DOTP_UB2_UH(src4, src5, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
-MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
-
-res_vt0 += res_vt1;
-res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
-res_vt0 = __msa_sat_u_h(res

[FFmpeg-devel] [PATCH] avcodec/ffv1dec: Fix out of array read in slice counting

2017-10-09 Thread Michael Niedermayer
Fixes: test-201710.mp4

Found-by: 连一汉  and Zhibin Hu
Signed-off-by: Michael Niedermayer 
---
 libavcodec/ffv1dec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index d2bfee784f..5eadb6b158 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -717,7 +717,7 @@ static int read_header(FFV1Context *f)
 } else {
 const uint8_t *p = c->bytestream_end;
 for (f->slice_count = 0;
- f->slice_count < MAX_SLICES && 3 < p - c->bytestream_start;
+ f->slice_count < MAX_SLICES && 3 + 5*!!f->ec < p - c->bytestream_start;
  f->slice_count++) {
 int trailer = 3 + 5*!!f->ec;
 int size = AV_RB24(p-trailer);
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix visual glitch with XvMC, caused by wrong idct permutation.

2017-10-09 Thread Ivan Kalvachev
On 10/9/17, Ronald S. Bultje  wrote:
> Hi,
>
> On Sun, Oct 8, 2017 at 6:52 PM, Ivan Kalvachev  wrote:
> [..]
>
> Indentation is off in the second hunk, can you fix that?

You want it 4 spaces to the right
or to start from the first position?

BTW, I think it would be better to use "127" number.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/6] vaapi: Remove H.264 baseline profile

2017-10-09 Thread Moritz Barsnick
On Sun, Oct 08, 2017 at 16:49:58 +0100, Mark Thompson wrote:
>  switch (avctx->profile) {
> +case FF_PROFILE_H264_BASELINE:
> +av_log(avctx, AV_LOG_WARNING, "H.264 baseline profile is not "
> +   "supported, using constrained baseline profile instead.\n");
> +avctx->profile = FF_PROFILE_H264_CONSTRAINED_BASELINE;
>  case FF_PROFILE_H264_CONSTRAINED_BASELINE:

I recall a discussion that linting/analysis tools such as Coverity
require a fall-through to be marked as such.

> /* fall-through */
> case FF_PROFILE_H264_CONSTRAINED_BASELINE:

Cheers,
Moritz
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] Mjpeg to RTP without re-encoding

2017-10-09 Thread Louis-Paul CORDIER

Hi everybody,

I'm currently using ffmpeg API to send video stream through RTP. The codec used 
in the RTP protocol is MJPEG. Below is my workflow:

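+----------------------+   +--------------+   +----------------------+   +----------------------+
| Camera Frame (JPEG)  +-->+  JPEG to RGB +-->+ RGB to MJPEG encoder +-->+ RTP Format container |
+----------------------+   +--------------+   +----------------------+   +----------------------+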

Currently I'm instantiating an MJPEG encoder that encodes the frame into a packet, then
providing this packet to the "format container" for transmission through the
network.

The video frames I want to send are already in JPEG format, so I'm wondering if 
it is possible to avoid converting from JPEG to RGB, and reencoding using 
FFmpeg MJPEG encoder.
From my point of view, the ideal would be to create an AVPacket by hand and fill
the buffer with my camera's JPEG frame directly.

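+----------------------+   +--------------------+   +----------------------+
| Camera Frame (MJPEG) +-->+ Writing to Packet  +-->+ RTP Format container |
+----------------------+   +--------------------+   +----------------------+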

Any clues?

Thanks!



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/6] vaapi: Remove H.264 baseline profile

2017-10-09 Thread wm4
On Sun, 8 Oct 2017 16:49:58 +0100
Mark Thompson  wrote:

> This has been deprecated in libva2 because hardware does not and will not
> support it.  Therefore never consider it for decode, and for encode assume
> the user meant constrained baseline profile instead.
> ---
> On 08/10/17 16:44, Derek Buitenhuis wrote:
> > On 10/8/2017 4:11 PM, Mark Thompson wrote:  
> >> +case FF_PROFILE_H264_BASELINE:
> >> +// Baseline profile is not supported, assume the user meant
> >> +// constrained baseline instead.
> >> +avctx->profile = FF_PROFILE_H264_CONSTRAINED_BASELINE;  
> > 
> > Trying to automatically (and silently!) guess what the user wanted
> > is never a good idea, IMO. At the very least, print a warning.  
> 
> Yeah, ok, I agree.  Patch changed as enclosing.
> 
> 
>  libavcodec/vaapi_decode.c  |  1 -
>  libavcodec/vaapi_encode_h264.c | 12 
>  2 files changed, 4 insertions(+), 9 deletions(-)
> 
> diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
> index cf58aae4c6..4f0ff84e01 100644
> --- a/libavcodec/vaapi_decode.c
> +++ b/libavcodec/vaapi_decode.c
> @@ -246,7 +246,6 @@ static const struct {
>  MAP(MPEG4,   MPEG4_MAIN,  MPEG4Main   ),
>  MAP(H264,H264_CONSTRAINED_BASELINE,
> H264ConstrainedBaseline),
> -MAP(H264,H264_BASELINE,   H264Baseline),
>  MAP(H264,H264_MAIN,   H264Main),
>  MAP(H264,H264_HIGH,   H264High),
>  #if VA_CHECK_VERSION(0, 37, 0)
> diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
> index 549867ef3f..efde80b08e 100644
> --- a/libavcodec/vaapi_encode_h264.c
> +++ b/libavcodec/vaapi_encode_h264.c
> @@ -1175,6 +1175,10 @@ static av_cold int 
> vaapi_encode_h264_init(AVCodecContext *avctx)
>  ctx->codec = &vaapi_encode_type_h264;
>  
>  switch (avctx->profile) {
> +case FF_PROFILE_H264_BASELINE:
> +av_log(avctx, AV_LOG_WARNING, "H.264 baseline profile is not "
> +   "supported, using constrained baseline profile instead.\n");
> +avctx->profile = FF_PROFILE_H264_CONSTRAINED_BASELINE;
>  case FF_PROFILE_H264_CONSTRAINED_BASELINE:
>  ctx->va_profile = VAProfileH264ConstrainedBaseline;
>  if (avctx->max_b_frames != 0) {
> @@ -1183,14 +1187,6 @@ static av_cold int 
> vaapi_encode_h264_init(AVCodecContext *avctx)
> "doesn't support encoding with B frames, disabling 
> them.\n");
>  }
>  break;
> -case FF_PROFILE_H264_BASELINE:
> -ctx->va_profile = VAProfileH264Baseline;
> -if (avctx->max_b_frames != 0) {
> -avctx->max_b_frames = 0;
> -av_log(avctx, AV_LOG_WARNING, "H.264 baseline profile "
> -   "doesn't support encoding with B frames, disabling 
> them.\n");
> -}
> -break;
>  case FF_PROFILE_H264_MAIN:
>  ctx->va_profile = VAProfileH264Main;
>  break;

Shouldn't trying to decode baseline video just fall back to sw decoding?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] FFmpeg 3.4

2017-10-09 Thread wm4
On Sun, 8 Oct 2017 13:53:13 +0200
Michael Niedermayer  wrote:

> On Sat, Oct 07, 2017 at 12:06:23AM +0200, wm4 wrote:
> > On Fri, 6 Oct 2017 16:53:17 +0200
> > Michael Niedermayer  wrote:
> >   
> > > Hi all
> > > 
> > > if there are no objections i will branch release/3.4 in the next days
> > > and make the 3.4 release a few days after that
> > > 
> > > If people prefer a specific name, suggest one now, otherwise i will
> > > pick a random one from past suggestions
> > > 
> > > If there are features you want in, please push them to
> > > master before the release is branched
> > > if there are bug fixes you want in please ensure they end in
> > > release/3.4 (either via master before branching or backport after)  
> > 
> > I want hardware decoding things in that release (frame pool info,
> > cuvid). I vote the release until this has happened.  
> 
> I am not sure what you mean by "I vote the release ..."
> please clarify

"I vote to delay the release"

> But I do not think that delaying the release further is what most
> developers want, in case that's what you suggest.
> The release is already several months behind schedule.
> 
> Releases should be done "early and often"; waiting for "in development"
> features will delay a release forever, as there's always another feature
> on the horizon that someone wants in.
> 
> Of course, if the majority wants me to wait with the release, it's easy
> to wait for as long as people want me to wait ...
> 
> Thanks
> 
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> Those who are best at talking, realize last or never when they are wrong.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 6/6] hwcontext_vaapi: Add support for mapping to DRM objects

2017-10-09 Thread wm4
On Sun, 8 Oct 2017 17:13:24 +0100
Derek Buitenhuis  wrote:

> On 10/8/2017 5:11 PM, Mark Thompson wrote:
> > This is just how hardware surfaces are stored in AVFrames - they have their 
> > own API-specific handles in the data[] pointers because that's the only 
> > place to put them.
> > 
> > See
> > 
> > and
> > 
> > (and others).
> > 
> > Thanks,  
> 
> Eugh, well OK. That's arguably not exactly OK by the C standard. Oh well.

How come? As long as you're strictly casting through intptr_t it should
be fine.

But yeah, personally I'd prefer having hw surfaces point to some sort
of struct instead. Would make more sense with AVBufferRef semantics as
well. But a bit too late for that.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 00/20] Coded bitstream editing (v3)

2017-10-09 Thread wm4
On Sun,  8 Oct 2017 21:01:34 +0100
Mark Thompson  wrote:

> Incorporating all review comments from last time:
> * Change all CBS users to hold a pointer rather than the whole structure.
> * Rearrange the MPEG-2 framerate stuff so that it doesn't add code and then 
> remove it in the series.
> * Add a type for the coded bitstream unit type.
> * Miscellaneous fixups.
> 

Patches 1-20 LGTM.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] build: prevent SDL2 from polluting global cflags and extralibs

2017-10-09 Thread wm4
On Sun,  8 Oct 2017 17:16:21 -0300
James Almer  wrote:

> Remove the SDL_main define from the global cflags but not from the
> ffplay cflags, and the -mwindows linker option from extralibs instead
> of overriding it with the addition of -mconsole.
> 
> Signed-off-by: James Almer 
> ---
>  configure   | 4 ++--
>  ffbuild/library.mak | 1 -
>  2 files changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/configure b/configure
> index 77c9a18c3c..0fdf4bcbe3 100755
> --- a/configure
> +++ b/configure
> @@ -6107,10 +6107,10 @@ if enabled sdl2; then
>  enable sdl2
>  fi
>  if test $target_os = "mingw32"; then
> -sdl2_extralibs="$sdl2_extralibs -mconsole"
> +sdl2_extralibs=$(filter_out '-mwindows' $sdl2_extralibs)
>  fi
>  fi
> -enabled sdl2 && add_cflags $sdl2_cflags && add_extralibs $sdl2_extralibs
> +enabled sdl2 && add_cflags $(filter_out '-Dmain=SDL_main' $sdl2_cflags) && 
> add_extralibs $sdl2_extralibs
>  
>  if enabled decklink; then
>  case $target_os in
> diff --git a/ffbuild/library.mak b/ffbuild/library.mak
> index ee19c3c797..4191edcf9c 100644
> --- a/ffbuild/library.mak
> +++ b/ffbuild/library.mak
> @@ -16,7 +16,6 @@ all-$(CONFIG_SHARED): $(SUBDIR)$(SLIBNAME) 
> $(SUBDIR)lib$(FULLNAME).pc
>  
>  LIBOBJS := $(OBJS) $(SUBDIR)%.h.o $(TESTOBJS)
>  $(LIBOBJS) $(LIBOBJS:.o=.s) $(LIBOBJS:.o=.i):   CPPFLAGS += 
> -DHAVE_AV_CONFIG_H
> -$(TESTOBJS) $(TESTOBJS:.o=.i): CFLAGS += -Umain
>  
>  $(SUBDIR)$(LIBNAME): $(OBJS)
>   $(RM) $@

LGTM, though I'd argue dropping SDL instead.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 09/20] lavc: Add h264_redundant_pps bitstream filter

2017-10-09 Thread Moritz Barsnick
On Sun, Oct 08, 2017 at 21:01:43 +0100, Mark Thompson wrote:
> This applies a specific fixup to some Bluray streams which contain
[...]
> +This applies a specific fixup to some Bluray streams which contain

Nit: Blu-ray

Moritz
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix crash if av_vdpau_bind_context() is not used.

2017-10-09 Thread wm4
On Mon, 9 Oct 2017 03:04:53 +0300
Ivan Kalvachev  wrote:

> The public functions av_alloc_vdpaucontext() and
> av_vdpau_alloc_context() are allocating AVVDPAUContext
> structure that is supposed to be placed in avctx->hwaccel_context.
> 
> However the rest of libavcodec/vdpau.c uses avctx->hwaccel_context
> as struct VDPAUHWContext, that is bigger and does contain
> AVVDPAUContext as first member.
> 
> The usage includes writes to the new variables in the bigger struct,
> without checking for block size.
> 
> Fix by always allocating the bigger structure.
> 
> BTW,
> I have no idea why the new fields haven't simply been added to the
> existing struct...
> It seems that the programmer who wrote this has been aware of the problem,
> because av_vdpau_bind_context reallocates the structure.
> 
> It might be a good idea to check the other usages of this reallocation function.
> 
> Best Regards
>Ivan Kalvachev

IMO not really worth fixing at this point, because this is the old-old
vdpau API. Even av_vdpau_bind_context() (which does not require using
av_alloc_vdpaucontext()) is deprecated. Or rather should be - I just
haven't bothered deprecating it because the deprecation dance is too
messy. In any case, you shouldn't use any of those APIs - use the
generic hwaccel API instead (setting hw_frames_ctx or hw_device_ctx).
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [V3 1/4] ffmpeg: remove hwaccel_lax_profile_check opt.

2017-10-09 Thread Mark Thompson
On 09/10/17 08:49, Jun Zhao wrote:
> V3: Remove hwaccel_lax_profile_check opt, and add new per-stream
> hwaccel_flags option
> 

> From 2b1585fd6e6e68c81761ace0a8503385067086e0 Mon Sep 17 00:00:00 2001
> From: Jun Zhao 
> Date: Mon, 9 Oct 2017 02:13:14 -0400
> Subject: [V3 1/4] ffmpeg: remove hwaccel_lax_profile_check opt.
> 
> remove hwaccel_lax_profile_check, will use per-stream hwaccel_flags
> option.
> 
> Signed-off-by: Jun Zhao 
> ---
>  fftools/ffmpeg.h | 1 -
>  fftools/ffmpeg_opt.c | 3 ---
>  2 files changed, 4 deletions(-)
> 
> diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
> index f6c76bcc55..888f77223a 100644
> --- a/fftools/ffmpeg.h
> +++ b/fftools/ffmpeg.h
> @@ -624,7 +624,6 @@ extern const AVIOInterruptCB int_cb;
>  
>  extern const OptionDef options[];
>  extern const HWAccel hwaccels[];
> -extern int hwaccel_lax_profile_check;
>  extern AVBufferRef *hw_device_ctx;
>  #if CONFIG_QSV
>  extern char *qsv_device;
> diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
> index 100fa76e46..500920326b 100644
> --- a/fftools/ffmpeg_opt.c
> +++ b/fftools/ffmpeg_opt.c
> @@ -100,7 +100,6 @@ const HWAccel hwaccels[] = {
>  #endif
>  { 0 },
>  };
> -int hwaccel_lax_profile_check = 0;
>  AVBufferRef *hw_device_ctx;
>  HWDevice *filter_hw_device;
>  
> @@ -3640,8 +3639,6 @@ const OptionDef options[] = {
>  { "autorotate",   HAS_ARG | OPT_BOOL | OPT_SPEC |
>OPT_EXPERT | OPT_INPUT,
> { .off = OFFSET(autorotate) },
>  "automatically insert correct rotate filters" },
> -{ "hwaccel_lax_profile_check", OPT_BOOL | OPT_EXPERT,
> { &hwaccel_lax_profile_check},
> -"attempt to decode anyway if HW accelerated decoder's supported 
> profiles do not exactly match the stream" },
>  
>  /* audio options */
>  { "aframes",OPT_AUDIO | HAS_ARG  | OPT_PERFILE | OPT_OUTPUT, 
>   { .func_arg = opt_audio_frames },
> -- 
> 2.11.0
> 

Would anyone else care to comment on whether we are allowed to do this?  I 
realise the option lost any effect some time ago, but I was waiting for the 
version bump to actually remove it.

- Mark
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [V3 2/4] lavc: enable hwaccel_flags option

2017-10-09 Thread Mark Thompson
On 09/10/17 08:49, Jun Zhao wrote:
> 
> From ba5f522929f5232132121f339c288fb6464fdd39 Mon Sep 17 00:00:00 2001
> From: Jun Zhao 
> Date: Mon, 9 Oct 2017 02:22:38 -0400
> Subject: [V3 2/4] lavc: enable hwaccel_flags option
> 
> enable pre-stream hwaccel_flags.

"per-stream"

> 
> Signed-off-by: Jun Zhao 
> ---
>  libavcodec/options_table.h | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
> index 12712fb541..2ac37c3ff1 100644
> --- a/libavcodec/options_table.h
> +++ b/libavcodec/options_table.h
> @@ -576,6 +576,10 @@ static const AVOption avcodec_options[] = {
>  {"pixel_format", "set pixel format", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, 
> {.i64=AV_PIX_FMT_NONE}, -1, INT_MAX, 0 },
>  {"video_size", "set video size", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, 
> {.str=NULL}, 0, INT_MAX, 0 },
>  {"max_pixels", "Maximum number of pixels", OFFSET(max_pixels), 
> AV_OPT_TYPE_INT64, {.i64 = INT_MAX }, 0, INT_MAX, A|V|S|D|E },
> +{"hwaccel_flags", NULL, OFFSET(hwaccel_flags), AV_OPT_TYPE_FLAGS, {.i64 = 
> AV_HWACCEL_FLAG_IGNORE_LEVEL }, 0, UINT_MAX, V|D, "hwaccel_flags"},
> +{"ignore_level", "ignore level even if the codec level used is unknown or 
> higher than the maximum supported level reported by the hardware driver", 0, 
> AV_OPT_TYPE_CONST, { .i64 = AV_HWACCEL_FLAG_IGNORE_LEVEL }, INT_MIN, INT_MAX, 
> V | D, "hwaccel_flags" },
> +{"allow_high_depth", "allow to output YUV pixel formats with a different 
> chroma sampling than 4:2:0 and/or other than 8 bits per component", 0, 
> AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_HIGH_DEPTH }, INT_MIN, 
> INT_MAX, V | D, "hwaccel_flags"},
> +{"allow_profile_mismatch", "attempt to decode anyway if HW accelerated 
> decoder's supported profiles do not exactly match the stream", 0, 
> AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH }, INT_MIN, 
> INT_MAX, V | D, "hwaccel_flags"},
>  {NULL},
>  };
>  
> -- 
> 2.11.0
> 

LGTM, will push later if no one else has any comments.

- Mark
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [V3 3/4] doc/codecs: document the -hwaccel_flags option.

2017-10-09 Thread Mark Thompson
On 09/10/17 08:50, Jun Zhao wrote:
> 
> From 2c5609811a1bad27fed802c674c9fb095edbb59b Mon Sep 17 00:00:00 2001
> From: Jun Zhao 
> Date: Mon, 9 Oct 2017 02:41:37 -0400
> Subject: [V3 3/4] doc/codecs: document the -hwaccel_flags option.
> 
> Signed-off-by: Jun Zhao 
> ---
>  doc/codecs.texi | 14 ++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/doc/codecs.texi b/doc/codecs.texi
> index 40f64fe4c8..2d23a11b7f 100644
> --- a/doc/codecs.texi
> +++ b/doc/codecs.texi
> @@ -1279,6 +1279,20 @@ ffprobe -dump_separator "
>  Maximum number of pixels per image. This value can be used to avoid out of
>  memory failures due to large images.
>  
> +@item  -hwaccel_flags  @var{flags} (@emph{decoding,video})
> +Possible values:
> +@table @samp
> +@item ignore_level
> +ignore level even if the codec level used is unknown or higher than the 
> maximum
> +supported level reported by the hardware driver
> +@item allow_high_depth
> +allow to output YUV pixel formats with a different chroma sampling than 4:2:0
> +and/or other than 8 bits per component
> +@item allow_profile_mismatch
> +attempt to decode anyway if HW accelerated decoder's supported profiles do 
> not
> +exactly match the stream
> +@end table

These should be whole sentences like the rest of the file, rather than just the 
summary fragments from the options.

A specific example for the allow-profile-mismatch case would be nice, too.  
(Unlike the other two it's not very obvious what the use of it is.)

> +
>  @item apply_cropping @var{bool} (@emph{decoding,video})
>  Enable cropping if cropping parameters are multiples of the required
>  alignment for the left and top parameters. If the alignment is not met the
> -- 
> 2.11.0
> 
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Mjpeg to RTP without re-encoding

2017-10-09 Thread Moritz Barsnick
On Mon, Oct 09, 2017 at 13:18:51 +0200, Louis-Paul CORDIER wrote:
> I'm currently using ffmpeg API to send video stream through RTP. The codec 
> used in the RTP protocol is MJPEG. Below is my workflow:

For questions regarding the use of the libav* libraries, please contact
the libav-user mailing list, as described here:
https://www.ffmpeg.org/contact.html

ffmpeg-devel, the list you have contacted, is strictly for the
development *of* ffmpeg and its libraries.

That said:
> The video frames I want to send are already in JPEG format, so I'm wondering 
> if it is possible to avoid converting from JPEG to RGB, and reencoding using 
> FFmpeg MJPEG encoder.
> To my point of view, the ideal would be creating an AvPacket by hand and fill 
> the buffer with my camera frame JPEG directly.

ffmpeg, the command line tool, is capable of doing that without
reencoding, so it should absolutely be possible through AvPacket.

$ ffmpeg -i %02d.jpg -c copy mjpeg.mkv

Cheers,
Moritz
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [V3 4/4] lavc/vaapi_decode: fix profile search when disable exact profile match.

2017-10-09 Thread Mark Thompson
On 09/10/17 08:50, Jun Zhao wrote:
> 
> From ef75f07943ff51c63bf7735a90c38a11559cad33 Mon Sep 17 00:00:00 2001
> From: Jun Zhao 
> Date: Thu, 21 Sep 2017 02:44:42 -0400
> Subject: [V3 4/4] lavc/vaapi_decode: fix profile search when disable exact
>  profile match.
> 
> when disable exact profile, use the alt_profile for VAAPI HWAccel
> decoder.
> 
> Signed-off-by: Jun Zhao 
> ---
>  libavcodec/vaapi_decode.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
> index 5a555b2bd3..27ef33837c 100644
> --- a/libavcodec/vaapi_decode.c
> +++ b/libavcodec/vaapi_decode.c
> @@ -281,7 +281,7 @@ static int vaapi_decode_make_config(AVCodecContext *avctx)
>  VAStatus vas;
>  int err, i, j;
>  const AVCodecDescriptor *codec_desc;
> -VAProfile profile, *profile_list = NULL;
> +VAProfile profile, va_profile, *profile_list = NULL;
>  int profile_count, exact_match, alt_profile;
>  const AVPixFmtDescriptor *sw_desc, *desc;
>  
> @@ -328,6 +328,7 @@ static int vaapi_decode_make_config(AVCodecContext *avctx)
>  if (exact_match)
>  break;
>  alt_profile = vaapi_profile_map[i].codec_profile;
> +va_profile = vaapi_profile_map[i].va_profile;
>  }
>  }
>  av_freep(&profile_list);
> @@ -347,6 +348,7 @@ static int vaapi_decode_make_config(AVCodecContext *avctx)
>  av_log(avctx, AV_LOG_WARNING, "Using possibly-"
> "incompatible profile %d instead.\n",
> alt_profile);
> +profile = va_profile;
>  } else {
>  av_log(avctx, AV_LOG_VERBOSE, "Codec %s profile %d not "
> "supported for hardware decode.\n",
> -- 
> 2.11.0
> 

LGTM, will push later.

Thanks,

- Mark
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] FFmpeg 3.4

2017-10-09 Thread Tobias Rapp

On 08.10.2017 13:58, Reto Kromer wrote:

Michael Niedermayer wrote:


Of course if the majority wants me to wait with the release,
its easy to wait for as long as people want me to wait ...


From a user's perspective, I would be delighted to have a new
release. Thank you very much indeed! Reto


+1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec/mips: Improve avc put mc 21, 23 and 02 msa functions

2017-10-09 Thread kaustubh.raste
From: Kaustubh Raste 

Remove loops and unroll as block sizes are known.

Signed-off-by: Kaustubh Raste 
---
 libavcodec/mips/h264qpel_msa.c | 1219 ++--
 1 file changed, 802 insertions(+), 417 deletions(-)

diff --git a/libavcodec/mips/h264qpel_msa.c b/libavcodec/mips/h264qpel_msa.c
index afc0183..a22a482 100644
--- a/libavcodec/mips/h264qpel_msa.c
+++ b/libavcodec/mips/h264qpel_msa.c
@@ -45,25 +45,6 @@
 out0_m;  \
 } )
 
-#define AVC_HORZ_FILTER_SH(in, mask0, mask1, mask2) \
-( { \
-v8i16 out0_m, out1_m;   \
-v16i8 tmp0_m, tmp1_m;   \
-v16i8 minus5b = __msa_ldi_b(-5);\
-v16i8 plus20b = __msa_ldi_b(20);\
-\
-tmp0_m = __msa_vshf_b((v16i8) mask0, in, in);   \
-out0_m = __msa_hadd_s_h(tmp0_m, tmp0_m);\
-\
-tmp0_m = __msa_vshf_b((v16i8) mask1, in, in);   \
-out0_m = __msa_dpadd_s_h(out0_m, minus5b, tmp0_m);  \
-\
-tmp1_m = __msa_vshf_b((v16i8) (mask2), in, in); \
-out1_m = __msa_dpadd_s_h(out0_m, plus20b, tmp1_m);  \
-\
-out1_m; \
-} )
-
 static const uint8_t luma_mask_arr[16 * 8] = {
 /* 8 width cases */
 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12,
@@ -148,6 +129,25 @@ static const uint8_t luma_mask_arr[16 * 8] = {
 hz_out_m;\
 } )
 
+#define AVC_HORZ_FILTER_SH(in0, in1, mask0, mask1, mask2)  \
+( {\
+v8i16 out0_m;  \
+v16i8 tmp0_m;  \
+v16i8 minus5b = __msa_ldi_b(-5);   \
+v16i8 plus20b = __msa_ldi_b(20);   \
+   \
+tmp0_m = __msa_vshf_b((v16i8) mask0, in1, in0);\
+out0_m = __msa_hadd_s_h(tmp0_m, tmp0_m);   \
+   \
+tmp0_m = __msa_vshf_b((v16i8) mask1, in1, in0);\
+out0_m = __msa_dpadd_s_h(out0_m, minus5b, tmp0_m); \
+   \
+tmp0_m = __msa_vshf_b((v16i8) mask2, in1, in0);\
+out0_m = __msa_dpadd_s_h(out0_m, plus20b, tmp0_m); \
+   \
+out0_m;\
+} )
+
 #define AVC_DOT_SH3_SH(in0, in1, in2, coeff0, coeff1, coeff2)   \
 ( { \
 v8i16 out0_m;   \
@@ -159,175 +159,17 @@ static const uint8_t luma_mask_arr[16 * 8] = {
 out0_m; \
 } )
 
-static void avc_luma_vt_4w_msa(const uint8_t *src, int32_t src_stride,
-   uint8_t *dst, int32_t dst_stride,
-   int32_t height)
-{
-int32_t loop_cnt;
-int16_t filt_const0 = 0xfb01;
-int16_t filt_const1 = 0x1414;
-int16_t filt_const2 = 0x1fb;
-v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
-v16i8 src87_r, src2110, src4332, src6554, src8776;
-v16i8 filt0, filt1, filt2;
-v8i16 out10, out32;
-v16u8 out;
-
-filt0 = (v16i8) __msa_fill_h(filt_const0);
-filt1 = (v16i8) __msa_fill_h(filt_const1);
-filt2 = (v16i8) __msa_fill_h(filt_const2);
-
-LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-src += (5 * src_stride);
-
-ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
-   src10_r, src21_r, src32_r, src43_r);
-ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
-XORI_B2_128_SB(src2110, src4332);
-
-for (loop_cnt = (height >> 2); loop_cnt--;) {
-LD_SB4(src, src_stride, src5, src6, src7, src8);
-src += (4 * src_stride);
-
-ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
-   src54_r, src65_r, src76_r, src87_r);
-ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
-XORI_B2_128_SB(src6554, src8776);
-out10 = DPADD_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2);
-out32 = DPADD_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2);
-SRARI_H2_SH(out10, out32, 5);
-SAT_SH2_SH(out10, out32, 7);
-out = PCKEV_XORI128_UB(out10, out32);
-ST4x4_UB(out, out, 0, 1, 

[FFmpeg-devel] [PATCH] avcodec/mips: Improve hevc uni-w horiz mc msa functions

2017-10-09 Thread kaustubh.raste
From: Kaustubh Raste 

Load the specific destination bytes instead of MSA load and pack.
Pack the data to half word before clipping.
Use immediate unsigned saturation for clip to max saving one vector register.

Signed-off-by: Kaustubh Raste 
---
 libavcodec/mips/hevc_macros_msa.h  |   13 +-
 libavcodec/mips/hevc_mc_uniw_msa.c |  641 +---
 2 files changed, 386 insertions(+), 268 deletions(-)

diff --git a/libavcodec/mips/hevc_macros_msa.h 
b/libavcodec/mips/hevc_macros_msa.h
index b06c5ad..7dcfea0 100644
--- a/libavcodec/mips/hevc_macros_msa.h
+++ b/libavcodec/mips/hevc_macros_msa.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ * Copyright (c) 2015 - 2017 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
  *
  * This file is part of FFmpeg.
  *
@@ -58,6 +58,17 @@
 out2 = (v4i32) __msa_pckev_b((v16i8) tmp5_m, (v16i8) tmp4_m);  \
 }
 
+#define HEVC_FILT_8TAP_SH(in0, in1, in2, in3,\
+  filt0, filt1, filt2, filt3)\
+( {  \
+v8i16 out_m; \
+ \
+out_m = __msa_dotp_s_h((v16i8) in0, (v16i8) filt0);  \
+out_m = __msa_dpadd_s_h(out_m, (v16i8) in1, (v16i8) filt1);  \
+DPADD_SB2_SH(in2, in3, filt2, filt3, out_m, out_m);  \
+out_m;   \
+} )
+
 #define HEVC_FILT_8TAP(in0, in1, in2, in3,   \
filt0, filt1, filt2, filt3)   \
 ( {  \
diff --git a/libavcodec/mips/hevc_mc_uniw_msa.c 
b/libavcodec/mips/hevc_mc_uniw_msa.c
index 38a8844..7c01c32 100644
--- a/libavcodec/mips/hevc_mc_uniw_msa.c
+++ b/libavcodec/mips/hevc_mc_uniw_msa.c
@@ -22,6 +22,13 @@
 #include "libavcodec/mips/hevcdsp_mips.h"
 #include "libavcodec/mips/hevc_macros_msa.h"
 
+static const uint8_t ff_hevc_mask_arr[16 * 2] __attribute__((aligned(0x40))) = 
{
+/* 8 width cases */
+0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+/* 4 width cases */
+0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20
+};
+
 #define HEVC_HV_UNIW_RND_CLIP4(in0, in1, in2, in3, wgt, offset, rnd,   \
out0, out1, out2, out3) \
 {  \
@@ -624,28 +631,35 @@ static void hevc_hz_uniwgt_8t_4w_msa(uint8_t *src,
  int32_t rnd_val)
 {
 uint32_t loop_cnt;
+v16u8 out0, out1;
 v8i16 filt0, filt1, filt2, filt3;
 v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
-v16i8 mask1, mask2, mask3;
-v8i16 filter_vec, const_vec;
-v16i8 vec0, vec1, vec2, vec3;
-v8i16 dst0, dst1, dst2, dst3;
-v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-v4i32 weight_vec, offset_vec, rnd_vec;
-v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10;
+v16i8 mask0, mask1, mask2, mask3, vec11, vec12, vec13, vec14, vec15;
+v8i16 filter_vec, dst01, dst23, dst45, dst67;
+v8i16 dst0, dst1, dst2, dst3, weight_vec_h, offset_vec, denom_vec;
+v4i32 weight_vec, rnd_vec;
 
 src -= 3;
 weight = weight & 0x0000FFFF;
-const_vec = __msa_ldi_h(128);
-const_vec <<= 6;
 
 weight_vec = __msa_fill_w(weight);
-offset_vec = __msa_fill_w(offset);
 rnd_vec = __msa_fill_w(rnd_val);
 
+weight *= 128;
+rnd_val -= 6;
+
+weight_vec_h = __msa_fill_h(weight);
+offset_vec = __msa_fill_h(offset);
+denom_vec = __msa_fill_h(rnd_val);
+
+weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
 filter_vec = LD_SH(filter);
 SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
+mask0 = LD_SB(&ff_hevc_mask_arr[16]);
 mask1 = mask0 + 2;
 mask2 = mask0 + 4;
 mask3 = mask0 + 6;
@@ -657,34 +671,27 @@ static void hevc_hz_uniwgt_8t_4w_msa(uint8_t *src,
 
 VSHF_B4_SB(src0, src1, mask0, mask1, mask2, mask3,
vec0, vec1, vec2, vec3);
-
-dst0 = const_vec;
-DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
- dst0, dst0, dst0, dst0);
 VSHF_B4_SB(src2, src3, mask0, mask1, mask2, mask3,
-   vec0, vec1, vec2, vec3);
-dst1 = const_vec;
-DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
- dst1, dst1, dst1, dst1);
+   vec4, vec5, vec6, vec7);
 VSHF_B4_SB(src4, src5, mask0, mask1, mask2, mask3,
-   vec0, vec1, vec2, vec3);
-dst2 = const_vec;
-DPADD_SB4_SH(vec0, vec1, v

[FFmpeg-devel] [PATCH] avcodec/mips: Improve avc uni copy mc msa functions

2017-10-09 Thread kaustubh.raste
From: Kaustubh Raste 

Load the specific bytes instead of MSA load.

Signed-off-by: Kaustubh Raste 
---
 libavcodec/mips/hevc_mc_uni_msa.c |  245 +++--
 1 file changed, 100 insertions(+), 145 deletions(-)

diff --git a/libavcodec/mips/hevc_mc_uni_msa.c 
b/libavcodec/mips/hevc_mc_uni_msa.c
index cf22e7f..eead591 100644
--- a/libavcodec/mips/hevc_mc_uni_msa.c
+++ b/libavcodec/mips/hevc_mc_uni_msa.c
@@ -28,83 +28,39 @@ static void copy_width8_msa(uint8_t *src, int32_t 
src_stride,
 {
 int32_t cnt;
 uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
-
-if (0 == height % 12) {
-for (cnt = (height / 12); cnt--;) {
-LD_UB8(src, src_stride,
-   src0, src1, src2, src3, src4, src5, src6, src7);
-src += (8 * src_stride);
-
-out0 = __msa_copy_u_d((v2i64) src0, 0);
-out1 = __msa_copy_u_d((v2i64) src1, 0);
-out2 = __msa_copy_u_d((v2i64) src2, 0);
-out3 = __msa_copy_u_d((v2i64) src3, 0);
-out4 = __msa_copy_u_d((v2i64) src4, 0);
-out5 = __msa_copy_u_d((v2i64) src5, 0);
-out6 = __msa_copy_u_d((v2i64) src6, 0);
-out7 = __msa_copy_u_d((v2i64) src7, 0);
 
-SD4(out0, out1, out2, out3, dst, dst_stride);
-dst += (4 * dst_stride);
-SD4(out4, out5, out6, out7, dst, dst_stride);
-dst += (4 * dst_stride);
-
-LD_UB4(src, src_stride, src0, src1, src2, src3);
+if (2 == height) {
+LD2(src, src_stride, out0, out1);
+SD(out0, dst);
+dst += dst_stride;
+SD(out1, dst);
+} else if (6 == height) {
+LD4(src, src_stride, out0, out1, out2, out3);
+src += (4 * src_stride);
+SD4(out0, out1, out2, out3, dst, dst_stride);
+dst += (4 * dst_stride);
+LD2(src, src_stride, out0, out1);
+SD(out0, dst);
+dst += dst_stride;
+SD(out1, dst);
+} else if (0 == (height % 8)) {
+for (cnt = (height >> 3); cnt--;) {
+LD4(src, src_stride, out0, out1, out2, out3);
+src += (4 * src_stride);
+LD4(src, src_stride, out4, out5, out6, out7);
 src += (4 * src_stride);
-
-out0 = __msa_copy_u_d((v2i64) src0, 0);
-out1 = __msa_copy_u_d((v2i64) src1, 0);
-out2 = __msa_copy_u_d((v2i64) src2, 0);
-out3 = __msa_copy_u_d((v2i64) src3, 0);
-
-SD4(out0, out1, out2, out3, dst, dst_stride);
-dst += (4 * dst_stride);
-}
-} else if (0 == height % 8) {
-for (cnt = height >> 3; cnt--;) {
-LD_UB8(src, src_stride,
-   src0, src1, src2, src3, src4, src5, src6, src7);
-src += (8 * src_stride);
-
-out0 = __msa_copy_u_d((v2i64) src0, 0);
-out1 = __msa_copy_u_d((v2i64) src1, 0);
-out2 = __msa_copy_u_d((v2i64) src2, 0);
-out3 = __msa_copy_u_d((v2i64) src3, 0);
-out4 = __msa_copy_u_d((v2i64) src4, 0);
-out5 = __msa_copy_u_d((v2i64) src5, 0);
-out6 = __msa_copy_u_d((v2i64) src6, 0);
-out7 = __msa_copy_u_d((v2i64) src7, 0);
-
 SD4(out0, out1, out2, out3, dst, dst_stride);
 dst += (4 * dst_stride);
 SD4(out4, out5, out6, out7, dst, dst_stride);
 dst += (4 * dst_stride);
 }
-} else if (0 == height % 4) {
-for (cnt = (height / 4); cnt--;) {
-LD_UB4(src, src_stride, src0, src1, src2, src3);
+} else if (0 == (height % 4)) {
+for (cnt = (height >> 2); cnt--;) {
+LD4(src, src_stride, out0, out1, out2, out3);
 src += (4 * src_stride);
-out0 = __msa_copy_u_d((v2i64) src0, 0);
-out1 = __msa_copy_u_d((v2i64) src1, 0);
-out2 = __msa_copy_u_d((v2i64) src2, 0);
-out3 = __msa_copy_u_d((v2i64) src3, 0);
-
 SD4(out0, out1, out2, out3, dst, dst_stride);
 dst += (4 * dst_stride);
 }
-} else if (0 == height % 2) {
-for (cnt = (height / 2); cnt--;) {
-LD_UB2(src, src_stride, src0, src1);
-src += (2 * src_stride);
-out0 = __msa_copy_u_d((v2i64) src0, 0);
-out1 = __msa_copy_u_d((v2i64) src1, 0);
-
-SD(out0, dst);
-dst += dst_stride;
-SD(out1, dst);
-dst += dst_stride;
-}
 }
 }
 
@@ -122,33 +78,6 @@ static void copy_width12_msa(uint8_t *src, int32_t 
src_stride,
 ST12x8_UB(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
 }
 
-static void copy_16multx8mult_msa(uint8_t *src, int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride,
-  int32_t height, int32_t width)
-{
-int32_t cnt, loop_cnt;
-uint8_t *src_tmp, *dst_tmp;
-v16u8 s

Re: [FFmpeg-devel] [PATCH] Fix visual glitch with XvMC, caused by wrong idct permutation.

2017-10-09 Thread Ronald S. Bultje
Hi,

On Mon, Oct 9, 2017 at 6:46 AM, Ivan Kalvachev  wrote:

> On 10/9/17, Ronald S. Bultje  wrote:
> > On Sun, Oct 8, 2017 at 6:52 PM, Ivan Kalvachev 
> wrote:
> > [..]
> >
> > Indentation is off in the second hunk, can you fix that?
>
> You want it 4 spaces to the right
>

Yes, please.

BTW, I think it would be better to use "127" number.
>

I don't really mind either way. The number 128 suggests it may have been
intended as a bitmask. Michael is probably better positioned to comment on
this.

Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [V3 1/4] ffmpeg: remove hwaccel_lax_profile_check opt.

2017-10-09 Thread wm4
On Mon, 9 Oct 2017 12:40:41 +0100
Mark Thompson  wrote:

> On 09/10/17 08:49, Jun Zhao wrote:
> > V3: Remove hwaccel_lax_profile_check opt, and add new per-stream
> > hwaccel_flags option
> >   
> 
> > From 2b1585fd6e6e68c81761ace0a8503385067086e0 Mon Sep 17 00:00:00 2001
> > From: Jun Zhao 
> > Date: Mon, 9 Oct 2017 02:13:14 -0400
> > Subject: [V3 1/4] ffmpeg: remove hwaccel_lax_profile_check opt.
> > 
> > remove hwaccel_lax_profile_check, will use per-stream hwaccel_flags
> > option.
> > 
> > Signed-off-by: Jun Zhao 
> > ---
> >  fftools/ffmpeg.h | 1 -
> >  fftools/ffmpeg_opt.c | 3 ---
> >  2 files changed, 4 deletions(-)
> > 
> > diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
> > index f6c76bcc55..888f77223a 100644
> > --- a/fftools/ffmpeg.h
> > +++ b/fftools/ffmpeg.h
> > @@ -624,7 +624,6 @@ extern const AVIOInterruptCB int_cb;
> >  
> >  extern const OptionDef options[];
> >  extern const HWAccel hwaccels[];
> > -extern int hwaccel_lax_profile_check;
> >  extern AVBufferRef *hw_device_ctx;
> >  #if CONFIG_QSV
> >  extern char *qsv_device;
> > diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
> > index 100fa76e46..500920326b 100644
> > --- a/fftools/ffmpeg_opt.c
> > +++ b/fftools/ffmpeg_opt.c
> > @@ -100,7 +100,6 @@ const HWAccel hwaccels[] = {
> >  #endif
> >  { 0 },
> >  };
> > -int hwaccel_lax_profile_check = 0;
> >  AVBufferRef *hw_device_ctx;
> >  HWDevice *filter_hw_device;
> >  
> > @@ -3640,8 +3639,6 @@ const OptionDef options[] = {
> >  { "autorotate",   HAS_ARG | OPT_BOOL | OPT_SPEC |
> >OPT_EXPERT | OPT_INPUT,  
> >   { .off = OFFSET(autorotate) },
> >  "automatically insert correct rotate filters" },
> > -{ "hwaccel_lax_profile_check", OPT_BOOL | OPT_EXPERT,  
> >   { &hwaccel_lax_profile_check},
> > -"attempt to decode anyway if HW accelerated decoder's supported 
> > profiles do not exactly match the stream" },
> >  
> >  /* audio options */
> >  { "aframes",OPT_AUDIO | HAS_ARG  | OPT_PERFILE | OPT_OUTPUT,   
> > { .func_arg = opt_audio_frames },
> > -- 
> > 2.11.0
> >   
> 
> Would anyone else care to comment on whether we are allowed to do this?  I 
> realise the option lost any effect some time ago, but I was waiting for the 
> version bump to actually remove it.

That's a good question. Unfortunately, I'd guess "maybe not" without
deprecation. But not sure.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix visual glitch with XvMC, caused by wrong idct permutation.

2017-10-09 Thread Michael Niedermayer
On Mon, Oct 09, 2017 at 09:02:38AM -0400, Ronald S. Bultje wrote:
> Hi,
> 
> On Mon, Oct 9, 2017 at 6:46 AM, Ivan Kalvachev  wrote:
> 
> > On 10/9/17, Ronald S. Bultje  wrote:
> > > On Sun, Oct 8, 2017 at 6:52 PM, Ivan Kalvachev 
> > wrote:
> > > [..]
> > >
> > > Indentation is off in the second hunk, can you fix that?
> >
> > You want it 4 spaces to the right
> >
> 
> Yes, please.
> 
> BTW, I think it would be better to use "127" number.
> >
> 
> I don't really mind either way. The number 128 suggests it may have been
> intended as a bitmask. Michael is probably better positioned to comment on
> this.

I don't really remember, but I think 128 was chosen for ABI
compatibility with additions to it from libav. So it should no longer
matter what values are used for additions.

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

What does censorship reveal? It reveals fear. -- Julian Assange


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 20/20] cbs: Add some read/write tests

2017-10-09 Thread Michael Niedermayer
On Sun, Oct 08, 2017 at 09:01:54PM +0100, Mark Thompson wrote:
> Use the appropriate metadata filter for each codec - in the absence of any
> options to modify the stream, the output bitstream should be identical to
> the input (though the output file may differ in padding).
> 
> All tests use conformance bitstreams, the MPEG-2 streams are newly added
> from the conformance test streams
> 
> ---
>  tests/Makefile|  1 +
>  tests/fate/cbs.mak| 74 
> +++
>  tests/ref/fate/cbs-h264-AUD_MW_E  |  1 +
>  tests/ref/fate/cbs-h264-BASQP1_Sony_C |  1 +
>  tests/ref/fate/cbs-h264-CABACI3_Sony_B|  1 +
>  tests/ref/fate/cbs-h264-CVBS3_Sony_C  |  1 +
>  tests/ref/fate/cbs-h264-CVFC1_Sony_C  |  1 +
>  tests/ref/fate/cbs-h264-CVMANL1_TOSHIBA_B |  1 +
>  tests/ref/fate/cbs-h264-CVNLFI1_Sony_C|  1 +
>  tests/ref/fate/cbs-h264-CVSE2_Sony_B  |  1 +
>  tests/ref/fate/cbs-h264-CVWP1_TOSHIBA_E   |  1 +
>  tests/ref/fate/cbs-h264-FM1_BT_B  |  1 +
>  tests/ref/fate/cbs-h264-MR1_BT_A  |  1 +
>  tests/ref/fate/cbs-h264-SVA_Base_B|  1 +
>  tests/ref/fate/cbs-h264-Sharp_MP_PAFF_1r2 |  1 +
>  tests/ref/fate/cbs-h264-sp1_bt_a  |  1 +
>  tests/ref/fate/cbs-hevc-CAINIT_E_SHARP_3  |  1 +
>  tests/ref/fate/cbs-hevc-CAINIT_H_SHARP_3  |  1 +
>  tests/ref/fate/cbs-hevc-CONFWIN_A_Sony_1  |  1 +
>  tests/ref/fate/cbs-hevc-HRD_A_Fujitsu_2   |  1 +
>  tests/ref/fate/cbs-hevc-LTRPSPS_A_Qualcomm_1  |  1 +
>  tests/ref/fate/cbs-hevc-NUT_A_ericsson_5  |  1 +
>  tests/ref/fate/cbs-hevc-PICSIZE_A_Bossen_1|  1 +
>  tests/ref/fate/cbs-hevc-PICSIZE_B_Bossen_1|  1 +
>  tests/ref/fate/cbs-hevc-RPLM_A_qualcomm_4 |  1 +
>  tests/ref/fate/cbs-hevc-RPS_A_docomo_4|  1 +
>  tests/ref/fate/cbs-hevc-RPS_E_qualcomm_5  |  1 +
>  tests/ref/fate/cbs-hevc-SLIST_A_Sony_4|  1 +
>  tests/ref/fate/cbs-hevc-SLIST_D_Sony_9|  1 +
>  tests/ref/fate/cbs-hevc-STRUCT_A_Samsung_5|  1 +
>  tests/ref/fate/cbs-hevc-TILES_B_Cisco_1   |  1 +
>  tests/ref/fate/cbs-hevc-WPP_A_ericsson_MAIN_2 |  1 +
>  tests/ref/fate/cbs-hevc-WPP_F_ericsson_MAIN_2 |  1 +
>  tests/ref/fate/cbs-hevc-WP_A_Toshiba_3|  1 +
>  tests/ref/fate/cbs-hevc-ipcm_E_NEC_2  |  1 +
>  tests/ref/fate/cbs-mpeg2-hhi_burst_422_short  |  1 +
>  tests/ref/fate/cbs-mpeg2-sony-ct3 |  1 +
>  tests/ref/fate/cbs-mpeg2-tcela-6  |  1 +
>  38 files changed, 111 insertions(+)
>  create mode 100644 tests/fate/cbs.mak
>  create mode 100644 tests/ref/fate/cbs-h264-AUD_MW_E
>  create mode 100644 tests/ref/fate/cbs-h264-BASQP1_Sony_C
>  create mode 100644 tests/ref/fate/cbs-h264-CABACI3_Sony_B
>  create mode 100644 tests/ref/fate/cbs-h264-CVBS3_Sony_C
>  create mode 100644 tests/ref/fate/cbs-h264-CVFC1_Sony_C
>  create mode 100644 tests/ref/fate/cbs-h264-CVMANL1_TOSHIBA_B
>  create mode 100644 tests/ref/fate/cbs-h264-CVNLFI1_Sony_C
>  create mode 100644 tests/ref/fate/cbs-h264-CVSE2_Sony_B
>  create mode 100644 tests/ref/fate/cbs-h264-CVWP1_TOSHIBA_E
>  create mode 100644 tests/ref/fate/cbs-h264-FM1_BT_B
>  create mode 100644 tests/ref/fate/cbs-h264-MR1_BT_A
>  create mode 100644 tests/ref/fate/cbs-h264-SVA_Base_B
>  create mode 100644 tests/ref/fate/cbs-h264-Sharp_MP_PAFF_1r2
>  create mode 100644 tests/ref/fate/cbs-h264-sp1_bt_a
>  create mode 100644 tests/ref/fate/cbs-hevc-CAINIT_E_SHARP_3
>  create mode 100644 tests/ref/fate/cbs-hevc-CAINIT_H_SHARP_3
>  create mode 100644 tests/ref/fate/cbs-hevc-CONFWIN_A_Sony_1
>  create mode 100644 tests/ref/fate/cbs-hevc-HRD_A_Fujitsu_2
>  create mode 100644 tests/ref/fate/cbs-hevc-LTRPSPS_A_Qualcomm_1
>  create mode 100644 tests/ref/fate/cbs-hevc-NUT_A_ericsson_5
>  create mode 100644 tests/ref/fate/cbs-hevc-PICSIZE_A_Bossen_1
>  create mode 100644 tests/ref/fate/cbs-hevc-PICSIZE_B_Bossen_1
>  create mode 100644 tests/ref/fate/cbs-hevc-RPLM_A_qualcomm_4
>  create mode 100644 tests/ref/fate/cbs-hevc-RPS_A_docomo_4
>  create mode 100644 tests/ref/fate/cbs-hevc-RPS_E_qualcomm_5
>  create mode 100644 tests/ref/fate/cbs-hevc-SLIST_A_Sony_4
>  create mode 100644 tests/ref/fate/cbs-hevc-SLIST_D_Sony_9
>  create mode 100644 tests/ref/fate/cbs-hevc-STRUCT_A_Samsung_5
>  create mode 100644 tests/ref/fate/cbs-hevc-TILES_B_Cisco_1
>  create mode 100644 tests/ref/fate/cbs-hevc-WPP_A_ericsson_MAIN_2
>  create mode 100644 tests/ref/fate/cbs-hevc-WPP_F_ericsson_MAIN_2
>  create mode 100644 tests/ref/fate/cbs-hevc-WP_A_Toshiba_3
>  create mode 100644 tests/ref/fate/cbs-hevc-ipcm_E_NEC_2
>  create mode 100644 tests/ref/fate/cbs-mpeg2-hhi_burst_422_short
>  create mode 100644 tests/ref/fate/cbs-mpeg2-sony-ct3
>  create mode 100644 tests/ref/fate/cbs-mpeg2-tcela-6

make V=2 fate-cbs-h264-FM1_BT_B
appears to segfa

Re: [FFmpeg-devel] [PATCH 20/20] cbs: Add some read/write tests

2017-10-09 Thread Mark Thompson
On 09/10/17 19:41, Michael Niedermayer wrote:
> On Sun, Oct 08, 2017 at 09:01:54PM +0100, Mark Thompson wrote:
>> Use the appropriate metadata filter for each codec - in the absence of any
>> options to modify the stream, the output bitstream should be identical to
>> the input (though the output file may differ in padding).
>>
>> All tests use conformance bitstreams, the MPEG-2 streams are newly added
>> from the conformance test streams
>> 
>> ---
>>  tests/Makefile|  1 +
>>  tests/fate/cbs.mak| 74 
>> +++
>>  tests/ref/fate/cbs-h264-AUD_MW_E  |  1 +
>>  tests/ref/fate/cbs-h264-BASQP1_Sony_C |  1 +
>>  tests/ref/fate/cbs-h264-CABACI3_Sony_B|  1 +
>>  tests/ref/fate/cbs-h264-CVBS3_Sony_C  |  1 +
>>  tests/ref/fate/cbs-h264-CVFC1_Sony_C  |  1 +
>>  tests/ref/fate/cbs-h264-CVMANL1_TOSHIBA_B |  1 +
>>  tests/ref/fate/cbs-h264-CVNLFI1_Sony_C|  1 +
>>  tests/ref/fate/cbs-h264-CVSE2_Sony_B  |  1 +
>>  tests/ref/fate/cbs-h264-CVWP1_TOSHIBA_E   |  1 +
>>  tests/ref/fate/cbs-h264-FM1_BT_B  |  1 +
>>  tests/ref/fate/cbs-h264-MR1_BT_A  |  1 +
>>  tests/ref/fate/cbs-h264-SVA_Base_B|  1 +
>>  tests/ref/fate/cbs-h264-Sharp_MP_PAFF_1r2 |  1 +
>>  tests/ref/fate/cbs-h264-sp1_bt_a  |  1 +
>>  tests/ref/fate/cbs-hevc-CAINIT_E_SHARP_3  |  1 +
>>  tests/ref/fate/cbs-hevc-CAINIT_H_SHARP_3  |  1 +
>>  tests/ref/fate/cbs-hevc-CONFWIN_A_Sony_1  |  1 +
>>  tests/ref/fate/cbs-hevc-HRD_A_Fujitsu_2   |  1 +
>>  tests/ref/fate/cbs-hevc-LTRPSPS_A_Qualcomm_1  |  1 +
>>  tests/ref/fate/cbs-hevc-NUT_A_ericsson_5  |  1 +
>>  tests/ref/fate/cbs-hevc-PICSIZE_A_Bossen_1|  1 +
>>  tests/ref/fate/cbs-hevc-PICSIZE_B_Bossen_1|  1 +
>>  tests/ref/fate/cbs-hevc-RPLM_A_qualcomm_4 |  1 +
>>  tests/ref/fate/cbs-hevc-RPS_A_docomo_4|  1 +
>>  tests/ref/fate/cbs-hevc-RPS_E_qualcomm_5  |  1 +
>>  tests/ref/fate/cbs-hevc-SLIST_A_Sony_4|  1 +
>>  tests/ref/fate/cbs-hevc-SLIST_D_Sony_9|  1 +
>>  tests/ref/fate/cbs-hevc-STRUCT_A_Samsung_5|  1 +
>>  tests/ref/fate/cbs-hevc-TILES_B_Cisco_1   |  1 +
>>  tests/ref/fate/cbs-hevc-WPP_A_ericsson_MAIN_2 |  1 +
>>  tests/ref/fate/cbs-hevc-WPP_F_ericsson_MAIN_2 |  1 +
>>  tests/ref/fate/cbs-hevc-WP_A_Toshiba_3|  1 +
>>  tests/ref/fate/cbs-hevc-ipcm_E_NEC_2  |  1 +
>>  tests/ref/fate/cbs-mpeg2-hhi_burst_422_short  |  1 +
>>  tests/ref/fate/cbs-mpeg2-sony-ct3 |  1 +
>>  tests/ref/fate/cbs-mpeg2-tcela-6  |  1 +
>>  38 files changed, 111 insertions(+)
>>  create mode 100644 tests/fate/cbs.mak
>>  create mode 100644 tests/ref/fate/cbs-h264-AUD_MW_E
>>  create mode 100644 tests/ref/fate/cbs-h264-BASQP1_Sony_C
>>  create mode 100644 tests/ref/fate/cbs-h264-CABACI3_Sony_B
>>  create mode 100644 tests/ref/fate/cbs-h264-CVBS3_Sony_C
>>  create mode 100644 tests/ref/fate/cbs-h264-CVFC1_Sony_C
>>  create mode 100644 tests/ref/fate/cbs-h264-CVMANL1_TOSHIBA_B
>>  create mode 100644 tests/ref/fate/cbs-h264-CVNLFI1_Sony_C
>>  create mode 100644 tests/ref/fate/cbs-h264-CVSE2_Sony_B
>>  create mode 100644 tests/ref/fate/cbs-h264-CVWP1_TOSHIBA_E
>>  create mode 100644 tests/ref/fate/cbs-h264-FM1_BT_B
>>  create mode 100644 tests/ref/fate/cbs-h264-MR1_BT_A
>>  create mode 100644 tests/ref/fate/cbs-h264-SVA_Base_B
>>  create mode 100644 tests/ref/fate/cbs-h264-Sharp_MP_PAFF_1r2
>>  create mode 100644 tests/ref/fate/cbs-h264-sp1_bt_a
>>  create mode 100644 tests/ref/fate/cbs-hevc-CAINIT_E_SHARP_3
>>  create mode 100644 tests/ref/fate/cbs-hevc-CAINIT_H_SHARP_3
>>  create mode 100644 tests/ref/fate/cbs-hevc-CONFWIN_A_Sony_1
>>  create mode 100644 tests/ref/fate/cbs-hevc-HRD_A_Fujitsu_2
>>  create mode 100644 tests/ref/fate/cbs-hevc-LTRPSPS_A_Qualcomm_1
>>  create mode 100644 tests/ref/fate/cbs-hevc-NUT_A_ericsson_5
>>  create mode 100644 tests/ref/fate/cbs-hevc-PICSIZE_A_Bossen_1
>>  create mode 100644 tests/ref/fate/cbs-hevc-PICSIZE_B_Bossen_1
>>  create mode 100644 tests/ref/fate/cbs-hevc-RPLM_A_qualcomm_4
>>  create mode 100644 tests/ref/fate/cbs-hevc-RPS_A_docomo_4
>>  create mode 100644 tests/ref/fate/cbs-hevc-RPS_E_qualcomm_5
>>  create mode 100644 tests/ref/fate/cbs-hevc-SLIST_A_Sony_4
>>  create mode 100644 tests/ref/fate/cbs-hevc-SLIST_D_Sony_9
>>  create mode 100644 tests/ref/fate/cbs-hevc-STRUCT_A_Samsung_5
>>  create mode 100644 tests/ref/fate/cbs-hevc-TILES_B_Cisco_1
>>  create mode 100644 tests/ref/fate/cbs-hevc-WPP_A_ericsson_MAIN_2
>>  create mode 100644 tests/ref/fate/cbs-hevc-WPP_F_ericsson_MAIN_2
>>  create mode 100644 tests/ref/fate/cbs-hevc-WP_A_Toshiba_3
>>  create mode 100644 tests/ref/fate/cbs-hevc-ipcm_E_NEC_2
>>  create mode 100644 tests/ref/fate/cbs-mpeg2-hhi_burst_422_short
>>  create mode 100644 tests/r

[FFmpeg-devel] [PATCH 03/20] lavc: Add coded bitstream read/write API

2017-10-09 Thread Mark Thompson
(cherry picked from commit 18f1706f331bf5dd565774eae680508c8d3a97ad)
(cherry picked from commit 44cde38c8acbef7d5250e6d1b52b1020871e093b)
---
Allows close to be called with a null pointer (for BSF init failure).


 configure |   1 +
 libavcodec/Makefile   |   1 +
 libavcodec/cbs.c  | 482 ++
 libavcodec/cbs.h  | 283 +++
 libavcodec/cbs_internal.h |  86 +
 5 files changed, 853 insertions(+)
 create mode 100644 libavcodec/cbs.c
 create mode 100644 libavcodec/cbs.h
 create mode 100644 libavcodec/cbs_internal.h

diff --git a/configure b/configure
index fc377d90fb..32d693f494 100755
--- a/configure
+++ b/configure
@@ -2142,6 +2142,7 @@ CONFIG_EXTRA="
 blockdsp
 bswapdsp
 cabac
+cbs
 dirac_parse
 dvprofile
 exif
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index c4ec09b1c4..9680553c90 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -59,6 +59,7 @@ OBJS-$(CONFIG_AUDIODSP)+= audiodsp.o
 OBJS-$(CONFIG_BLOCKDSP)+= blockdsp.o
 OBJS-$(CONFIG_BSWAPDSP)+= bswapdsp.o
 OBJS-$(CONFIG_CABAC)   += cabac.o
+OBJS-$(CONFIG_CBS) += cbs.o
 OBJS-$(CONFIG_CRYSTALHD)   += crystalhd.o
 OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o
 OBJS-$(CONFIG_ERROR_RESILIENCE)+= error_resilience.o
diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
new file mode 100644
index 0000000000..fa68040cfa
--- /dev/null
+++ b/libavcodec/cbs.c
@@ -0,0 +1,482 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "config.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+
+#include "cbs.h"
+#include "cbs_internal.h"
+
+
+static const CodedBitstreamType *cbs_type_table[] = {
+};
+
+int ff_cbs_init(CodedBitstreamContext **ctx_ptr,
+enum AVCodecID codec_id, void *log_ctx)
+{
+CodedBitstreamContext *ctx;
+const CodedBitstreamType *type;
+int i;
+
+type = NULL;
+for (i = 0; i < FF_ARRAY_ELEMS(cbs_type_table); i++) {
+if (cbs_type_table[i]->codec_id == codec_id) {
+type = cbs_type_table[i];
+break;
+}
+}
+if (!type)
+return AVERROR(EINVAL);
+
+ctx = av_mallocz(sizeof(*ctx));
+if (!ctx)
+return AVERROR(ENOMEM);
+
+ctx->log_ctx = log_ctx;
+ctx->codec   = type;
+
+ctx->priv_data = av_mallocz(ctx->codec->priv_data_size);
+if (!ctx->priv_data) {
+av_freep(&ctx);
+return AVERROR(ENOMEM);
+}
+
+ctx->decompose_unit_types = NULL;
+
+ctx->trace_enable = 0;
+ctx->trace_level  = AV_LOG_TRACE;
+
+*ctx_ptr = ctx;
+return 0;
+}
+
+void ff_cbs_close(CodedBitstreamContext **ctx_ptr)
+{
+CodedBitstreamContext *ctx = *ctx_ptr;
+
+if (!ctx)
+return;
+
+if (ctx->codec && ctx->codec->close)
+ctx->codec->close(ctx);
+
+av_freep(&ctx->priv_data);
+av_freep(ctx_ptr);
+}
+
+static void cbs_unit_uninit(CodedBitstreamContext *ctx,
+CodedBitstreamUnit *unit)
+{
+if (ctx->codec->free_unit && unit->content && !unit->content_external)
+ctx->codec->free_unit(unit);
+
+av_freep(&unit->data);
+unit->data_size = 0;
+unit->data_bit_padding = 0;
+}
+
+void ff_cbs_fragment_uninit(CodedBitstreamContext *ctx,
+CodedBitstreamFragment *frag)
+{
+int i;
+
+for (i = 0; i < frag->nb_units; i++)
+cbs_unit_uninit(ctx, &frag->units[i]);
+av_freep(&frag->units);
+frag->nb_units = 0;
+
+av_freep(&frag->data);
+frag->data_size= 0;
+frag->data_bit_padding = 0;
+}
+
+static int cbs_read_fragment_content(CodedBitstreamContext *ctx,
+ CodedBitstreamFragment *frag)
+{
+int err, i, j;
+
+for (i = 0; i < frag->nb_units; i++) {
+if (ctx->decompose_unit_types) {
+for (j = 0; j < ctx->nb_decompose_unit_types; j++) {
+if (ctx->decompose_unit_types[j] == frag->units[i].type)
+break;
+}
+if (j >= ctx->nb_decompose_unit_types)
+c

[FFmpeg-devel] [PATCH 08/20] lavc: Add h264_metadata bitstream filter

2017-10-09 Thread Mark Thompson
This is able to modify some header metadata found in the SPS/VUI,
and can also add/remove AUDs and insert user data in SEI NAL units.

(cherry picked from commit 9e93001b6135a23fe4e200196c08fb4fbffed6fc)
(cherry picked from commit c42b62d1f9641f10ffc23cad9abbe47d8a4a165b)
---
On 08/10/17 21:32, Mark Thompson wrote:
> +// Insert the SEI in access units containing SPSs, and also
> +// unconditionally in the first access unit we ever see.
> +if ((has_sps && ctx->sei_user_data) || !ctx->sei_first_au) {

This check puts the new test in the wrong place, and then crashes when 
sei_user_data is not set.

Fixed below.


 configure  |   1 +
 doc/bitstream_filters.texi |  63 +
 libavcodec/Makefile|   1 +
 libavcodec/bitstream_filters.c |   1 +
 libavcodec/h264_metadata_bsf.c | 524 +
 5 files changed, 590 insertions(+)
 create mode 100644 libavcodec/h264_metadata_bsf.c

diff --git a/configure b/configure
index 154f9a0fa2..43b0406c24 100755
--- a/configure
+++ b/configure
@@ -2906,6 +2906,7 @@ mpeg4video_parser_select="h263dsp mpegvideo qpeldsp"
 vc1_parser_select="vc1dsp"
 
 # bitstream_filters
+h264_metadata_bsf_select="cbs_h264"
 mjpeg2jpeg_bsf_select="jpegtables"
 trace_headers_bsf_select="cbs_h264 cbs_h265 cbs_mpeg2"
 
diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
index 990b4f3c58..f7dfa1f753 100644
--- a/doc/bitstream_filters.texi
+++ b/doc/bitstream_filters.texi
@@ -92,6 +92,69 @@ When this option is enabled, the long-term headers are 
removed from the
 bitstream after extraction.
 @end table
 
+@section h264_metadata
+
+Modify metadata embedded in an H.264 stream.
+
+@table @option
+@item aud
+Insert or remove AUD NAL units in all access units of the stream.
+
+@table @samp
+@item insert
+@item remove
+@end table
+
+@item sample_aspect_ratio
+Set the sample aspect ratio of the stream in the VUI parameters.
+
+@item video_format
+@item video_full_range_flag
+Set the video format in the stream (see H.264 section E.2.1 and
+table E-2).
+
+@item colour_primaries
+@item transfer_characteristics
+@item matrix_coefficients
+Set the colour description in the stream (see H.264 section E.2.1
+and tables E-3, E-4 and E-5).
+
+@item chroma_sample_loc_type
+Set the chroma sample location in the stream (see H.264 section
+E.2.1 and figure E-1).
+
+@item tick_rate
+Set the tick rate (num_units_in_tick / time_scale) in the VUI
+parameters.  This is the smallest time unit representable in the
+stream, and in many cases represents the field rate of the stream
+(double the frame rate).
+@item fixed_frame_rate_flag
+Set whether the stream has fixed framerate - typically this indicates
+that the framerate is exactly half the tick rate, but the exact
+meaning is dependent on interlacing and the picture structure (see
+H.264 section E.2.1 and table E-6).
+
+@item crop_left
+@item crop_right
+@item crop_top
+@item crop_bottom
+Set the frame cropping offsets in the SPS.  These values will replace
+the current ones if the stream is already cropped.
+
+These fields are set in pixels.  Note that some sizes may not be
+representable if the chroma is subsampled or the stream is interlaced
+(see H.264 section 7.4.2.1.1).
+
+@item sei_user_data
+Insert a string as SEI unregistered user data.  The argument must
+be of the form @emph{UUID+string}, where the UUID is as hex digits
+possibly separated by hyphens, and the string can be anything.
+
+For example, @samp{086f3693-b7b3-4f2c-9653-21492feee5b8+hello} will
+insert the string ``hello'' associated with the given UUID.
+
+@end table
+
 @section h264_mp4toannexb
 
 Convert an H.264 bitstream from length prefixed mode to start code
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 5e09889bc6..25ad785dbe 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1015,6 +1015,7 @@ OBJS-$(CONFIG_DUMP_EXTRADATA_BSF) += 
dump_extradata_bsf.o
 OBJS-$(CONFIG_DCA_CORE_BSF)   += dca_core_bsf.o
 OBJS-$(CONFIG_EXTRACT_EXTRADATA_BSF)  += extract_extradata_bsf.o\
  h2645_parse.o
+OBJS-$(CONFIG_H264_METADATA_BSF)  += h264_metadata_bsf.o
 OBJS-$(CONFIG_H264_MP4TOANNEXB_BSF)   += h264_mp4toannexb_bsf.o
 OBJS-$(CONFIG_HEVC_MP4TOANNEXB_BSF)   += hevc_mp4toannexb_bsf.o
 OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF)+= imx_dump_header_bsf.o
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index dd0247182b..0e133b9036 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -29,6 +29,7 @@ extern const AVBitStreamFilter ff_chomp_bsf;
 extern const AVBitStreamFilter ff_dump_extradata_bsf;
 extern const AVBitStreamFilter ff_dca_core_bsf;
 extern const AVBitStreamFilter ff_extract_extradata_bsf;
+extern const AVBitStreamFilter ff_h264_metadata_bsf;
 extern const AVBitStreamFilter ff_h264_mp4toannexb_bsf;
 extern const AVBitStreamFilter ff_hevc_mp4toannexb

Re: [FFmpeg-devel] libavcodec/proresdec : add qmat dsp with SSE2, AVX2 simd

2017-10-09 Thread Martin Vignali
2017-10-07 18:16 GMT+02:00 Ronald S. Bultje :

> Hi Martin,
>
> On Sat, Oct 7, 2017 at 11:49 AM, Martin Vignali 
> wrote:
>
> > 2017-10-07 17:30 GMT+02:00 Ronald S. Bultje :
> > > On Sat, Oct 7, 2017 at 10:22 AM, Martin Vignali <
> > martin.vign...@gmail.com>
> > > wrote:
> > > > Patch in attach add a new dsp
> > > > for manipulation of qmat
> > > >
> > > > for now, i move this code inside
> > > >
> > > > for (i = 0; i < 64; i++) {
> > > > qmat_luma_scaled  [i] = ctx->qmat_luma  [i] * qscale;
> > > > qmat_chroma_scaled[i] = ctx->qmat_chroma[i] * qscale;
> > > > }
> > > >
> > > > i add a special case for qscale == 1
> > > > and SSE2, AVX2 optimization
> > >
> > > This loop only executes once per slice. We typically do not
> SIMD-optimize
> > > at that level, because it won't give significant speed gains...
> >
> > Ok didn't know that.
> > I mostly follow, what there are already done, like in
> blockdsp.clear_block
> >
>
> Right, so consider that blockdsp is done per block (16x16 pixels), not per
> slice.
>
OK in principle (only optimize a function that is called quite often).


>
> You could remove this entirely from the slice processing code by simply
> pre-calculating the values in the init function once for the whole stream,
> there's only 224 qscale values so it's 224*64*2 multiplications, which is
> (in the context of prores) virtually negligible.
>

I'm not sure we can do that for the prores decoder:
the qmat seems to be set in the frame-header decoding function
(based on the header of each frame).

Martin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] libavcodec/hap : add HapAlphaOnly decoding/encoding

2017-10-09 Thread Martin Vignali
>
> If this is what you believe, I don't think an RGBA representation
> should be committed.
>
>
This is not about what I believe! :-)
It is about what kind of solution we can use to support decoding/encoding for this
codec.

I don't really know how I can output RGBA (setting only alpha) and gray
only at the same time.
I have two opposite directions for the modification of the decoder and
encoder parts!

Martin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] mov: fix decode of fragments that overlap in time

2017-10-09 Thread John Stebbins
When keyframe intervals of dash segments are not perfectly aligned,
fragments in the stream can overlap in time. The previous sorting by
timestamp causes packets to be read out of decode order and results
in decode errors.

Insert new "trun" index entries into index_entries in the order that
the trun are referenced by the sidx.
---
 libavformat/isom.h |  26 +-
 libavformat/mov.c  | 678 -
 2 files changed, 479 insertions(+), 225 deletions(-)

diff --git a/libavformat/isom.h b/libavformat/isom.h
index b9380e9dcc..395d20d25c 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -93,7 +93,6 @@ typedef struct MOVFragment {
 unsigned duration;
 unsigned size;
 unsigned flags;
-int64_t time;
 } MOVFragment;
 
 typedef struct MOVTrackExt {
@@ -109,17 +108,28 @@ typedef struct MOVSbgp {
 unsigned int index;
 } MOVSbgp;
 
+typedef struct MOVFragmentStreamInfo {
+int id;
+int64_t sidx_pts;
+int64_t first_tfra_pts;
+int64_t tfdt_dts;
+int index_entry;
+} MOVFragmentStreamInfo;
+
 typedef struct MOVFragmentIndexItem {
 int64_t moof_offset;
-int64_t time;
 int headers_read;
+int current;
+int nb_stream_info;
+MOVFragmentStreamInfo * stream_info;
 } MOVFragmentIndexItem;
 
 typedef struct MOVFragmentIndex {
-unsigned track_id;
-unsigned item_count;
-unsigned current_item;
-MOVFragmentIndexItem *items;
+int allocated_size;
+int complete;
+int current;
+int nb_items;
+MOVFragmentIndexItem * item;
 } MOVFragmentIndex;
 
 typedef struct MOVIndexRange {
@@ -250,9 +260,7 @@ typedef struct MOVContext {
 int moov_retry;
 int use_mfra_for;
 int has_looked_for_mfra;
-MOVFragmentIndex** fragment_index_data;
-unsigned fragment_index_count;
-int fragment_index_complete;
+MOVFragmentIndex frag_index;
 int atom_depth;
 unsigned int aax_mode;  ///< 'aax' file has been detected
 uint8_t file_key[20];
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 899690d920..03ba2a314b 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1168,6 +1168,211 @@ static int mov_read_moov(MOVContext *c, AVIOContext 
*pb, MOVAtom atom)
 return 0; /* now go for mdat */
 }
 
+static MOVFragmentStreamInfo * get_frag_stream_info(
+MOVFragmentIndex *frag_index,
+int index,
+int id)
+{
+int i;
+MOVFragmentIndexItem * item;
+
+if (index < 0 || index >= frag_index->nb_items)
+return NULL;
+item = &frag_index->item[index];
+for (i = 0; i < item->nb_stream_info; i++)
+if (item->stream_info[i].id == id)
+return &item->stream_info[i];
+
+// This shouldn't happen
+return NULL;
+}
+
+static void set_frag_stream(MOVFragmentIndex *frag_index, int id)
+{
+int i;
+MOVFragmentIndexItem * item;
+
+if (frag_index->current < 0 ||
+frag_index->current >= frag_index->nb_items)
+return;
+
+item = &frag_index->item[frag_index->current];
+for (i = 0; i < item->nb_stream_info; i++)
+if (item->stream_info[i].id == id) {
+item->current = i;
+return;
+}
+
+// id not found.  This shouldn't happen.
+item->current = -1;
+}
+
+static MOVFragmentStreamInfo * get_current_frag_stream_info(
+MOVFragmentIndex *frag_index)
+{
+MOVFragmentIndexItem * item = &frag_index->item[frag_index->current];
+if (item->current >= 0 && item->current < item->nb_stream_info)
+return &item->stream_info[item->current];
+
+// This shouldn't happen
+return NULL;
+}
+
+static int search_frag_moof_offset(MOVFragmentIndex *frag_index, int64_t 
offset)
+{
+int a, b, m;
+int64_t moof_offset;
+
+// Optimize for appending new entries
+if (!frag_index->nb_items ||
+frag_index->item[frag_index->nb_items - 1].moof_offset < offset)
+return frag_index->nb_items;
+
+a = -1;
+b = frag_index->nb_items;
+
+while (b - a > 1) {
+m = (a + b) >> 1;
+moof_offset = frag_index->item[m].moof_offset;
+if (moof_offset >= offset)
+b = m;
+if (moof_offset <= offset)
+a = m;
+}
+return b;
+}
+
+static int64_t get_stream_info_time(MOVFragmentStreamInfo * frag_stream_info)
+{
+
+if (frag_stream_info) {
+if (frag_stream_info->sidx_pts != AV_NOPTS_VALUE)
+return frag_stream_info->sidx_pts;
+if (frag_stream_info->first_tfra_pts != AV_NOPTS_VALUE)
+return frag_stream_info->first_tfra_pts;
+if (frag_stream_info->tfdt_dts != AV_NOPTS_VALUE)
+return frag_stream_info->tfdt_dts;
+}
+return AV_NOPTS_VALUE;
+}
+
+static int64_t get_frag_time(MOVFragmentIndex *frag_index,
+ int index, int track_id)
+{
+MOVFragmentStreamInfo * frag_stream_info;
+int64_t timestamp;
+int i;
+
+if (track_id >= 0) {
+frag_stream_info = get_frag_stream_info(frag_index,

Re: [FFmpeg-devel] [PATCH 1/2] mov: fix decode of fragments that overlap in time

2017-10-09 Thread John Stebbins
On 10/06/2017 04:20 PM, Michael Niedermayer wrote:
> On Thu, Oct 05, 2017 at 02:38:48PM -0700, John Stebbins wrote:
>> On 10/05/2017 09:45 AM, John Stebbins wrote:
>>> On 10/04/2017 03:21 PM, Michael Niedermayer wrote:
 On Wed, Oct 04, 2017 at 10:58:19AM -0700, John Stebbins wrote:
> On 10/04/2017 10:13 AM, Michael Niedermayer wrote:
>> On Wed, Oct 04, 2017 at 08:18:59AM -0700, John Stebbins wrote:
>>> On 10/04/2017 03:50 AM, Michael Niedermayer wrote:
 On Fri, Sep 29, 2017 at 08:54:08AM -0700, John Stebbins wrote:
> When keyframe intervals of dash segments are not perfectly aligned,
> fragments in the stream can overlap in time. Append new "trun" index
> entries to the end of the index instead of sorting by timestamp.
> Sorting by timestamp causes packets to be read out of decode order and
> results in decode errors.
> ---
>  libavformat/mov.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/libavformat/mov.c b/libavformat/mov.c
> index 899690d920..c7422cd9ed 100644
> --- a/libavformat/mov.c
> +++ b/libavformat/mov.c
> @@ -4340,8 +4340,8 @@ static int mov_read_trun(MOVContext *c, 
> AVIOContext *pb, MOVAtom atom)
>MOV_FRAG_SAMPLE_FLAG_DEPENDS_YES));
>  if (keyframe)
>  distance = 0;
> -ctts_index = av_add_index_entry(st, offset, dts, 
> sample_size, distance,
> -keyframe ? AVINDEX_KEYFRAME 
> : 0);
> +ctts_index = add_index_entry(st, offset, dts, sample_size, 
> distance,
> + keyframe ? AVINDEX_KEYFRAME : 
> 0);
 can this lead to timestamps being out of order not just changing
 from strictly monotone to monotone ?

 Maybe I am missing something but out of order could/would cause problems
 with av_index_search_timestamp() and possibly others


>>> I'm not sure I understand the question.  But I think I can answer.  The 
>>> new fragment can start before the last fragment
>>> ends. I'll make a concrete example.  Lets say the new fragment's first 
>>> DTS is 10 frames before the end of the previous
>>> fragment. So the first DTS of the new fragment is before the timestamp 
>>> of 10 entries in the index from the previous
>>> fragment.  av_add_index_entry searches the existing index and inserts 
>>> the first sample of the new fragment in position
>>> nb_index_entries - 10 (and shifts the existing entries).  The next 9 
>>> samples of the new fragment get intermixed with the
>>> remaining 9 samples of the previous fragment, sorted by DTS. When the 
>>> samples are read out, you get samples from the
>>> last fragment and the new fragment interleaved together causing 
>>> decoding errors.
>>>
>>> Using add_index_entry will result in the timestamps in the index going 
>>> backwards by 10 frames at the fragment boundary
>>> in this example.  In the other patch that accompanied this one, I've 
>>> marked the samples from the new fragment that
>>> overlap previous samples with AVINDEX_DISCARD. 
>>> ff_index_search_timestamp appears to be AVINDEX_DISCARD aware.  So I
>>> think av_index_search_timestamp will do the right thing.
>> yes, that makes sense now.
>> Please correct me if iam wrong but then patch 1 would introduce a
>> issue that the 2nd fixes. So both patches should be merged to avoid
>> this
>>
>> But theres another problem, trun can be read out of order, when one
>> seeks around, so the next might have to be put elsewhere than after the
>> previous
>>
>> thanks
>>
> Hmm, can you describe the circumstances where this would happen.  I 
> looked at the seek code and can't see any way for it
> to seek to the middle somewhere without first reading previous trun.  It 
> looks to me like if avformat_seek_file or
> av_seek_frame fails to find the desired timestamp in the index it falls 
> back to seek_frame_generic which seeks to the
> position of the last sample in the index and performs av_read_frame until 
> it gets to the timestamp it wants.  Is there a
> path I've missed where it can skip to the middle of the file somehow?
 I used
 -rw-r- 1 michael michael 66908195 Dec 11  2015 buck480p30_na.mp4
 ./ffplay buck480p30_na.mp4

 (i can upload this if needed, i dont know where its from exactly)

 and when seeking around by using the right mouse button it sometimes read
 trun chunks with lower times than previous (seen from the av_logs in
 there)

 I hope i made no mistake and would assume this happens with any file
 with these chunks

 

Re: [FFmpeg-devel] [PATCHv2] Makefile: generate stripped CLI tools directly instead of copying unstripped ones first

2017-10-09 Thread Marton Balint


On Sat, 7 Oct 2017, Marton Balint wrote:


Now works with --disable-stripping.

Signed-off-by: Marton Balint 
---
Makefile  | 4 
configure | 4 
2 files changed, 8 insertions(+)

diff --git a/Makefile b/Makefile
index 4a1253a052..642651d4cc 100644
--- a/Makefile
+++ b/Makefile
@@ -97,8 +97,12 @@ include $(SRC_PATH)/doc/examples/Makefile
libavcodec/utils.o libavformat/utils.o libavdevice/avdevice.o 
libavfilter/avfilter.o libavutil/utils.o libpostproc/postprocess.o 
libswresample/swresample.o libswscale/utils.o : libavutil/ffversion.h

$(PROGS): %$(PROGSSUF)$(EXESUF): %$(PROGSSUF)_g$(EXESUF)
+ifeq ($(STRIPTYPE),direct)
+   $(STRIP) -o $@ $<
+else
$(CP) $< $@
$(STRIP) $@
+endif

%$(PROGSSUF)_g$(EXESUF): $(FF_DEP_LIBS)
$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(OBJS-$*) $(FF_EXTRALIBS)
diff --git a/configure b/configure
index 391c141e7a..3b685b0d24 100755
--- a/configure
+++ b/configure
@@ -3361,6 +3361,7 @@ x86asmexe_default="nasm"
windres_default="windres"
nvcc_default="nvcc"
nvccflags_default="-gencode arch=compute_30,code=sm_30 -O2"
+striptype="direct"

# OS
target_os_default=$(tolower $(uname -s))
@@ -5087,6 +5088,7 @@ case $target_os in
;;
os/2*)
strip="lxlite -CS"
+striptype=""
objformat="aout"
add_cppflags -D_GNU_SOURCE
add_ldflags -Zomf -Zbin-files -Zargs-wild -Zhigh-mem -Zmap
@@ -6824,6 +6826,7 @@ fi # test "$quiet" != "yes"
test -e Makefile || echo "include $source_path/Makefile" > Makefile

enabled stripping || strip="echo skipping strip"
+enabled stripping || striptype=""

config_files="$TMPH ffbuild/config.mak doc/config.texi"

@@ -6867,6 +6870,7 @@ ARFLAGS=$arflags
AR_O=$ar_o
RANLIB=$ranlib
STRIP=$strip
+STRIPTYPE=$striptype
NVCC=$nvcc
CP=cp -p
LN_S=$ln_s
--


Will apply this tomorrow.

Regards,
Marton
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] libavcodec/hap : add HapAlphaOnly decoding/encoding

2017-10-09 Thread Carl Eugen Hoyos
2017-10-09 21:34 GMT+02:00 Martin Vignali :

>>> IMHO, from a compositing point of view, alpha only and gray is
>>> the same thing.
>>> It's common (in cgi compositing for example), to use alpha (or
>>> matte) as separate gray only file.
>>>
>> If this is what you believe, I don't think an RGBA representation
>> should be committed.
>>
> This is not about what i believe ! :-)

I think it is.
I asked you to explain why it is a good idea to map alpha only
to rgba and if this is what users expect. Your answer seems
to indicate that you share my feeling that it would be unexpected.

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] mov: fix decode of fragments that overlap in time

2017-10-09 Thread Carl Eugen Hoyos
2017-10-09 22:09 GMT+02:00 John Stebbins :

> +// This shouldn't happen
> +return NULL;

As in: This cannot happen and should be an assert()
or this does not happen for valid files and should print
an error?

Same below.

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/6] vaapi: Remove H.264 baseline profile

2017-10-09 Thread Carl Eugen Hoyos
2017-10-09 13:21 GMT+02:00 wm4 :

> Shouldn't trying to decode baseline video just fall back to sw decoding?

Given that software doesn't support the specific features of baseline either,
I don't think this helps.
I was under the impression the only sane thing to do when reading
baseline H.264 is printing a warning and continuing as if constrained
baseline was detected.

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [V3 4/4] lavc/vaapi_decode: fix profile search when disable exact profile match.

2017-10-09 Thread Mark Thompson
On 09/10/17 12:52, Mark Thompson wrote:
> On 09/10/17 08:50, Jun Zhao wrote:
>>
>> From ef75f07943ff51c63bf7735a90c38a11559cad33 Mon Sep 17 00:00:00 2001
>> From: Jun Zhao 
>> Date: Thu, 21 Sep 2017 02:44:42 -0400
>> Subject: [V3 4/4] lavc/vaapi_decode: fix profile search when disable exact
>>  profile match.
>>
>> when disable exact profile, use the alt_profile for VAAPI HWAccel
>> decoder.
>>
>> Signed-off-by: Jun Zhao 
>> ---
>>  libavcodec/vaapi_decode.c | 4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
>> index 5a555b2bd3..27ef33837c 100644
>> --- a/libavcodec/vaapi_decode.c
>> +++ b/libavcodec/vaapi_decode.c
>> @@ -281,7 +281,7 @@ static int vaapi_decode_make_config(AVCodecContext 
>> *avctx)
>>  VAStatus vas;
>>  int err, i, j;
>>  const AVCodecDescriptor *codec_desc;
>> -VAProfile profile, *profile_list = NULL;
>> +VAProfile profile, va_profile, *profile_list = NULL;
>>  int profile_count, exact_match, alt_profile;
>>  const AVPixFmtDescriptor *sw_desc, *desc;
>>  
>> @@ -328,6 +328,7 @@ static int vaapi_decode_make_config(AVCodecContext 
>> *avctx)
>>  if (exact_match)
>>  break;
>>  alt_profile = vaapi_profile_map[i].codec_profile;
>> +va_profile = vaapi_profile_map[i].va_profile;
>>  }
>>  }
>>  av_freep(&profile_list);
>> @@ -347,6 +348,7 @@ static int vaapi_decode_make_config(AVCodecContext 
>> *avctx)
>>  av_log(avctx, AV_LOG_WARNING, "Using possibly-"
>> "incompatible profile %d instead.\n",
>> alt_profile);
>> +profile = va_profile;
>>  } else {
>>  av_log(avctx, AV_LOG_VERBOSE, "Codec %s profile %d not "
>> "supported for hardware decode.\n",
>> -- 
>> 2.11.0
>>
> 
> LGTM, will push later.

Patches 2 and 4 applied.

Thanks,

- Mark
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/6] vaapi: Remove H.264 baseline profile

2017-10-09 Thread Mark Thompson
On 09/10/17 22:48, Carl Eugen Hoyos wrote:
> 2017-10-09 13:21 GMT+02:00 wm4 :
> 
>> Shouldn't trying to decode baseline video just fall back to sw decoding?
> 
> Given that software doesn't support the specific features of baseline either,
> I don't think this helps.
> I was under the impression the only sane thing to do when reading
> baseline H.264 is printing a warning and continuing as if constrained
> baseline was detected.

At least with software decode we would know that it has failed - an 
inconvenient property of many hardware decoders is their reluctance to report 
errors usefully or at all.

Hence the warning on using the ALLOW_PROFILE_MISMATCH flag, which does allow 
the attempt to decode baseline profile streams with hardware:
"""
If the stream is actually not supported then the behaviour is
undefined, and may include returning entirely incorrect output
while indicating success.
"""

I think the behaviour we have now with this option to continue if the user 
really wants (and is aware of the possible consequences) is correct.

Thanks,

- Mark
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] mov: fix decode of fragments that overlap in time

2017-10-09 Thread John Stebbins
On 10/09/2017 02:40 PM, Carl Eugen Hoyos wrote:
> 2017-10-09 22:09 GMT+02:00 John Stebbins :
>
>> +// This shouldn't happen
>> +return NULL;
> As in: This cannot happen and should be an assert()
> or this does not happen for valid files and should print
> an error?
>
> Same below.
>
>

In theory, there are a couple of scenarios where this could happen.  They all 
involve the moov being at the end of the
file after a moof. ffmpeg errors out with AVERROR_INVALIDDATA in several other 
scenarios if it detects this condition. 
E.g. in mov_read_trun and several other places, it does this:

    for (i = 0; i < c->fc->nb_streams; i++) {
    if (c->fc->streams[i]->id == frag->track_id) {
    st = c->fc->streams[i];
    break;
    }
    }
    if (!st) {
    av_log(c->fc, AV_LOG_ERROR, "could not find corresponding track id 
%u\n", frag->track_id);
    return AVERROR_INVALIDDATA;
    }

The spec says moov *should* come before moof, but does not require it. So this 
isn't technically invalid data.  But
ffmpeg doesn't currently support moov after moof, so I figured I was safe in 
not supporting it in this patch.  I could
add code similar to the above AVStream checking in a few key places (which 
would print an error) and that would make
those "shouldn't happen" into impossible conditions where an assert would be 
warranted.

-- 
John  GnuPG fingerprint: D0EC B3DB C372 D1F1 0B01  83F0 49F1 D7B2 60D4 D0F7




signature.asc
Description: OpenPGP digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH v2] Fix visual glitch with XvMC, caused by wrong idct permutation.

2017-10-09 Thread Ivan Kalvachev
On 10/9/17, Michael Niedermayer  wrote:
> On Mon, Oct 09, 2017 at 09:02:38AM -0400, Ronald S. Bultje wrote:
>> Hi,
>>
>> On Mon, Oct 9, 2017 at 6:46 AM, Ivan Kalvachev 
>> wrote:
>>
>> > On 10/9/17, Ronald S. Bultje  wrote:
>> > > On Sun, Oct 8, 2017 at 6:52 PM, Ivan Kalvachev 
>> > wrote:
>> > > [..]
>> > >
>> > > Indentation is off in the second hunk, can you fix that?
>> >
>> > You want it 4 spaces to the right

Done.

>>
>> Yes, please.
>>
>> BTW, I think it would be better to use "127" number.
>> >
>>
>> I don't really mind either way. The number 128 suggests it may have been
>> intended as a bitmask. Michael is probably better positioned to comment
>> on
>> this.
>
> I don't really remember but i think 128 was chosen for ABI
> compatibility with additions to it from libav. So it should no longer
> matter what values are used on additions

Then I'm using the next free number "24".

Please, commit when you think it is appropriate.

Best Regards
   Ivan Kalvachev
From 8842a69091b5eb5cf9b704b3ff504d21db4aad9b Mon Sep 17 00:00:00 2001
From: Ivan Kalvachev 
Date: Mon, 9 Oct 2017 01:25:00 +0300
Subject: [PATCH] Fix visual glitch with XvMC, caused by wrong idct
 permutation.

In the past XvMC forced simple_idct since
it was using FF_IDCT_PERM_NONE.
However now we have SIMD variants of simple_idct that
are using FF_IDCT_PERM_TRANSPOSE and if they are selected
XvMC would get coefficients in the wrong order.

The patch creates new FF_IDCT_NONE that
is used only for this kind of hardware decoding
and that falls back to the old C-only simple idct.

Signed-off-by: Ivan Kalvachev 
---
 libavcodec/avcodec.h   | 1 +
 libavcodec/idctdsp.c   | 1 +
 libavcodec/mpeg12dec.c | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 52cc5b0ca..18c3e3ea1 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3146,6 +3146,7 @@ typedef struct AVCodecContext {
 #if FF_API_ARCH_ALPHA
 #define FF_IDCT_SIMPLEALPHA   23
 #endif
+#define FF_IDCT_NONE  24 /* Used by XvMC to extract IDCT coefficients with FF_IDCT_PERM_NONE */
 #define FF_IDCT_SIMPLEAUTO128
 
 /**
diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c
index d596aed1a..0122d29ef 100644
--- a/libavcodec/idctdsp.c
+++ b/libavcodec/idctdsp.c
@@ -279,6 +279,7 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
 c->perm_type = FF_IDCT_PERM_NONE;
 #endif /* CONFIG_FAANIDCT */
 } else { // accurate/default
+/* Be sure FF_IDCT_NONE will select this one, since it uses FF_IDCT_PERM_NONE */
 c->idct_put  = ff_simple_idct_put_8;
 c->idct_add  = ff_simple_idct_add_8;
 c->idct  = ff_simple_idct_8;
diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
index 22c29c150..4e68be27f 100644
--- a/libavcodec/mpeg12dec.c
+++ b/libavcodec/mpeg12dec.c
@@ -1217,7 +1217,7 @@ static void setup_hwaccel_for_pixfmt(AVCodecContext *avctx)
 #endif
 )
 if (avctx->idct_algo == FF_IDCT_AUTO)
-avctx->idct_algo = FF_IDCT_SIMPLE;
+avctx->idct_algo = FF_IDCT_NONE;
 
 if (avctx->hwaccel && avctx->pix_fmt == AV_PIX_FMT_XVMC) {
 Mpeg1Context *s1 = avctx->priv_data;
-- 
2.14.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix crash if av_vdpau_bind_context() is not used.

2017-10-09 Thread Ivan Kalvachev
On 10/9/17, wm4  wrote:
> On Mon, 9 Oct 2017 03:04:53 +0300
> Ivan Kalvachev  wrote:
>
>> The public functions av_alloc_vdpaucontext() and
>> av_vdpau_alloc_context() are allocating AVVDPAUContext
>> structure that is supposed to be placed in avctx->hwaccel_context.
>>
>> However the rest of libavcodec/vdpau.c uses avctx->hwaccel_context
>> as struct VDPAUHWContext, that is bigger and does contain
>> AVVDPAUContext as first member.
>>
>> The usage includes write to the new variables in the bigger struct,
>> without checking for block size.
>>
>> Fix by always allocating the bigger structure.
>>
>> BTW,
>> I have no idea why the new fields haven't simply been added to the
>> existing struct...
>> It seems that the programmer who wrote this has been aware of the problem,
>> because av_vdpau_bind_context reallocates the structure.
>>
>> It might be good idea to check the other usages of this reallocation
>> function.
>>
>> Best Regards
>>Ivan Kalvachev
>
> IMO not really worth fixing at this point, because this is the old-old
> vdpau API. Even av_vdpau_bind_context() (which does not require using
> av_alloc_vdpaucontext()) is deprecated. Or rather should be - I just
> haven't bothered deprecating it because the deprecation dance is too
> messy. In any case, you shouldn't use any of those APIs - use the
> generic hwaccel API instead (setting hw_frames_ctx or hw_device_ctx).

Every bug must be fixed, even if the code is going to be removed next.

Since you "didn't bother" to deprecate it, this code will remain even after
the API bump. And it is still (mis)used by at least one program that
crashed on me.

So it MUST be fixed.

Feel free at any time to mark it as deprecated
and set a deprecation target.


Best Regards
   Ivan Kalvachev
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] fate: add PERSIST_RPARAM_A_RExt_Sony_3 hevc conformance test

2017-10-09 Thread James Almer
The PERSIST_RPARAM_A_RExt_Sony_1 bitstream has an out-of-range value
and has therefore been superseded.
It is otherwise identical, and decodes the same.

Signed-off-by: James Almer 
---
 tests/fate/hevc.mak| 3 +--
 ...RAM_A_RExt_Sony_1 => hevc-conformance-PERSIST_RPARAM_A_RExt_Sony_3} | 0
 2 files changed, 1 insertion(+), 2 deletions(-)
 rename tests/ref/fate/{hevc-conformance-PERSIST_RPARAM_A_RExt_Sony_1 => 
hevc-conformance-PERSIST_RPARAM_A_RExt_Sony_3} (100%)

diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak
index 2e798eca60..423b0630c7 100644
--- a/tests/fate/hevc.mak
+++ b/tests/fate/hevc.mak
@@ -167,7 +167,7 @@ HEVC_SAMPLES_444_8BIT = \
 
 HEVC_SAMPLES_444_12BIT =\
 IPCM_B_RExt_NEC \
-PERSIST_RPARAM_A_RExt_Sony_1\
+PERSIST_RPARAM_A_RExt_Sony_3\
 SAO_A_RExt_MediaTek_1   \
 
 
@@ -182,7 +182,6 @@ HEVC_SAMPLES_444_12BIT =\
 
 # do not pass:
 # TSUNEQBD_A_MAIN10_Technicolor_2.bit (segfault mix 9-10bits)
-# PERSIST_RPARAM_A_RExt_Sony_1 (rext)
 
 
 define FATE_HEVC_TEST
diff --git a/tests/ref/fate/hevc-conformance-PERSIST_RPARAM_A_RExt_Sony_1 
b/tests/ref/fate/hevc-conformance-PERSIST_RPARAM_A_RExt_Sony_3
similarity index 100%
rename from tests/ref/fate/hevc-conformance-PERSIST_RPARAM_A_RExt_Sony_1
rename to tests/ref/fate/hevc-conformance-PERSIST_RPARAM_A_RExt_Sony_3
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] build: prevent SDL2 from polluting global cflags and extralibs

2017-10-09 Thread James Almer
On 10/9/2017 8:27 AM, wm4 wrote:
> On Sun,  8 Oct 2017 17:16:21 -0300
> James Almer  wrote:
> 
>> Remove the SDL_main define from the global cflags but not from the
>> ffplay cflags, and the -mwindows linker option from extralibs instead
>> of overriding it with the addition of -mconsole.
>>
>> Signed-off-by: James Almer 
>> ---
>>  configure   | 4 ++--
>>  ffbuild/library.mak | 1 -
>>  2 files changed, 2 insertions(+), 3 deletions(-)
>>
>> diff --git a/configure b/configure
>> index 77c9a18c3c..0fdf4bcbe3 100755
>> --- a/configure
>> +++ b/configure
>> @@ -6107,10 +6107,10 @@ if enabled sdl2; then
>>  enable sdl2
>>  fi
>>  if test $target_os = "mingw32"; then
>> -sdl2_extralibs="$sdl2_extralibs -mconsole"
>> +sdl2_extralibs=$(filter_out '-mwindows' $sdl2_extralibs)
>>  fi
>>  fi
>> -enabled sdl2 && add_cflags $sdl2_cflags && add_extralibs $sdl2_extralibs
>> +enabled sdl2 && add_cflags $(filter_out '-Dmain=SDL_main' $sdl2_cflags) && 
>> add_extralibs $sdl2_extralibs
>>  
>>  if enabled decklink; then
>>  case $target_os in
>> diff --git a/ffbuild/library.mak b/ffbuild/library.mak
>> index ee19c3c797..4191edcf9c 100644
>> --- a/ffbuild/library.mak
>> +++ b/ffbuild/library.mak
>> @@ -16,7 +16,6 @@ all-$(CONFIG_SHARED): $(SUBDIR)$(SLIBNAME) 
>> $(SUBDIR)lib$(FULLNAME).pc
>>  
>>  LIBOBJS := $(OBJS) $(SUBDIR)%.h.o $(TESTOBJS)
>>  $(LIBOBJS) $(LIBOBJS:.o=.s) $(LIBOBJS:.o=.i):   CPPFLAGS += 
>> -DHAVE_AV_CONFIG_H
>> -$(TESTOBJS) $(TESTOBJS:.o=.i): CFLAGS += -Umain
>>  
>>  $(SUBDIR)$(LIBNAME): $(OBJS)
>>  $(RM) $@
> 
> LGTM, though I'd argue dropping SDL instead.

Pushed, thanks.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] warning: unknown warning option '-Wno-bool-operation'; did you mean '-Wno-bool-conversion'? [-Wunknown-warning-option]

2017-10-09 Thread Helmut K. C. Tessarek
I get thousands of these warnings when compiling ffmpeg:

warning: unknown warning option '-Wno-bool-operation'; did you mean
'-Wno-bool-conversion'? [-Wunknown-warning-option]

I compile on Mac OSX 10.11.6 with Apple LLVM version 8.0.0
(clang-800.0.42.1).

-- 
regards Helmut K. C. Tessarek  KeyID 0xF7832007C11F128D
Key fingerprint = 28A3 1666 4FE8 D72C CFD5 8B23 F783 2007 C11F 128D

/*
   Thou shalt not follow the NULL pointer for chaos and madness
   await thee at its end.
*/
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] libavcodec/proresdec : add qmat dsp with SSE2, AVX2 simd

2017-10-09 Thread Ivan Kalvachev
On 10/9/17, Martin Vignali  wrote:
> 2017-10-07 18:16 GMT+02:00 Ronald S. Bultje :
>
>> Hi Martin,
>>
>> On Sat, Oct 7, 2017 at 11:49 AM, Martin Vignali 
>> wrote:
>>
>> > 2017-10-07 17:30 GMT+02:00 Ronald S. Bultje :
>> > > On Sat, Oct 7, 2017 at 10:22 AM, Martin Vignali <
>> > martin.vign...@gmail.com>
>> > > wrote:
>> > > > Patch in attach add a new dsp
>> > > > for manipulation of qmat
>> > > >
>> > > > for now, i move this code inside
>> > > >
>> > > > for (i = 0; i < 64; i++) {
>> > > > qmat_luma_scaled  [i] = ctx->qmat_luma  [i] * qscale;
>> > > > qmat_chroma_scaled[i] = ctx->qmat_chroma[i] * qscale;
>> > > > }
>> > > >
>> > > > i add a special case for qscale == 1
>> > > > and SSE2, AVX2 optimization
>> > >
>> > > This loop only executes once per slice. We typically do not
>> SIMD-optimize
>> > > at that level, because it won't give significant speed gains...
>> >
>> > Ok didn't know that.
>> > I mostly follow, what there are already done, like in
>> blockdsp.clear_block
>> >
>>
>> Right, so consider that blockdsp is done per block (16x16 pixels), not per
>> slice.
>>
> Ok on principle (only improve, a func which is called quite often)

It's more of:  We can't refuse code that makes a measurable improvement.

Also have in mind that compilers are getting smarter and this code is
good target for auto-vectorization. Of course FFmpeg disables it,
because of long history of compiler bugs related to it.

>> You could remove this entirely from the slice processing code by simply
>> pre-calculating the values in the init function once for the whole stream,
>> there's only 224 qscale values so it's 224*64*2 multiplications, which is
>> (in the context of prores) virtually negligible.
>>
>
> Not sure, we can do that for prores decoder
> the qmat seems to be set on the decode frame header func
> (based on the header of the frame).

You can at least check if the qscale has changed and avoid recalculation.
I think that the lgpl decoder does that.

Best Regards
   Ivan Kalvachev
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix crash if av_vdpau_bind_context() is not used.

2017-10-09 Thread James Almer
On 10/9/2017 8:30 AM, wm4 wrote:
> On Mon, 9 Oct 2017 03:04:53 +0300
> Ivan Kalvachev  wrote:
> 
>> The public functions av_alloc_vdpaucontext() and
>> av_vdpau_alloc_context() are allocating AVVDPAUContext
>> structure that is supposed to be placed in avctx->hwaccel_context.
>>
>> However the rest of libavcodec/vdpau.c uses avctx->hwaccel_context
>> as struct VDPAUHWContext, that is bigger and does contain
>> AVVDPAUContext as first member.
>>
>> The usage includes write to the new variables in the bigger struct,
>> without checking for block size.
>>
>> Fix by always allocating the bigger structure.
>>
>> BTW,
>> I have no idea why the new fields haven't simply been added to the
>> existing struct...
>> It seems that the programmer who wrote this has been aware of the problem,
>> because av_vdpau_bind_context reallocates the structure.
>>
>> It might be good idea to check the other usages of this reallocation 
>> function.
>>
>> Best Regards
>>Ivan Kalvachev
> 
> IMO not really worth fixing at this point, because this is the old-old
> vdpau API. Even av_vdpau_bind_context() (which does not require using
> av_alloc_vdpaucontext()) is deprecated. Or rather should be - I just
> haven't bothered deprecating it because the deprecation dance is too
> messy. In any case, you shouldn't use any of those APIs - use the
> generic hwaccel API instead (setting hw_frames_ctx or hw_device_ctx).

av_vdpau_alloc_context() is not deprecated, neither here nor in libav.
APIChanges only has a mention that av_vdpau_bind_context() should be
used instead and that's it. It doesn't even seem to mention that neither
should be used at all in favor of the hwaccel API.

If all these are truly deprecated, then I'd recommend doing the dance
asap so the clock may start ticking.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] ffmpeg.c: Fallback to duration_dts, when duration_pts can't be determined.

2017-10-09 Thread Sasi Inguva
This is required for FLV files, for which duration_pts comes out to be zero.

Signed-off-by: Sasi Inguva 
---
 fftools/ffmpeg.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 6d64bc1043..5f373db847 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -2665,6 +2665,9 @@ static int process_input_packet(InputStream *ist, const 
AVPacket *pkt, int no_eo
 ist->next_dts = AV_NOPTS_VALUE;
 }
 
+if (duration_pts == 0)
+duration_pts = duration_dts;
+
 if (got_output)
 ist->next_pts += av_rescale_q(duration_pts, 
ist->st->time_base, AV_TIME_BASE_Q);
 break;
-- 
2.14.2.920.gcf0c67979c-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 00/20] Coded bitstream editing (v3)

2017-10-09 Thread Jun Zhao


On 2017/10/9 19:26, wm4 wrote:
> On Sun,  8 Oct 2017 21:01:34 +0100
> Mark Thompson  wrote:
>
>> Incorporating all review comments from last time:
>> * Change all CBS users to hold a pointer rather than the whole structure.
>> * Rearrange the MPEG-2 framerate stuff so that it doesn't add code and then 
>> remove it in the series.
>> * Add a type for the coded bitstream unit type.
>> * Miscellaneous fixups.
>>
> Patches 1-20 LGTM.
+1
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] udp: added option to ignore empty UDP packets

2017-10-09 Thread Daniel Kučera
Waiting for review.

Dňa 6. 10. 2017 10:32 dopoludnia používateľ "Daniel Kučera" <
daniel.kuc...@gmail.com> napísal:

> > And since empty packets are valid and can be used by applications (and
> > are actually used by protocols out there), the workaround of dropping
> > them is not acceptable.
> >
>
> I'm not sure if you mean this patch is unacceptable but if so, I want
> to note, that this patch is not the same as I submitted before: this
> one adds cmdlne option to ignore empty packets and it doesn't ignore
> them when not explicitly enabled.
>
>
> --
>
> S pozdravom / Best regards
> Daniel Kucera.
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avformat/srt: add Haivision Open SRT protocol

2017-10-09 Thread Nablet Developer
protocol requires libsrt (https://github.com/Haivision/srt) to be
installed

Signed-off-by: Nablet Developer 
---
 configure   |5 +
 libavformat/Makefile|1 +
 libavformat/opensrt.c   | 1105 +++
 libavformat/protocols.c |1 +
 libavformat/url.h   |4 +
 5 files changed, 1116 insertions(+)
 create mode 100644 libavformat/opensrt.c

diff --git a/configure b/configure
index 391c141..312c632 100755
--- a/configure
+++ b/configure
@@ -293,6 +293,7 @@ External library support:
   --enable-opengl  enable OpenGL rendering [no]
   --enable-openssl enable openssl, needed for https support
if gnutls is not used [no]
+  --enable-opensrt enable Haivision Open SRT protocol [no]
   --disable-sndio  disable sndio support [autodetect]
   --disable-schannel   disable SChannel SSP, needed for TLS support on
Windows if openssl and gnutls are not used 
[autodetect]
@@ -1638,6 +1639,7 @@ EXTERNAL_LIBRARY_LIST="
 openal
 opencl
 opengl
+opensrt
 "
 
 HWACCEL_AUTODETECT_LIBRARY_LIST="
@@ -3144,6 +3146,8 @@ libsmbclient_protocol_deps="libsmbclient gplv3"
 libssh_protocol_deps="libssh"
 mmsh_protocol_select="http_protocol"
 mmst_protocol_select="network"
+opensrt_protocol_select="network"
+opensrt_protocol_deps="opensrt"
 rtmp_protocol_conflict="librtmp_protocol"
 rtmp_protocol_select="tcp_protocol"
 rtmpe_protocol_select="ffrtmpcrypt_protocol"
@@ -6063,6 +6067,7 @@ enabled omx_rpi   && { check_header OMX_Core.h ||
{ ! enabled cross_compile && add_cflags 
-isystem/opt/vc/include/IL && check_header OMX_Core.h ; } ||
die "ERROR: OpenMAX IL headers not found"; }
 enabled omx   && require_header OMX_Core.h
+enabled opensrt   && require_pkg_config libsrt srt srt/srt.h srt_socket
 enabled openssl   && { use_pkg_config openssl openssl openssl/ssl.h 
OPENSSL_init_ssl ||
use_pkg_config openssl openssl openssl/ssl.h 
SSL_library_init ||
check_lib openssl openssl/ssl.h 
SSL_library_init -lssl -lcrypto ||
diff --git a/libavformat/Makefile b/libavformat/Makefile
index df709c29..a3cbb4e 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -593,6 +593,7 @@ OBJS-$(CONFIG_TLS_SCHANNEL_PROTOCOL) += tls_schannel.o 
tls.o
 OBJS-$(CONFIG_TLS_SECURETRANSPORT_PROTOCOL) += tls_securetransport.o tls.o
 OBJS-$(CONFIG_UDP_PROTOCOL)  += udp.o
 OBJS-$(CONFIG_UDPLITE_PROTOCOL)  += udp.o
+OBJS-$(CONFIG_OPENSRT_PROTOCOL)  += opensrt.o
 OBJS-$(CONFIG_UNIX_PROTOCOL) += unix.o
 
 # libavdevice dependencies
diff --git a/libavformat/opensrt.c b/libavformat/opensrt.c
new file mode 100644
index 000..6483942
--- /dev/null
+++ b/libavformat/opensrt.c
@@ -0,0 +1,1105 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Haivision Open SRT (Secure Reliable Transport) protocol
+ */
+
+#define _DEFAULT_SOURCE
+#define _BSD_SOURCE /* Needed for using struct ip_mreq with recent glibc */
+
+#include "avformat.h"
+#include "avio_internal.h"
+#include "libavutil/avassert.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/fifo.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/avstring.h"
+#include "libavutil/opt.h"
+#include "libavutil/log.h"
+#include "libavutil/time.h"
+#include "internal.h"
+#include "network.h"
+#include "os_support.h"
+#include "url.h"
+
+#ifdef __APPLE__
+#include "TargetConditionals.h"
+#endif
+
+#include 
+#include 
+
+
+#if HAVE_PTHREAD_CANCEL
+#include 
+#endif
+
+#ifndef HAVE_PTHREAD_CANCEL
+#define HAVE_PTHREAD_CANCEL 0
+#endif
+
+#ifndef IPV6_ADD_MEMBERSHIP
+#define IPV6_ADD_MEMBERSHIP IPV6_JOIN_GROUP
+#define IPV6_DROP_MEMBERSHIP IPV6_LEAVE_GROUP
+#endif
+
+#define UDP_TX_BUF_SIZE 32768
+#define UDP_MAX_PKT_SIZE 65536
+#define UDP_HEADER_SIZE 8
+
+typedef struct SRTContext {
+const AVClass *class;
+SRTSOCKET srt_fd;
+int buffer_size;
+int pkt_size;
+int local_port;
+int reuse_socket;
+int overrun_nonfatal;
+struct sockaddr_storage dest_addr;
+int dest_addr_le