Re: [FFmpeg-devel] [PATCH] avutil/crc: use ff_thread_once at av_crc_get_table

2017-10-30 Thread Muhammad Faiz
On Tue, Oct 24, 2017 at 4:31 PM, Muhammad Faiz  wrote:
> Fix tsan warnings.
>
> Signed-off-by: Muhammad Faiz 
> ---
>  libavutil/crc.c | 49 +
>  1 file changed, 29 insertions(+), 20 deletions(-)

Ping.

My recent benchmark:

old/new:
 296320/ 340400 decicycles in AV_CRC_8_ATM,   1 runs,  0 skips
   1180/   2040 decicycles in AV_CRC_8_ATM,   1 runs,  0 skips
830/   1460 decicycles in AV_CRC_8_ATM,   2 runs,  0 skips
620/995 decicycles in AV_CRC_8_ATM,   4 runs,  0 skips
505/797 decicycles in AV_CRC_8_ATM,   8 runs,  0 skips
487/653 decicycles in AV_CRC_8_ATM,  16 runs,  0 skips
448/546 decicycles in AV_CRC_8_ATM,  32 runs,  0 skips
435/531 decicycles in AV_CRC_8_ATM,  64 runs,  0 skips
445/479 decicycles in AV_CRC_8_ATM, 128 runs,  0 skips
454/451 decicycles in AV_CRC_8_ATM, 256 runs,  0 skips
469/436 decicycles in AV_CRC_8_ATM, 512 runs,  0 skips
459/429 decicycles in AV_CRC_8_ATM,1024 runs,  0 skips
467/425 decicycles in AV_CRC_8_ATM,2048 runs,  0 skips

 137180/ 118440 decicycles in AV_CRC_16_ANSI,   1 runs,  0 skips
720/   1140 decicycles in AV_CRC_16_ANSI,   1 runs,  0 skips
550/880 decicycles in AV_CRC_16_ANSI,   2 runs,  0 skips
515/700 decicycles in AV_CRC_16_ANSI,   4 runs,  0 skips
550/560 decicycles in AV_CRC_16_ANSI,   8 runs,  0 skips
508/515 decicycles in AV_CRC_16_ANSI,  16 runs,  0 skips
493/520 decicycles in AV_CRC_16_ANSI,  32 runs,  0 skips
482/515 decicycles in AV_CRC_16_ANSI,  64 runs,  0 skips
482/468 decicycles in AV_CRC_16_ANSI, 128 runs,  0 skips
481/443 decicycles in AV_CRC_16_ANSI, 256 runs,  0 skips
467/429 decicycles in AV_CRC_16_ANSI, 512 runs,  0 skips
474/422 decicycles in AV_CRC_16_ANSI,1024 runs,  0 skips
469/425 decicycles in AV_CRC_16_ANSI,2048 runs,  0 skips

 124720/ 104060 decicycles in AV_CRC_16_CCITT,   1 runs,  0 skips
460/860 decicycles in AV_CRC_16_CCITT,   1 runs,  0 skips
440/740 decicycles in AV_CRC_16_CCITT,   2 runs,  0 skips
505/660 decicycles in AV_CRC_16_CCITT,   4 runs,  0 skips
472/605 decicycles in AV_CRC_16_CCITT,   8 runs,  0 skips
486/561 decicycles in AV_CRC_16_CCITT,  16 runs,  0 skips
466/557 decicycles in AV_CRC_16_CCITT,  32 runs,  0 skips
489/546 decicycles in AV_CRC_16_CCITT,  64 runs,  0 skips
496/552 decicycles in AV_CRC_16_CCITT, 128 runs,  0 skips
459/522 decicycles in AV_CRC_16_CCITT, 256 runs,  0 skips
447/493 decicycles in AV_CRC_16_CCITT, 512 runs,  0 skips
468/469 decicycles in AV_CRC_16_CCITT,1024 runs,  0 skips
477/454 decicycles in AV_CRC_16_CCITT,2048 runs,  0 skips

 123500/ 102600 decicycles in AV_CRC_32_IEEE,   1 runs,  0 skips
460/   1060 decicycles in AV_CRC_32_IEEE,   1 runs,  0 skips
420/850 decicycles in AV_CRC_32_IEEE,   2 runs,  0 skips
415/685 decicycles in AV_CRC_32_IEEE,   4 runs,  0 skips
407/557 decicycles in AV_CRC_32_IEEE,   8 runs,  0 skips
402/540 decicycles in AV_CRC_32_IEEE,  16 runs,  0 skips
428/491 decicycles in AV_CRC_32_IEEE,  32 runs,  0 skips
452/501 decicycles in AV_CRC_32_IEEE,  64 runs,  0 skips
466/507 decicycles in AV_CRC_32_IEEE, 128 runs,  0 skips
474/506 decicycles in AV_CRC_32_IEEE, 256 runs,  0 skips
478/482 decicycles in AV_CRC_32_IEEE, 512 runs,  0 skips
476/454 decicycles in AV_CRC_32_IEEE,1024 runs,  0 skips
472/435 decicycles in AV_CRC_32_IEEE,2048 runs,  0 skips

 133800/ 110660 decicycles in AV_CRC_32_IEEE_LE,   1 runs,  0 skips
460/   1060 decicycles in AV_CRC_32_IEEE_LE,   1 runs,  0 skips
560/820 decicycles in AV_CRC_32_IEEE_LE,   2 runs,  0 skips
530/665 decicycles in AV_CRC_32_IEEE_LE,   4 runs,  0 skips
502/547 decicycles in AV_CRC_32_IEEE_LE,   8 runs,  0 skips
498/533 decicycles in AV_CRC_32_IEEE_LE,  16 runs,  0 skips
481/533 decicycles in AV_CRC_32_IEEE_LE,  32 runs,  0 skips
493/542 decicycles in AV_CRC_32_IEEE_LE,  64 runs,  0 skips
494/489 decicycles in AV_CRC_32_IEEE_LE, 128 runs,  0 skips
500/461 decicycles in AV_CRC_32_IEEE_LE, 256 runs,  0 skips
493/446 decicycles in AV_CRC_32_IEEE_LE, 512 runs,  0 skips
498/439 decicycles in AV_CRC_32_IEEE_LE,1024 runs,  0 skips
482/436 de

Re: [FFmpeg-devel] [RFC]lswr/rematrix: Support s32p

2017-10-30 Thread Muhammad Faiz
On Sun, Oct 29, 2017 at 3:55 AM, Carl Eugen Hoyos  wrote:
> Hi!
>
> Attached patch fixes a random testcase for ticket #6785 here but I
> don't know if this is the correct fix.
>
> Please review, Carl Eugen
>
> From a93b9309d74f5eadece371ee1e682d266af6cd83 Mon Sep 17 00:00:00 2001
> From: Carl Eugen Hoyos 
> Date: Sat, 28 Oct 2017 22:52:02 +0200
> Subject: [PATCH] lswr/rematrix: Support s32p.
>
> Fixes ticket #6785.
> ---
>  libswresample/rematrix.c |   21 +++--
>  1 file changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
> index 66a43c1..a6aa6b0 100644
> --- a/libswresample/rematrix.c
> +++ b/libswresample/rematrix.c
> @@ -445,14 +445,23 @@ av_cold int swri_rematrix_init(SwrContext *s){
>  s->mix_2_1_f = (mix_2_1_func_type*)sum2_double;
>  s->mix_any_f = (mix_any_func_type*)get_mix_any_func_double(s);
>  }else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){
> -// Only for dithering currently
> -// s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double));
> -s->native_one= av_mallocz(sizeof(int));
> +s->native_one= av_mallocz(sizeof(int64_t));
>  if (!s->native_one)
>  return AVERROR(ENOMEM);
> -// for (i = 0; i < nb_out; i++)
> -// for (j = 0; j < nb_in; j++)
> -// ((double*)s->native_matrix)[i * nb_in + j] = 
> s->matrix[i][j];
> +s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int));
> +if (!s->native_matrix) {
> +av_freep(&s->native_one);
> +return AVERROR(ENOMEM);
> +}
> +for (i = 0; i < nb_out; i++) {
> +double rem = 0;
> +
> +for (j = 0; j < nb_in; j++) {
> +double target = s->matrix[i][j] * 32768 + rem;
> +((int*)s->native_matrix)[i * nb_in + j] = lrintf(target);
> +rem += target - ((int64_t*)s->native_matrix)[i * nb_in + j];
> +}
> +}
>  *((int*)s->native_one) = 32768;
>  s->mix_1_1_f = (mix_1_1_func_type*)copy_s32;
>  s->mix_2_1_f = (mix_2_1_func_type*)sum2_s32;

The code is confusing.
Which is the type of native_matrix and native_one? int or int64_t?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avdevice: remove dangling parenthesis from 2245476e5c45

2017-10-30 Thread Ricardo Constantino
---
 libavdevice/decklink_dec.cpp| 2 +-
 libavdevice/libndi_newtek_dec.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavdevice/decklink_dec.cpp b/libavdevice/decklink_dec.cpp
index e786dd85dc..191547ff10 100644
--- a/libavdevice/decklink_dec.cpp
+++ b/libavdevice/decklink_dec.cpp
@@ -959,7 +959,7 @@ av_cold int ff_decklink_read_header(AVFormatContext *avctx)
 
 st->time_base.den  = ctx->bmd_tb_den;
 st->time_base.num  = ctx->bmd_tb_num;
-st->r_frame_rate   = av_make_q(st->time_base.den, st->time_base.num));
+st->r_frame_rate   = av_make_q(st->time_base.den, st->time_base.num);
 
 switch((BMDPixelFormat)cctx->raw_format) {
 case bmdFormat8BitYUV:
diff --git a/libavdevice/libndi_newtek_dec.c b/libavdevice/libndi_newtek_dec.c
index 5bbaa39786..2f14f4b03f 100644
--- a/libavdevice/libndi_newtek_dec.c
+++ b/libavdevice/libndi_newtek_dec.c
@@ -189,7 +189,7 @@ static int ndi_create_video_stream(AVFormatContext *avctx, 
NDIlib_video_frame_t
 }
 
 st->time_base   = NDI_TIME_BASE_Q;
-st->r_frame_rate= av_make_q(v->frame_rate_N, 
v->frame_rate_D));
+st->r_frame_rate= av_make_q(v->frame_rate_N, 
v->frame_rate_D);
 
 tmp = av_mul_q(av_d2q(v->picture_aspect_ratio, INT_MAX), 
(AVRational){v->yres, v->xres});
 av_reduce(&st->sample_aspect_ratio.num, &st->sample_aspect_ratio.den, 
tmp.num, tmp.den, 1000);
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] lavfi/testsrc2: fix hang with very small sizes.

2017-10-30 Thread Nicolas George
Move a subtraction to the other side of the equal to avoid overflows.

Signed-off-by: Nicolas George 
---
 libavfilter/vsrc_testsrc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vsrc_testsrc.c b/libavfilter/vsrc_testsrc.c
index fe0d50aa41..a790974d14 100644
--- a/libavfilter/vsrc_testsrc.c
+++ b/libavfilter/vsrc_testsrc.c
@@ -857,8 +857,8 @@ static void test2_fill_picture(AVFilterContext *ctx, 
AVFrame *frame)
 uint8_t alpha[256];
 
 r = s->pts;
-for (y = ymin; y < ymax - 15; y += 16) {
-for (x = xmin; x < xmax - 15; x += 16) {
+for (y = ymin; y + 15 < ymax; y += 16) {
+for (x = xmin; x + 15 < xmax; x += 16) {
 if ((x ^ y) & 16)
 continue;
 for (i = 0; i < 256; i++) {
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] libavfilter/vsrc_testsrc : fix hang with testsrc2 and small size

2017-10-30 Thread Nicolas George
Le septidi 7 brumaire, an CCXXVI, Martin Vignali a écrit :
> Not sure i understand, can you explain a little bit more ?

This was a quick guess: make sure the values are positive before
manipulating them, instead of making far-reaching changes. As it happens,
there was an even simpler fix; see the patch I just sent.

Regards,

-- 
  Nicolas George


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH v4 2/2] rtp: rfc4175: add handler for YCbCr-4:2:2

2017-10-30 Thread Éloi Bail
> I think you misunderstood "unpublished" for some other word. Those
> specs are actually available for anyone that wants to read them (if
> they are willing to pay for them). 2110-20 isn't available on any of
> the usual sources for obtaining SMPTE standards documents.

> - Hendrik

Hi Hendrik. 

Yes, I see. SMPTE 2110-20 should be published very soon. I would then suggest 
adding "partial support of RFC 4175: RTP Payload Format for Uncompressed Video" 
to the changelog. 
I don't want to be pushy claiming the modification of the changelog. The goal 
is to get people contributing on this topic. 

Eloi 
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/mips: Improve avc chroma avg hv mc msa functions

2017-10-30 Thread Manojkumar Bhosale
LGTM

-Original Message-
From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf Of 
kaustubh.ra...@imgtec.com
Sent: Friday, October 27, 2017 5:03 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Kaustubh Raste
Subject: [FFmpeg-devel] [PATCH] avcodec/mips: Improve avc chroma avg hv mc msa 
functions

From: Kaustubh Raste 

Replace generic with block size specific function.
Load the specific destination bytes instead of MSA load and pack.

Signed-off-by: Kaustubh Raste 
---
 libavcodec/mips/h264chroma_msa.c |  438 +-
 1 file changed, 238 insertions(+), 200 deletions(-)

diff --git a/libavcodec/mips/h264chroma_msa.c b/libavcodec/mips/h264chroma_msa.c
index a5c3334..4c25761 100644
--- a/libavcodec/mips/h264chroma_msa.c
+++ b/libavcodec/mips/h264chroma_msa.c
@@ -1408,15 +1408,15 @@ static void avc_chroma_vt_and_aver_dst_8w_msa(uint8_t 
*src, uint8_t *dst,
 }
 }
 
-static void avc_chroma_hv_and_aver_dst_2x2_msa(uint8_t *src, int32_t 
src_stride,
-   uint8_t *dst, int32_t 
dst_stride,
+static void avc_chroma_hv_and_aver_dst_2x2_msa(uint8_t *src, uint8_t *dst,
+   int32_t stride,
uint32_t coef_hor0,
uint32_t coef_hor1,
uint32_t coef_ver0,
uint32_t coef_ver1)  {
 uint16_t out0, out1;
-v16u8 dst0, dst1;
+v16u8 dst0 = { 0 };
 v16u8 src0, src1, src2;
 v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
 v16i8 res, mask;
@@ -1428,8 +1428,11 @@ static void avc_chroma_hv_and_aver_dst_2x2_msa(uint8_t 
*src, int32_t src_stride,
 
 mask = LD_SB(&chroma_mask_arr[48]);
 
-LD_UB3(src, src_stride, src0, src1, src2);
-LD_UB2(dst, dst_stride, dst0, dst1);
+LD_UB3(src, stride, src0, src1, src2);
+out0 = LH(dst);
+out1 = LH(dst + stride);
+dst0 = (v16u8) __msa_insert_h((v8i16) dst0, 0, out0);
+dst0 = (v16u8) __msa_insert_h((v8i16) dst0, 1, out1);
 VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
 DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
 MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1); @@ 
-1438,67 +1441,26 @@ static void avc_chroma_hv_and_aver_dst_2x2_msa(uint8_t 
*src, int32_t src_stride,
 res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
 res_vt0 = __msa_sat_u_h(res_vt0, 7);
 res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
-dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
 dst0 = __msa_aver_u_b((v16u8) res, dst0);
 out0 = __msa_copy_u_h((v8i16) dst0, 0);
 out1 = __msa_copy_u_h((v8i16) dst0, 1);
 
 SH(out0, dst);
-dst += dst_stride;
+dst += stride;
 SH(out1, dst);
 }
 
-static void avc_chroma_hv_and_aver_dst_2x4_msa(uint8_t *src, int32_t 
src_stride,
-   uint8_t *dst, int32_t 
dst_stride,
+static void avc_chroma_hv_and_aver_dst_2x4_msa(uint8_t *src, uint8_t *dst,
+   int32_t stride,
uint32_t coef_hor0,
uint32_t coef_hor1,
uint32_t coef_ver0,
uint32_t coef_ver1)  {
+uint16_t tp0, tp1, tp2, tp3;
 v16u8 src0, src1, src2, src3, src4;
 v16u8 tmp0, tmp1, tmp2, tmp3;
-v16u8 dst0, dst1, dst2, dst3;
-v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
-v16i8 res, mask;
-v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
-v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
-v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
-v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
-v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
-
-mask = LD_SB(&chroma_mask_arr[48]);
-
-LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
-LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
-VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3);
-ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
-DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
-MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
-
-res_vt0 += res_vt1;
-res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
-res_vt0 = __msa_sat_u_h(res_vt0, 7);
-res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
-
-dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
-dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 2, (v8i16) dst2);
-dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 3, (v8i16) dst3);
-dst0 = __msa_aver_u_b((v16u8) res, dst0);
-
-ST2x4_UB(dst0, 0, dst, dst_stride);
-}
-
-static void avc_chr

Re: [FFmpeg-devel] [PATCH] avcodec/mips: Improve avc avg mc 22, 11, 31, 13 and 33 msa functions

2017-10-30 Thread Manojkumar Bhosale
LGTM

-Original Message-
From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf Of 
kaustubh.ra...@imgtec.com
Sent: Friday, October 27, 2017 5:01 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Kaustubh Raste
Subject: [FFmpeg-devel] [PATCH] avcodec/mips: Improve avc avg mc 22, 11, 31, 13 
and 33 msa functions

From: Kaustubh Raste 

Remove loops and unroll as block sizes are known.
Load the specific destination bytes instead of MSA load and pack.
Remove unused macro and functions.

Signed-off-by: Kaustubh Raste 
---
 libavcodec/mips/h264qpel_msa.c |  781 ++--
 1 file changed, 513 insertions(+), 268 deletions(-)

diff --git a/libavcodec/mips/h264qpel_msa.c b/libavcodec/mips/h264qpel_msa.c 
index fcccb98..e3a8634 100644
--- a/libavcodec/mips/h264qpel_msa.c
+++ b/libavcodec/mips/h264qpel_msa.c
@@ -75,22 +75,6 @@ static const uint8_t luma_mask_arr[16 * 8] = {
 DPADD_SB2_SH(tmp0_m, tmp1_m, plus20b_m, plus20b_m, out1, out2);  \
 }
 
-#define AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(vec0, vec1, vec2, vec3, vec4, vec5)  
\
-( {
\
-v8i16 tmp1_m;  
\
-v16i8 tmp0_m, tmp2_m;  
\
-v16i8 minus5b_m = __msa_ldi_b(-5); 
\
-v16i8 plus20b_m = __msa_ldi_b(20); 
\
-   
\
-tmp1_m = (v8i16) __msa_ilvr_b((v16i8) vec5, (v16i8) vec0); 
\
-tmp1_m = __msa_hadd_s_h((v16i8) tmp1_m, (v16i8) tmp1_m);   
\
-   
\
-ILVR_B2_SB(vec4, vec1, vec3, vec2, tmp0_m, tmp2_m);
\
-DPADD_SB2_SH(tmp0_m, tmp2_m, minus5b_m, plus20b_m, tmp1_m, tmp1_m);
\
-   
\
-tmp1_m;
\
-} )
-
 #define AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(vec0, vec1, vec2, vec3, vec4, vec5)  
\
 ( {
\
 v4i32 tmp1_m;  
\
@@ -1157,128 +1141,6 @@ static void 
avc_luma_vt_qrt_and_aver_dst_16x16_msa(const uint8_t *src,
 }
 }
 
-static void avc_luma_mid_and_aver_dst_4x4_msa(const uint8_t *src,
-  int32_t src_stride,
-  uint8_t *dst, int32_t dst_stride)
-{
-v16i8 src0, src1, src2, src3, src4;
-v16i8 mask0, mask1, mask2;
-v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
-v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
-v8i16 res0, res1, res2, res3;
-v16u8 dst0, dst1, dst2, dst3;
-v16u8 tmp0, tmp1, tmp2, tmp3;
-
-LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
-LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-src += (5 * src_stride);
-
-XORI_B5_128_SB(src0, src1, src2, src3, src4);
-
-hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
-  mask0, mask1, mask2);
-hz_out2 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
-  mask0, mask1, mask2);
-
-PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
-
-hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
-
-LD_SB4(src, src_stride, src0, src1, src2, src3);
-XORI_B4_128_SB(src0, src1, src2, src3);
-
-hz_out5 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
-  mask0, mask1, mask2);
-hz_out7 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
-  mask0, mask1, mask2);
-
-PCKOD_D2_SH(hz_out5, hz_out5, hz_out7, hz_out7, hz_out6, hz_out8);
-
-res0 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out0, hz_out1, hz_out2,
- hz_out3, hz_out4, hz_out5);
-res1 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out1, hz_out2, hz_out3,
- hz_out4, hz_out5, hz_out6);
-res2 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out2, hz_out3, hz_out4,
- hz_out5, hz_out6, hz_out7);
-res3 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out3, hz_out4, hz_out5,
- hz_out6, hz_out7, hz_out8);
-LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-tmp0 = PCKEV_XORI128_UB(res0, res1);
-tmp1 = PCKEV_XORI128_UB(res2, res3);
-PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2, tmp3);
-AVER_UB2_UB(tmp0, tmp2, tmp1, tmp3, tmp0, tmp1);
-
-  

Re: [FFmpeg-devel] [PATCH 1/2] avfilter: pass correct argument to helper function

2017-10-30 Thread Nicolas George
Le sextidi 6 brumaire, an CCXXVI, Paul B Mahol a écrit :
> Signed-off-by: Paul B Mahol 
> ---
>  libavfilter/avfilter.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
> index bc9e60bb85..b98b32bacb 100644
> --- a/libavfilter/avfilter.c
> +++ b/libavfilter/avfilter.c
> @@ -1532,7 +1532,7 @@ int ff_inlink_consume_samples(AVFilterLink *link, 
> unsigned min, unsigned max,
>  return 0;
>  if (link->status_in)
>  min = FFMIN(min, ff_framequeue_queued_samples(&link->fifo));
> -ret = take_samples(link, min, link->max_samples, &frame);
> +ret = take_samples(link, min, max, &frame);
>  if (ret < 0)
>  return ret;
>  consume_update(link, frame);

Ok.

Regards,

-- 
  Nicolas George


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [RFC]lswr/rematrix: Support s32p

2017-10-30 Thread Michael Niedermayer
On Mon, Oct 30, 2017 at 03:47:13PM +0700, Muhammad Faiz wrote:
> On Sun, Oct 29, 2017 at 3:55 AM, Carl Eugen Hoyos  wrote:
> > Hi!
> >
> > Attached patch fixes a random testcase for ticket #6785 here but I
> > don't know if this is the correct fix.
> >
> > Please review, Carl Eugen
> >
> > From a93b9309d74f5eadece371ee1e682d266af6cd83 Mon Sep 17 00:00:00 2001
> > From: Carl Eugen Hoyos 
> > Date: Sat, 28 Oct 2017 22:52:02 +0200
> > Subject: [PATCH] lswr/rematrix: Support s32p.
> >
> > Fixes ticket #6785.
> > ---
> >  libswresample/rematrix.c |   21 +++--
> >  1 file changed, 15 insertions(+), 6 deletions(-)
> >
> > diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
> > index 66a43c1..a6aa6b0 100644
> > --- a/libswresample/rematrix.c
> > +++ b/libswresample/rematrix.c
> > @@ -445,14 +445,23 @@ av_cold int swri_rematrix_init(SwrContext *s){
> >  s->mix_2_1_f = (mix_2_1_func_type*)sum2_double;
> >  s->mix_any_f = (mix_any_func_type*)get_mix_any_func_double(s);
> >  }else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){
> > -// Only for dithering currently
> > -// s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double));
> > -s->native_one= av_mallocz(sizeof(int));
> > +s->native_one= av_mallocz(sizeof(int64_t));
> >  if (!s->native_one)
> >  return AVERROR(ENOMEM);
> > -// for (i = 0; i < nb_out; i++)
> > -// for (j = 0; j < nb_in; j++)
> > -// ((double*)s->native_matrix)[i * nb_in + j] = 
> > s->matrix[i][j];
> > +s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int));
> > +if (!s->native_matrix) {
> > +av_freep(&s->native_one);
> > +return AVERROR(ENOMEM);
> > +}
> > +for (i = 0; i < nb_out; i++) {
> > +double rem = 0;
> > +
> > +for (j = 0; j < nb_in; j++) {
> > +double target = s->matrix[i][j] * 32768 + rem;
> > +((int*)s->native_matrix)[i * nb_in + j] = lrintf(target);
> > +rem += target - ((int64_t*)s->native_matrix)[i * nb_in + 
> > j];
> > +}
> > +}
> >  *((int*)s->native_one) = 32768;
> >  s->mix_1_1_f = (mix_1_1_func_type*)copy_s32;
> >  s->mix_2_1_f = (mix_2_1_func_type*)sum2_s32;
> 
> The code is confusing.
> Which is the type of native_matrix and native_one? int or int64_t?

They should be int, if I am not mistaken.

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If a bugfix only changes things apparently unrelated to the bug with no
further explanation, that is a good sign that the bugfix is wrong.


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avutil/crc: use ff_thread_once at av_crc_get_table

2017-10-30 Thread Michael Niedermayer
On Mon, Oct 30, 2017 at 02:14:35PM +0700, Muhammad Faiz wrote:
> On Tue, Oct 24, 2017 at 4:31 PM, Muhammad Faiz  wrote:
> > Fix tsan warnings.
> >
> > Signed-off-by: Muhammad Faiz 
> > ---
> >  libavutil/crc.c | 49 +
> >  1 file changed, 29 insertions(+), 20 deletions(-)
> 
> Ping.

I assume James' patch is faster than both?

If this code is never run in speed-relevant loops, then your solution is
better. Otherwise, I think James' patch is better.

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Frequently ignored answer#1 FFmpeg bugs should be sent to our bugtracker. User
questions about the command line tools should be sent to the ffmpeg-user ML.
And questions about how to use libav* should be sent to the libav-user ML.


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 0/8] AVX-512 support (v.1)

2017-10-30 Thread James Darnley
This patch set adds support for AVX-512 functions to be written.  While not
immediately useful this does let people start writing them.

Presently it lumps a "more manageable" set of sub-features into the overall
AVX-512 flag.

Just to be clear: current processors severely limit the performance when executing
instructions on ZMM registers (512-bit).  Switching between the states also
takes some time.  Therefore functions that use them should be tested carefully
in "real-world" conditions to ensure that overall performance doesn't drop.

However, as Gramner points out in his x86inc commit, it provides an additional
16 registers for a total of 32 SIMD registers.  These can all be used in XMM
(128-bit) and YMM (256-bit) forms.  New instructions can also be used on these
smaller registers.

There are 2 commits here that I don't intend to be applied (now).  The first is
the alignment increase reported by avutil.  The second is the v210enc function,
it passes checkasm but it is not any faster.  It is there to show that all the
previous commits work correctly, namely: configure checks, cpuid detection,
x86inc changes, checkasm.

P.S.  I forgot to reword the commit message of "x86inc: reduce difference to
x264 upstream" to state what it does and why.  The smartalign directive is
documented here: http://www.nasm.us/xdoc/2.13.01/html/nasmdoc5.html#section-5.2

Henrik Gramner (1):
  x86inc: AVX-512 support

James Darnley (7):
  configure: test whether x86 assembler supports AVX-512
  avutil: add AVX-512 flags
  avutil: detect when AVX-512 is available
  avutil: add alignment needed for AVX-512
  x86inc: reduce difference to x264 upstream
  checkasm: support for AVX-512 functions
  avcodec/v210enc: add AVX-512 10-bit line pack function

 configure |   5 ++
 libavcodec/x86/v210enc.asm|   5 ++
 libavcodec/x86/v210enc_init.c |   7 ++
 libavutil/cpu.c   |   6 +-
 libavutil/cpu.h   |   1 +
 libavutil/tests/cpu.c |   1 +
 libavutil/x86/cpu.c   |  11 +++
 libavutil/x86/cpu.h   |   2 +
 libavutil/x86/x86inc.asm  | 188 ++
 tests/checkasm/checkasm.c |   1 +
 10 files changed, 191 insertions(+), 36 deletions(-)

-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/8] avutil: add AVX-512 flags

2017-10-30 Thread James Darnley
---
 libavutil/cpu.c   | 6 +-
 libavutil/cpu.h   | 1 +
 libavutil/tests/cpu.c | 1 +
 libavutil/x86/cpu.h   | 2 ++
 4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index c8401b8258..6548cc3042 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -80,7 +80,8 @@ void av_force_cpu_flags(int arg){
 AV_CPU_FLAG_XOP  |
 AV_CPU_FLAG_FMA3 |
 AV_CPU_FLAG_FMA4 |
-AV_CPU_FLAG_AVX2 ))
+AV_CPU_FLAG_AVX2 |
+AV_CPU_FLAG_AVX512   ))
 && !(arg & AV_CPU_FLAG_MMX)) {
 av_log(NULL, AV_LOG_WARNING, "MMX implied by specified flags\n");
 arg |= AV_CPU_FLAG_MMX;
@@ -126,6 +127,7 @@ int av_parse_cpu_flags(const char *s)
 #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX)
 #define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | AV_CPU_FLAG_BMI1)
 #define CPUFLAG_AESNI(AV_CPU_FLAG_AESNI| CPUFLAG_SSE42)
+#define CPUFLAG_AVX512   (AV_CPU_FLAG_AVX512   | CPUFLAG_AVX2)
 static const AVOption cpuflags_opts[] = {
 { "flags"   , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, 
INT64_MAX, .unit = "flags" },
 #if   ARCH_PPC
@@ -154,6 +156,7 @@ int av_parse_cpu_flags(const char *s)
 { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT
 },.unit = "flags" },
 { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV
 },.unit = "flags" },
 { "aesni"   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AESNI   
 },.unit = "flags" },
+{ "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX512  
 },.unit = "flags" },
 #elif ARCH_ARM
 { "armv5te",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV5TE 
 },.unit = "flags" },
 { "armv6",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6   
 },.unit = "flags" },
@@ -216,6 +219,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
 { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
AV_CPU_FLAG_3DNOWEXT },.unit = "flags" },
 { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV
 },.unit = "flags" },
 { "aesni",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI   
 },.unit = "flags" },
+{ "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512  
 },.unit = "flags" },
 
 #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
 #define CPU_FLAG_P3 CPU_FLAG_P2 | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 9e5d40affe..91523f3f5a 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -55,6 +55,7 @@
 #define AV_CPU_FLAG_FMA30x1 ///< Haswell FMA3 functions
 #define AV_CPU_FLAG_BMI10x2 ///< Bit Manipulation Instruction Set 1
 #define AV_CPU_FLAG_BMI20x4 ///< Bit Manipulation Instruction Set 2
+#define AV_CPU_FLAG_AVX512 0x10 ///< AVX-512 functions
 
 #define AV_CPU_FLAG_ALTIVEC  0x0001 ///< standard
 #define AV_CPU_FLAG_VSX  0x0002 ///< ISA 2.06
diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c
index f02a54cbbb..ce45b715a0 100644
--- a/libavutil/tests/cpu.c
+++ b/libavutil/tests/cpu.c
@@ -73,6 +73,7 @@ static const struct {
 { AV_CPU_FLAG_BMI1,  "bmi1"   },
 { AV_CPU_FLAG_BMI2,  "bmi2"   },
 { AV_CPU_FLAG_AESNI, "aesni"  },
+{ AV_CPU_FLAG_AVX512,"avx512" },
 #endif
 { 0 }
 };
diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
index 309b8e746c..7f4e5d08bb 100644
--- a/libavutil/x86/cpu.h
+++ b/libavutil/x86/cpu.h
@@ -50,6 +50,7 @@
 #define X86_FMA4(flags) CPUEXT(flags, FMA4)
 #define X86_AVX2(flags) CPUEXT(flags, AVX2)
 #define X86_AESNI(flags)CPUEXT(flags, AESNI)
+#define X86_AVX512(flags)   CPUEXT(flags, AVX512)
 
 #define EXTERNAL_AMD3DNOW(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW)
 #define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, 
AMD3DNOWEXT)
@@ -79,6 +80,7 @@
 #define EXTERNAL_AVX2_FAST(flags)   CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, 
AVX2, AVX)
 #define EXTERNAL_AVX2_SLOW(flags)   CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, 
AVX2, AVX)
 #define EXTERNAL_AESNI(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI)
+#define EXTERNAL_AVX512(flags)  CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512)
 
 #define INLINE_AMD3DNOW(flags)  CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW)
 #define INLINE_AMD3DNOWEXT(flags)   CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT)
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 5/8] x86inc: AVX-512 support

2017-10-30 Thread James Darnley
From: Henrik Gramner 

AVX-512 consists of a plethora of different extensions, but in order to keep
things a bit more manageable we group together the following extensions
under a single baseline cpu flag which should cover SKL-X and future CPUs:
 * AVX-512 Foundation (F)
 * AVX-512 Conflict Detection Instructions (CD)
 * AVX-512 Byte and Word Instructions (BW)
 * AVX-512 Doubleword and Quadword Instructions (DQ)
 * AVX-512 Vector Length Extensions (VL)

On x86-64 AVX-512 provides 16 additional vector registers, prefer using
those over existing ones since it allows us to avoid using `vzeroupper`
unless more than 16 vector registers are required. They also happen to
be volatile on Windows which means that we don't need to save and restore
existing xmm register contents unless more than 22 vector registers are
required.

Big thanks to Intel for their support.
---
 libavutil/x86/x86inc.asm | 172 ++-
 1 file changed, 139 insertions(+), 33 deletions(-)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 6a054a3e09..f3dd2b788a 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -337,6 +337,8 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
 %endmacro
 
 %define required_stack_alignment ((mmsize + 15) & ~15)
+%define vzeroupper_required (mmsize > 16 && (ARCH_X86_64 == 0 || xmm_regs_used 
> 16 || notcpuflag(avx512)))
+%define high_mm_regs (16*cpuflag(avx512))
 
 %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
 %ifnum %1
@@ -450,15 +452,16 @@ DECLARE_REG 14, R13, 120
 
 %macro WIN64_PUSH_XMM 0
 ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space 
allocated.
-%if xmm_regs_used > 6
+%if xmm_regs_used > 6 + high_mm_regs
 movaps [rstk + stack_offset +  8], xmm6
 %endif
-%if xmm_regs_used > 7
+%if xmm_regs_used > 7 + high_mm_regs
 movaps [rstk + stack_offset + 24], xmm7
 %endif
-%if xmm_regs_used > 8
+%assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
+%if %%xmm_regs_on_stack > 0
 %assign %%i 8
-%rep xmm_regs_used-8
+%rep %%xmm_regs_on_stack
 movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
 %assign %%i %%i+1
 %endrep
@@ -467,10 +470,11 @@ DECLARE_REG 14, R13, 120
 
 %macro WIN64_SPILL_XMM 1
 %assign xmm_regs_used %1
-ASSERT xmm_regs_used <= 16
-%if xmm_regs_used > 8
+ASSERT xmm_regs_used <= 16 + high_mm_regs
+%assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
+%if %%xmm_regs_on_stack > 0
 ; Allocate stack space for callee-saved xmm registers plus shadow 
space and align the stack.
-%assign %%pad (xmm_regs_used-8)*16 + 32
+%assign %%pad %%xmm_regs_on_stack*16 + 32
 %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & 
(STACK_ALIGNMENT-1))
 SUB rsp, stack_size_padded
 %endif
@@ -479,9 +483,10 @@ DECLARE_REG 14, R13, 120
 
 %macro WIN64_RESTORE_XMM_INTERNAL 0
 %assign %%pad_size 0
-%if xmm_regs_used > 8
-%assign %%i xmm_regs_used
-%rep xmm_regs_used-8
+%assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
+%if %%xmm_regs_on_stack > 0
+%assign %%i xmm_regs_used - high_mm_regs
+%rep %%xmm_regs_on_stack
 %assign %%i %%i-1
 movaps xmm %+ %%i, [rsp + (%%i-8)*16 + stack_size + 32]
 %endrep
@@ -494,10 +499,10 @@ DECLARE_REG 14, R13, 120
 %assign %%pad_size stack_size_padded
 %endif
 %endif
-%if xmm_regs_used > 7
+%if xmm_regs_used > 7 + high_mm_regs
 movaps xmm7, [rsp + stack_offset - %%pad_size + 24]
 %endif
-%if xmm_regs_used > 6
+%if xmm_regs_used > 6 + high_mm_regs
 movaps xmm6, [rsp + stack_offset - %%pad_size +  8]
 %endif
 %endmacro
@@ -509,12 +514,12 @@ DECLARE_REG 14, R13, 120
 %assign xmm_regs_used 0
 %endmacro
 
-%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 || 
stack_size > 0
+%define has_epilogue regs_used > 7 || stack_size > 0 || vzeroupper_required || 
xmm_regs_used > 6+high_mm_regs
 
 %macro RET 0
 WIN64_RESTORE_XMM_INTERNAL
 POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
-%if mmsize == 32
+%if vzeroupper_required
 vzeroupper
 %endif
 AUTO_REP_RET
@@ -538,9 +543,10 @@ DECLARE_REG 12, R15, 56
 DECLARE_REG 13, R12, 64
 DECLARE_REG 14, R13, 72
 
-%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
+%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
 %assign num_args %1
 %assign regs_used %2
+%assign xmm_regs_used %3
 ASSERT regs_used >= num_args
 SETUP_STACK_POINTER %4
 ASSERT regs_used <= 15
@@ -550,7 +556,7 @@ DECLARE_REG 14, R13, 72
 DEFINE_ARGS_INTERNAL %0, %4, %5
 %endmacro
 
-%define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0
+%define has_epilogue regs_used > 9 

Re: [FFmpeg-devel] [PATCH 1/2] avcodec/aacdec_fixed: Fix integer overflow in predict()

2017-10-30 Thread Michael Niedermayer
On Fri, Oct 27, 2017 at 02:23:20AM +0200, Michael Niedermayer wrote:
> Fixes: runtime error: signed integer overflow: -2110708110 + -82837504 cannot 
> be represented in type 'int'
> Fixes: 3547/clusterfuzz-testcase-minimized-6009386439802880
> 
> Found-by: continuous fuzzing process 
> https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
> Signed-off-by: Michael Niedermayer 
> ---
>  libavcodec/aacdec_fixed.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

will apply patchset


[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Many things microsoft did are stupid, but not doing something just because
microsoft did it is even more stupid. If everything ms did were stupid they
would be bankrupt already.


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 3/8] avutil: detect when AVX-512 is available

2017-10-30 Thread James Darnley
---
 libavutil/x86/cpu.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index f33088c8c7..b22a950b79 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -143,6 +143,7 @@ int ff_get_cpu_flags_x86(void)
 #endif /* HAVE_SSE */
 }
 if (max_std_level >= 7) {
+int xcr0_lo = 0, xcr0_hi = 0;
 cpuid(7, eax, ebx, ecx, edx);
 #if HAVE_AVX2
 if ((rval & AV_CPU_FLAG_AVX) && (ebx & 0x0020))
@@ -154,6 +155,14 @@ int ff_get_cpu_flags_x86(void)
 if (ebx & 0x0100)
 rval |= AV_CPU_FLAG_BMI2;
 }
+#if HAVE_AVX512 /* F, CD, BW, DQ, VL */
+xgetbv(0, xcr0_lo, xcr0_hi);
+if ((xcr0_lo & 0xe6) == 0xe6) {
+if ((ebx & 0xd003) == 0xd003)
+rval |= AV_CPU_FLAG_AVX512;
+
+}
+#endif
 }
 
 cpuid(0x8000, max_ext_level, ebx, ecx, edx);
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 8/8] avcodec/v210enc: add AVX-512 10-bit line pack function

2017-10-30 Thread James Darnley
---
 libavcodec/x86/v210enc.asm| 5 +
 libavcodec/x86/v210enc_init.c | 7 +++
 2 files changed, 12 insertions(+)

diff --git a/libavcodec/x86/v210enc.asm b/libavcodec/x86/v210enc.asm
index 965f2bea3c..5068af27f8 100644
--- a/libavcodec/x86/v210enc.asm
+++ b/libavcodec/x86/v210enc.asm
@@ -103,6 +103,11 @@ INIT_YMM avx2
 v210_planar_pack_10
 %endif
 
+%if HAVE_AVX512_EXTERNAL
+INIT_YMM avx512
+v210_planar_pack_10
+%endif
+
 %macro v210_planar_pack_8 0
 
 ; v210_planar_pack_8(const uint8_t *y, const uint8_t *u, const uint8_t *v, 
uint8_t *dst, ptrdiff_t width)
diff --git a/libavcodec/x86/v210enc_init.c b/libavcodec/x86/v210enc_init.c
index e997b4b67a..e8aac373a0 100644
--- a/libavcodec/x86/v210enc_init.c
+++ b/libavcodec/x86/v210enc_init.c
@@ -32,6 +32,9 @@ void ff_v210_planar_pack_10_ssse3(const uint16_t *y, const 
uint16_t *u,
 void ff_v210_planar_pack_10_avx2(const uint16_t *y, const uint16_t *u,
  const uint16_t *v, uint8_t *dst,
  ptrdiff_t width);
+void ff_v210_planar_pack_10_avx512(const uint16_t *y, const uint16_t *u,
+   const uint16_t *v, uint8_t *dst,
+   ptrdiff_t width);
 
 av_cold void ff_v210enc_init_x86(V210EncContext *s)
 {
@@ -51,4 +54,8 @@ av_cold void ff_v210enc_init_x86(V210EncContext *s)
 s->sample_factor_10 = 2;
 s->pack_line_10 = ff_v210_planar_pack_10_avx2;
 }
+
+if (EXTERNAL_AVX512(cpu_flags)) {
+s->pack_line_10 = ff_v210_planar_pack_10_avx512;
+}
 }
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 4/8] avutil: add alignment needed for AVX-512

2017-10-30 Thread James Darnley
---
 libavutil/x86/cpu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index b22a950b79..c56f2a8754 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -247,6 +247,8 @@ size_t ff_get_cpu_max_align_x86(void)
 {
 int flags = av_get_cpu_flags();
 
+if (flags & AV_CPU_FLAG_AVX512)
+return 64;
 if (flags & (AV_CPU_FLAG_AVX2  |
  AV_CPU_FLAG_AVX   |
  AV_CPU_FLAG_XOP   |
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 6/8] x86inc: reduce difference to x264 upstream

2017-10-30 Thread James Darnley
These changes were committed to x264 in b568a256 "Experimental nasm
support"
---
 libavutil/x86/x86inc.asm | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index f3dd2b788a..10e4603a65 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -113,6 +113,10 @@
 %endif
 %endmacro
 
+%ifdef __NASM_VER__
+%use smartalign
+%endif
+
 ; Macros to eliminate most code duplication between x86_32 and x86_64:
 ; Currently this works only for leaf functions which load all their arguments
 ; into registers at the start, and make no other use of the stack. Luckily that
@@ -857,9 +861,17 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, 
jge, jng, jnge, ja, jae,
 %endif
 
 %if ARCH_X86_64 || cpuflag(sse2)
-CPUNOP amdnop
+%ifdef __NASM_VER__
+ALIGNMODE p6
+%else
+CPU amdnop
+%endif
 %else
-CPUNOP basicnop
+%ifdef __NASM_VER__
+ALIGNMODE nop
+%else
+CPU basicnop
+%endif
 %endif
 %endmacro
 
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/8] configure: test whether x86 assembler supports AVX-512

2017-10-30 Thread James Darnley
---
 configure | 5 +
 1 file changed, 5 insertions(+)

diff --git a/configure b/configure
index 2f259996ae..e172fe8571 100755
--- a/configure
+++ b/configure
@@ -406,6 +406,7 @@ Optimization options (experts only):
   --disable-fma3   disable FMA3 optimizations
   --disable-fma4   disable FMA4 optimizations
   --disable-avx2   disable AVX2 optimizations
+  --disable-avx-512disable AVX-512 optimizations
   --disable-aesni  disable AESNI optimizations
   --disable-armv5tedisable armv5te optimizations
   --disable-armv6  disable armv6 optimizations
@@ -1840,6 +1841,7 @@ ARCH_EXT_LIST_X86_SIMD="
 amd3dnowext
 avx
 avx2
+avx512
 fma3
 fma4
 mmx
@@ -2364,6 +2366,7 @@ xop_deps="avx"
 fma3_deps="avx"
 fma4_deps="avx"
 avx2_deps="avx"
+avx512_deps="avx2"
 
 mmx_external_deps="x86asm"
 mmx_inline_deps="inline_asm"
@@ -5669,6 +5672,7 @@ EOF
 elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
 esac
 
+check_x86asm "vmovdqa32 [eax]{k1}{z}, zmm0"|| disable 
avx512_external
 check_x86asm "vextracti128 xmm0, ymm0, 0"  || disable avx2_external
 check_x86asm "vpmacsdd xmm0, xmm1, xmm2, xmm3" || disable xop_external
 check_x86asm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4_external
@@ -6800,6 +6804,7 @@ if enabled x86; then
 echo "AESNI enabled ${aesni-no}"
 echo "AVX enabled   ${avx-no}"
 echo "AVX2 enabled  ${avx2-no}"
+echo "AVX-512 enabled   ${avx512-no}"
 echo "XOP enabled   ${xop-no}"
 echo "FMA3 enabled  ${fma3-no}"
 echo "FMA4 enabled  ${fma4-no}"
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 7/8] checkasm: support for AVX-512 functions

2017-10-30 Thread James Darnley
---
 tests/checkasm/checkasm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index b8b0e32dbd..9fb1438bdb 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -192,6 +192,7 @@ static const struct {
 { "FMA3", "fma3", AV_CPU_FLAG_FMA3 },
 { "FMA4", "fma4", AV_CPU_FLAG_FMA4 },
 { "AVX2", "avx2", AV_CPU_FLAG_AVX2 },
+{ "AVX-512",  "avx512",   AV_CPU_FLAG_AVX512 },
 #endif
 { NULL }
 };
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/avcodec.h: remove doxy from the old bsf API functions

2017-10-30 Thread Derek Buitenhuis
On 10/29/2017 7:20 PM, James Almer wrote:
> Make it clear that these are deprecated and the new API should be
> used instead.
> 
> As a side effect, this slightly reduces differences with libav.
> 
> Signed-off-by: James Almer 
> ---
>  libavcodec/avcodec.h | 70 
> +++-
>  1 file changed, 14 insertions(+), 56 deletions(-)

OK.

- Derek
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 8/8] avcodec/v210enc: add AVX-512 10-bit line pack function

2017-10-30 Thread Henrik Gramner
On Mon, Oct 30, 2017 at 2:08 PM, James Darnley  wrote:
> +INIT_YMM avx512

ymm?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/8] configure: test whether x86 assembler supports AVX-512

2017-10-30 Thread Hendrik Leppkes
On Mon, Oct 30, 2017 at 2:08 PM, James Darnley  wrote:
> ---
>  configure | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/configure b/configure
> index 2f259996ae..e172fe8571 100755
> --- a/configure
> +++ b/configure
> @@ -406,6 +406,7 @@ Optimization options (experts only):
>--disable-fma3   disable FMA3 optimizations
>--disable-fma4   disable FMA4 optimizations
>--disable-avx2   disable AVX2 optimizations
> +  --disable-avx-512disable AVX-512 optimizations

This should be avx512, to match the configure variable

>--disable-aesni  disable AESNI optimizations
>--disable-armv5tedisable armv5te optimizations
>--disable-armv6  disable armv6 optimizations
> @@ -1840,6 +1841,7 @@ ARCH_EXT_LIST_X86_SIMD="
>  amd3dnowext
>  avx
>  avx2
> +avx512
>  fma3
>  fma4
>  mmx
> @@ -2364,6 +2366,7 @@ xop_deps="avx"
>  fma3_deps="avx"
>  fma4_deps="avx"
>  avx2_deps="avx"
> +avx512_deps="avx2"
>
>  mmx_external_deps="x86asm"
>  mmx_inline_deps="inline_asm"
> @@ -5669,6 +5672,7 @@ EOF
>  elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
>  esac
>
> +check_x86asm "vmovdqa32 [eax]{k1}{z}, zmm0"|| disable 
> avx512_external
>  check_x86asm "vextracti128 xmm0, ymm0, 0"  || disable 
> avx2_external
>  check_x86asm "vpmacsdd xmm0, xmm1, xmm2, xmm3" || disable 
> xop_external
>  check_x86asm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable 
> fma4_external
> @@ -6800,6 +6804,7 @@ if enabled x86; then
>  echo "AESNI enabled ${aesni-no}"
>  echo "AVX enabled   ${avx-no}"
>  echo "AVX2 enabled  ${avx2-no}"
> +echo "AVX-512 enabled   ${avx512-no}"
>  echo "XOP enabled   ${xop-no}"
>  echo "FMA3 enabled  ${fma3-no}"
>  echo "FMA4 enabled  ${fma4-no}"
> --
> 2.14.2
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/8] configure: test whether x86 assembler supports AVX-512

2017-10-30 Thread James Almer
On 10/30/2017 10:08 AM, James Darnley wrote:
> ---
>  configure | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/configure b/configure
> index 2f259996ae..e172fe8571 100755
> --- a/configure
> +++ b/configure
> @@ -406,6 +406,7 @@ Optimization options (experts only):
>--disable-fma3   disable FMA3 optimizations
>--disable-fma4   disable FMA4 optimizations
>--disable-avx2   disable AVX2 optimizations
> +  --disable-avx-512disable AVX-512 optimizations
>--disable-aesni  disable AESNI optimizations
>--disable-armv5tedisable armv5te optimizations
>--disable-armv6  disable armv6 optimizations
> @@ -1840,6 +1841,7 @@ ARCH_EXT_LIST_X86_SIMD="
>  amd3dnowext
>  avx
>  avx2
> +avx512
>  fma3
>  fma4
>  mmx
> @@ -2364,6 +2366,7 @@ xop_deps="avx"
>  fma3_deps="avx"
>  fma4_deps="avx"
>  avx2_deps="avx"
> +avx512_deps="avx2"
>  
>  mmx_external_deps="x86asm"
>  mmx_inline_deps="inline_asm"
> @@ -5669,6 +5672,7 @@ EOF
>  elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
>  esac
>  
> +check_x86asm "vmovdqa32 [eax]{k1}{z}, zmm0"|| disable 
> avx512_external

Afaik nasm had a lot of bugs in the first few versions with support for
avx512, so maybe this check should make sure it accepts only NASM 2.13
or 2.13.01 as a minimum? Assuming it doesn't already, in which case
nevermind.

>  check_x86asm "vextracti128 xmm0, ymm0, 0"  || disable 
> avx2_external
>  check_x86asm "vpmacsdd xmm0, xmm1, xmm2, xmm3" || disable 
> xop_external
>  check_x86asm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable 
> fma4_external
> @@ -6800,6 +6804,7 @@ if enabled x86; then
>  echo "AESNI enabled ${aesni-no}"
>  echo "AVX enabled   ${avx-no}"
>  echo "AVX2 enabled  ${avx2-no}"
> +echo "AVX-512 enabled   ${avx512-no}"
>  echo "XOP enabled   ${xop-no}"
>  echo "FMA3 enabled  ${fma3-no}"
>  echo "FMA4 enabled  ${fma4-no}"
> 

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/avcodec.h: remove doxy from the old bsf API functions

2017-10-30 Thread James Almer
On 10/30/2017 12:20 PM, Derek Buitenhuis wrote:
> On 10/29/2017 7:20 PM, James Almer wrote:
>> Make it clear that these are deprecated and the new API should be
>> used instead.
>>
>> As a side effect, this slightly reduces differences with libav.
>>
>> Signed-off-by: James Almer 
>> ---
>>  libavcodec/avcodec.h | 70 
>> +++-
>>  1 file changed, 14 insertions(+), 56 deletions(-)
> 
> OK.
> 
> - Derek

Pushed, thanks.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] order T-shirts

2017-10-30 Thread Thilo Borgmann
Am 30.10.17 um 03:52 schrieb Steven Liu:
> 2017-10-30 10:12 GMT+08:00 Lou Logan :
>> On Sat, Oct 28, 2017, at 03:41 AM, Thilo Borgmann wrote:
>>> Both designs come for 12€ each! Lou's design doesn't have a printing on
>>> the neck. I'd chosen the green for our logo by their color chart - it
>>> looks good color-wise to me.
>>
>> Looks good to me too. Thanks for all of your work on this.
> 
> What about move the logo to up-right from center? looks like the
> attachment file?

Would not work with such a small logo according to what they told me.
We had to remove the ".org" part to be able to make it smaller after the first 
draft.
As small as they could do it, I don't think the complete "[Logo] FFmpeg.org" 
would look good anywhere except right in the middle like in Lou's design. For 
having it in that place, I think Thomas' design is the way to go.

-Thilo
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 8/8] avcodec/v210enc: add AVX-512 10-bit line pack function

2017-10-30 Thread James Darnley
On 2017-10-30 17:14, Henrik Gramner wrote:
> On Mon, Oct 30, 2017 at 2:08 PM, James Darnley  wrote:
>> +INIT_YMM avx512
> 
> ymm?

Yes.  I haven't written a correct one using zmm regs yet.  I will ask
some questions about it, possibly very soon.  If I don't get a longer
version to work, I could just use zmm but not use all the space.  (This
is supposed to be a test so I do not need to be concerned about speed.)

This does use some new instructions, at least.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/8] configure: test whether x86 assembler supports AVX-512

2017-10-30 Thread James Darnley
On 2017-10-30 17:53, Hendrik Leppkes wrote:
> On Mon, Oct 30, 2017 at 2:08 PM, James Darnley  wrote:
>> ---
>>  configure | 5 +
>>  1 file changed, 5 insertions(+)
>>
>> diff --git a/configure b/configure
>> index 2f259996ae..e172fe8571 100755
>> --- a/configure
>> +++ b/configure
>> @@ -406,6 +406,7 @@ Optimization options (experts only):
>>--disable-fma3   disable FMA3 optimizations
>>--disable-fma4   disable FMA4 optimizations
>>--disable-avx2   disable AVX2 optimizations
>> +  --disable-avx-512disable AVX-512 optimizations
> 
> This should be avx512, to match the configure variable

Yes.  An oversight on my part.  Code is supposed to not have the hyphen
whereas descriptive text and comments should.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/8] configure: test whether x86 assembler supports AVX-512

2017-10-30 Thread James Darnley
On 2017-10-30 18:40, James Almer wrote:
> On 10/30/2017 10:08 AM, James Darnley wrote:
>> @@ -5669,6 +5672,7 @@ EOF
>>  elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
>>  esac
>>  
>> +check_x86asm "vmovdqa32 [eax]{k1}{z}, zmm0"|| disable 
>> avx512_external
> 
> Afaik nasm had a lot of bugs in the first few versions with support for
> avx512, so maybe this check should make sure it accepts only NASM 2.13
> or 2.13.01 as a minimum? Assuming it doesn't already, in which case
> nevermind.

I will look into that.  I remember hearing similar things in the past.
However my brief conversation with Gramner last week led me to believe
that 2.13 is good and that this check from x264 is enough.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] fix minor leak in id3v2 parsing

2017-10-30 Thread Fredrik Hubinette
Sorry, it was an old patch that I had delayed to submit for various reasons.
Glad it's fixed.

  /Hubbe

On Thu, Oct 26, 2017 at 9:57 AM, James Almer  wrote:

> On 10/24/2017 8:09 PM, Moritz Barsnick wrote:
> > On Tue, Oct 24, 2017 at 19:38:58 -0300, James Almer wrote:
> >>> Subject: [PATCH] Fix minor ffmpeg memory leak in id3v2 parsing.
> >>>
> >>> Reviewed-on: https://chromium-review.googlesource.com/439405
> >>> Reviewed-by: Dale Curtis 
> > [...]
> >> This doesn't seem to apply to git head, or even the recently cut 3.4
> >> branch. Was this patch made for the 3.3 branch?
> >
> > This code was changed/fixed on master 20 days ago by
> > 1fd80106be3dca9fa0ea13fb364c8d221bd27c15, even before 3.4 was branched.
> >
> > The fix may be valid for < 3.4 nonetheless. Is Chromium not using
> > master?
> >
> > Moritz
>
> They may be using master but didn't fetch new commits since late
> September or similar.
>
> Pushed to 3.3 branch in any case. It will be in the 3.3.5 release.
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-10-30 Thread Mark Thompson
On 30/10/17 17:56, mmironov wrote:
> From 9337cb69176bc15aaaf74186cb3468f106236f04 Mon Sep 17 00:00:00 2001
> From: mmironov 
> Date: Fri, 27 Oct 2017 13:03:15 -0400
> Subject: [PATCH] Added: HW accelerated H.264 and HEVC encoding for AMD GPUs
>  based on AMF SDK
> 
> Signed-off-by: mmironov 
> ---
>  Changelog|3 +-
>  compat/amd/amfsdkenc.h   | 1750 
> ++
>  configure|   26 +
>  libavcodec/Makefile  |4 +
>  libavcodec/allcodecs.c   |2 +
>  libavcodec/amfenc.c  |  465 
>  libavcodec/amfenc.h  |  129 
>  libavcodec/amfenc_h264.c |  345 +
>  libavcodec/amfenc_hevc.c |  289 
>  libavcodec/version.h |4 +-
>  10 files changed, 3014 insertions(+), 3 deletions(-)
>  create mode 100644 compat/amd/amfsdkenc.h
>  create mode 100644 libavcodec/amfenc.c
>  create mode 100644 libavcodec/amfenc.h
>  create mode 100644 libavcodec/amfenc_h264.c
>  create mode 100644 libavcodec/amfenc_hevc.c
> 
> diff --git a/Changelog b/Changelog
> index 6592d86..f0d22fa 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -6,7 +6,8 @@ version :
>  - Dropped support for OpenJPEG versions 2.0 and below. Using OpenJPEG now
>requires 2.1 (or later) and pkg-config.
>  - VDA dropped (use VideoToolbox instead)
> -
> +- AMF H.264 encoder
> +- AMF HEVC encoder
>  
>  version 3.4:
>  - deflicker video filter
> diff --git a/compat/amd/amfsdkenc.h b/compat/amd/amfsdkenc.h
> ...
> diff --git a/configure b/configure
> index 0e1ccaa..c785cc9 100755
> --- a/configure
> +++ b/configure
> @@ -304,6 +304,7 @@ External library support:
>  
>The following libraries provide various hardware acceleration features:
>--disable-audiotoolbox   disable Apple AudioToolbox code [autodetect]
> +  --disable-amfdisable AMF video encoding code [autodetect]
>--disable-cuda   disable dynamically linked Nvidia CUDA code 
> [autodetect]
>--enable-cuda-sdkenable CUDA features that require the CUDA SDK 
> [no]
>--disable-cuvid  disable Nvidia CUVID support [autodetect]
> @@ -1641,6 +1642,7 @@ EXTERNAL_LIBRARY_LIST="
>  "
>  
>  HWACCEL_AUTODETECT_LIBRARY_LIST="
> +amf
>  audiotoolbox
>  crystalhd
>  cuda
> @@ -2785,12 +2787,16 @@ scale_npp_filter_deps="cuda libnpp"
>  scale_cuda_filter_deps="cuda_sdk"
>  thumbnail_cuda_filter_deps="cuda_sdk"
>  
> +amf_deps_any="dlopen LoadLibrary"
> +amf_encoder_deps="amf"

"amf_encoder" isn't mentioned anywhere else?

> +
>  nvenc_deps="cuda"
>  nvenc_deps_any="libdl LoadLibrary"
>  nvenc_encoder_deps="nvenc"
>  
>  h263_v4l2m2m_decoder_deps="v4l2_m2m h263_v4l2_m2m"
>  h263_v4l2m2m_encoder_deps="v4l2_m2m h263_v4l2_m2m"
> +h264_amf_encoder_deps="amf"
>  h264_crystalhd_decoder_select="crystalhd h264_mp4toannexb_bsf h264_parser"
>  h264_cuvid_decoder_deps="cuda cuvid"
>  h264_cuvid_decoder_select="h264_mp4toannexb_bsf"
> @@ -2809,6 +2815,7 @@ 
> h264_vaapi_encoder_deps="VAEncPictureParameterBufferH264"
>  h264_vaapi_encoder_select="cbs_h264 vaapi_encode"
>  h264_v4l2m2m_decoder_deps="v4l2_m2m h264_v4l2_m2m"
>  h264_v4l2m2m_encoder_deps="v4l2_m2m h264_v4l2_m2m"
> +hevc_amf_encoder_deps="amf"
>  hevc_cuvid_decoder_deps="cuda cuvid"
>  hevc_cuvid_decoder_select="hevc_mp4toannexb_bsf"
>  hevc_mediacodec_decoder_deps="mediacodec"
> @@ -6305,6 +6312,18 @@ else
>  disable cuda cuvid nvenc
>  fi
>  
> +if enabled x86; then
> +case $target_os in
> +mingw32*|mingw64*|win32|win64|cygwin*)
> +;;
> +*)
> +disable  amf
> +;;
> +esac
> +else
> +disable amf
> +fi
> +
>  enabled nvenc &&
>  check_cc -I$source_path <  #include "compat/nvenc/nvEncodeAPI.h"
> @@ -6313,6 +6332,13 @@ void f(void) { struct { const GUID guid; } s[] = { { 
> NV_ENC_PRESET_HQ_GUID } };
>  int main(void) { return 0; }
>  EOF
>  
> +enabled amf &&
> +check_cc -I$source_path < +#include "compat/amd/amfsdkenc.h"
> +AMFFactory *factory;
> +int main(void) { return 0; }
> +EOF
> +
>  # Funny iconv installations are not unusual, so check it after all flags 
> have been set
>  if enabled libc_iconv; then
>  check_func_headers iconv.h iconv
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index bc4d7da..cbf45ac 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -50,6 +50,7 @@ OBJS = allcodecs.o  
> \
>  # subsystems
>  OBJS-$(CONFIG_AANDCTTABLES)+= aandcttab.o
>  OBJS-$(CONFIG_AC3DSP)  += ac3dsp.o ac3.o ac3tab.o
> +OBJS-$(CONFIG_AMF) += amfenc.o
>  OBJS-$(CONFIG_AUDIO_FRAME_QUEUE)   += audio_frame_queue.o
>  OBJS-$(CONFIG_AUDIODSP)+= audiodsp.o
>  OBJS-$(CONFIG_BLOCKDSP)+= blockdsp.o
> @@ -334,6 +335,7 @@ OBJS-$(CONFIG_H264_DECODER)+= h264dec.o 
> h264_cabac.o h264_cavlc.o \
>  OBJS-$(CONFIG_H264_CUVID_DECODER)  += cuvid.o
>  OBJS-$(CO

[FFmpeg-devel] [PATCH] Fix missing used attribute for inline assembly variables

2017-10-30 Thread Thomas Köppe
Variables used in inline assembly need to be marked with attribute((used)).
Static constants already were, via the define of DECLARE_ASM_CONST.
But DECLARE_ALIGNED does not add this attribute, and some of the variables
defined with it are const only used in inline assembly, and therefore
appeared dead.

This change makes FFMPEG linkable with Clang's ThinLTO.

---
 libavcodec/cabac.c | 2 +-
 libavutil/mem.h| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index dd2b057c6d..7321b48901 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -32,7 +32,7 @@
 #include "cabac.h"
 #include "cabac_functions.h"

-const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
+DECLARE_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 +
4*64 + 63] = {
 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
diff --git a/libavutil/mem.h b/libavutil/mem.h
index 527cd03191..db06e109be 100644
--- a/libavutil/mem.h
+++ b/libavutil/mem.h
@@ -98,7 +98,7 @@
 AV_PRAGMA(DATA_ALIGN(v,n))  \
 static const t __attribute__((aligned(n))) v
 #elif defined(__DJGPP__)
-#define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned
(FFMIN(n, 16)))) v
+#define DECLARE_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned
(FFMIN(n, 16)))) v
 #define DECLARE_ASM_CONST(n,t,v)static const t av_used
__attribute__ ((aligned (FFMIN(n, 16)))) v
 #elif defined(__GNUC__) || defined(__clang__)
 #define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned (n))) v
-- 
2.15.0.rc2.357.g7e34df9404-goog
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix missing used attribute for inline assembly variables

2017-10-30 Thread James Almer
On 10/30/2017 4:17 PM, Thomas Köppe wrote:
> Variables used in inline assembly need to be marked with attribute((used)).
> Static constants already were, via the define of DECLARE_ASM_CONST.
> But DECLARE_ALIGNED does not add this attribute, and some of the variables
> defined with it are const only used in inline assembly, and therefore
> appeared dead.
> 
> This change makes FFMPEG linkable with Clang's ThinLTO.
> 
> ---
>  libavcodec/cabac.c | 2 +-
>  libavutil/mem.h| 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
> index dd2b057c6d..7321b48901 100644
> --- a/libavcodec/cabac.c
> +++ b/libavcodec/cabac.c
> @@ -32,7 +32,7 @@
>  #include "cabac.h"
>  #include "cabac_functions.h"
> 
> -const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
> +DECLARE_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 +
> 4*64 + 63] = {
>  9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
>  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
>  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
> diff --git a/libavutil/mem.h b/libavutil/mem.h
> index 527cd03191..db06e109be 100644
> --- a/libavutil/mem.h
> +++ b/libavutil/mem.h
> @@ -98,7 +98,7 @@
>  AV_PRAGMA(DATA_ALIGN(v,n))  \
>  static const t __attribute__((aligned(n))) v
>  #elif defined(__DJGPP__)

Is your intention to change the DJGPP path, or the gnuc/clang path?

> -#define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned
> (FFMIN(n, 16)))) v
> +#define DECLARE_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned
> (FFMIN(n, 16)))) v
>  #define DECLARE_ASM_CONST(n,t,v)static const t av_used
> __attribute__ ((aligned (FFMIN(n, 16)))) v
>  #elif defined(__GNUC__) || defined(__clang__)
>  #define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned (n))) v
> 

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] On in-tree external headers

2017-10-30 Thread Mark Thompson

Hi all,

The recent submission of AMD AMF patches including a builtin header prompted me 
to think further about what external API headers should actually be included in 
the tree.

For the AMD headers (like V4L2 previously), it seems entirely silly to include 
them - they are available upstream in a free form which can easily be packaged 
and available on any build machine to use.

However, there is the problem that this in some sense places them second-class 
to the Nvidia implementation, which includes all headers in-tree and 
automatically enables itself - any normal build for x86 will include Nvidia 
support by default if the user doesn't explicitly disable it.  The effect of 
that is essentially that the ffmpeg project is facilitating Nvidia's anti-open 
behaviour by including the headers, which is I think something we really 
shouldn't be doing.

So: can we please precisely codify under what circumstances external headers 
should be included in the ffmpeg tree?

As an initial position for consideration, I propose "no external headers may be 
included in the ffmpeg tree".  That is, the contents of the compat/ directory 
should only be OS/compiler compatibility workarounds, not any functional 
headers.

Would anyone like to propose an alternative position?  (Precisely defined - 
"what we have now" would need some clarification of what that actually means so 
that we can apply it consistently to future requests.)

I also ask that, whatever discussion here ends up with, the voting committee 
should vote to ratify it so that we don't have to discuss it again every time 
someone proposes including headers.

Thanks,

- Mark


PS:

On 26/10/17 21:53, Philip Langdale wrote:
> the nvenc header is there for the reason stated: it's too hard to otherwise
> obtain, and when you do obtain it, it's not in an installable form, so we
> don't know how to find it on the build machine in any sane way.
> 
> The cuda headers in our tree are there because they were actually reverse
> engineered, as the official cuda headers don't have a reasonable licence.

None of this stops an individual from creating an independent repository and 
packaging the headers for open-source projects (including ffmpeg) to use.  This 
might be better for other open-source projects as well, because they could 
refer to that repository rather than having to either include the headers 
themselves directly (as ffmpeg currently does) or engage in whatever pain is 
necessary to use them from Nvidia.


PPS:

The position stated above would imply removing the avisynth headers.  Can 
anyone who uses it comment on what would be required for that?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix missing used attribute for inline assembly variables

2017-10-30 Thread Thomas Köppe
It should be under __GNUC__ -- something went wrong with the patch! I'll
make a new one. Sorry about that!

On 30 October 2017 at 19:31, James Almer  wrote:

> On 10/30/2017 4:17 PM, Thomas Köppe wrote:
> > Variables used in inline assembly need to be marked with
> attribute((used)).
> > Static constants already were, via the define of DECLARE_ASM_CONST.
> > But DECLARE_ALIGNED does not add this attribute, and some of the
> variables
> > defined with it are const only used in inline assembly, and therefore
> > appeared dead.
> >
> > This change makes FFMPEG linkable with Clang's ThinLTO.
> >
> > ---
> >  libavcodec/cabac.c | 2 +-
> >  libavutil/mem.h| 2 +-
> >  2 files changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
> > index dd2b057c6d..7321b48901 100644
> > --- a/libavcodec/cabac.c
> > +++ b/libavcodec/cabac.c
> > @@ -32,7 +32,7 @@
> >  #include "cabac.h"
> >  #include "cabac_functions.h"
> >
> > -const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
> > +DECLARE_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 +
> > 4*64 + 63] = {
> >  9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
> >  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
> >  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
> > diff --git a/libavutil/mem.h b/libavutil/mem.h
> > index 527cd03191..db06e109be 100644
> > --- a/libavutil/mem.h
> > +++ b/libavutil/mem.h
> > @@ -98,7 +98,7 @@
> >  AV_PRAGMA(DATA_ALIGN(v,n))  \
> >  static const t __attribute__((aligned(n))) v
> >  #elif defined(__DJGPP__)
>
> Is your intention to change the DJGPP path, or the gnuc/clang path?
>
> > -#define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned
> > (FFMIN(n, 16)))) v
> > +#define DECLARE_ALIGNED(n,t,v)  t av_used __attribute__
> ((aligned
> > (FFMIN(n, 16)))) v
> >  #define DECLARE_ASM_CONST(n,t,v)static const t av_used
> > __attribute__ ((aligned (FFMIN(n, 16)))) v
> >  #elif defined(__GNUC__) || defined(__clang__)
> >  #define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned (n)))
> v
> >
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] On in-tree external headers

2017-10-30 Thread Jan Ekstrom
On Mon, Oct 30, 2017 at 9:51 PM, Mark Thompson  wrote:
> PPS:
>
> The position stated above would imply removing the avisynth headers.  Can 
> anyone who uses it comment on what would be required for that?

Avisynth headers are available from the Avisynth SDK that comes with
each installer of Avisynth. The reason why some projects made their
own forks of the header is because of the useful features added during
the (rather long) development phase of the 2.6 release, during which
the GPL exceptions regarding the header were removed. And thus people
re-implemented them based on the 2.5.8 stable header. Not sure if the
GPL exceptions were re-instated with the final release of Avisynth
2.6, and what is the status with the headers that come with the
Avisynth+ project. In any case, the headers are readily available from
various places.

Otherwise the only difference with the included header would be
Avxsynth compatibility? Which is quite a bit less used than original
Avisynth on wine, or Vapoursynth natively on *nix at this point. It
was one of those things where a company forked an old version of a
project and made it compile on *nix, while most of the plugins etc
were quite Windows-specific. The project still does exist and is
available, though.


Best regards,
Jan Ekström
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix missing used attribute for inline assembly variables

2017-10-30 Thread Thomas Köppe
(I must have forgotten to rebase first!)

On 30 October 2017 at 20:14, Thomas Köppe  wrote:

> It should be under __GNUC__ -- something went wrong with the patch! I'll
> make a new one. Sorry about that!
>
> On 30 October 2017 at 19:31, James Almer  wrote:
>
>> On 10/30/2017 4:17 PM, Thomas Köppe wrote:
>> > Variables used in inline assembly need to be marked with
>> attribute((used)).
>> > Static constants already were, via the define of DECLARE_ASM_CONST.
>> > But DECLARE_ALIGNED does not add this attribute, and some of the
>> variables
>> > defined with it are const only used in inline assembly, and therefore
>> > appeared dead.
>> >
>> > This change makes FFMPEG linkable with Clang's ThinLTO.
>> >
>> > ---
>> >  libavcodec/cabac.c | 2 +-
>> >  libavutil/mem.h| 2 +-
>> >  2 files changed, 2 insertions(+), 2 deletions(-)
>> >
>> > diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
>> > index dd2b057c6d..7321b48901 100644
>> > --- a/libavcodec/cabac.c
>> > +++ b/libavcodec/cabac.c
>> > @@ -32,7 +32,7 @@
>> >  #include "cabac.h"
>> >  #include "cabac_functions.h"
>> >
>> > -const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
>> > +DECLARE_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 +
>> > 4*64 + 63] = {
>> >  9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
>> >  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
>> >  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
>> > diff --git a/libavutil/mem.h b/libavutil/mem.h
>> > index 527cd03191..db06e109be 100644
>> > --- a/libavutil/mem.h
>> > +++ b/libavutil/mem.h
>> > @@ -98,7 +98,7 @@
>> >  AV_PRAGMA(DATA_ALIGN(v,n))  \
>> >  static const t __attribute__((aligned(n))) v
>> >  #elif defined(__DJGPP__)
>>
>> Is your intention to change the DJGPP path, or the gnuc/clang path?
>>
>> > -#define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned
>> > (FFMIN(n, 16)))) v
>> > +#define DECLARE_ALIGNED(n,t,v)  t av_used __attribute__
>> ((aligned
>> > (FFMIN(n, 16)))) v
>> >  #define DECLARE_ASM_CONST(n,t,v)static const t av_used
>> > __attribute__ ((aligned (FFMIN(n, 16)))) v
>> >  #elif defined(__GNUC__) || defined(__clang__)
>> >  #define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned
>> (n))) v
>> >
>>
>> ___
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix missing used attribute for inline assembly variables

2017-10-30 Thread Thomas Köppe
Please find below the actual patch:
==

Variables used in inline assembly need to be marked with attribute((used)).
Static constants already were, via the define of DECLARE_ASM_CONST.
But DECLARE_ALIGNED does not add this attribute, and some of the variables
defined with it are const only used in inline assembly, and therefore
appeared dead.

This change makes FFMPEG linkable with Clang's ThinLTO.
---
 libavcodec/cabac.c | 2 +-
 libavutil/mem.h| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index dd2b057c6d..7321b48901 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -32,7 +32,7 @@
 #include "cabac.h"
 #include "cabac_functions.h"

-const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
+DECLARE_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 +
4*64 + 63] = {
 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
diff --git a/libavutil/mem.h b/libavutil/mem.h
index 527cd03191..c4ee11af58 100644
--- a/libavutil/mem.h
+++ b/libavutil/mem.h
@@ -101,7 +101,7 @@
 #define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned
(FFMIN(n, 16)))) v
 #define DECLARE_ASM_CONST(n,t,v)static const t av_used
__attribute__ ((aligned (FFMIN(n, 16)))) v
 #elif defined(__GNUC__) || defined(__clang__)
-#define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned (n))) v
+#define DECLARE_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned
(n))) v
 #define DECLARE_ASM_CONST(n,t,v)static const t av_used
__attribute__ ((aligned (n))) v
 #elif defined(_MSC_VER)
 #define DECLARE_ALIGNED(n,t,v)  __declspec(align(n)) t v
-- 
2.15.0.rc2.357.g7e34df9404-goog


On 30 October 2017 at 20:25, Thomas Köppe  wrote:

> (I must have forgotten to rebase first!)
>
> On 30 October 2017 at 20:14, Thomas Köppe  wrote:
>
>> It should be under __GNUC__ -- something went wrong with the patch! I'll
>> make a new one. Sorry about that!
>>
>> On 30 October 2017 at 19:31, James Almer  wrote:
>>
>>> On 10/30/2017 4:17 PM, Thomas Köppe wrote:
>>> > Variables used in inline assembly need to be marked with
>>> attribute((used)).
>>> > Static constants already were, via the define of DECLARE_ASM_CONST.
>>> > But DECLARE_ALIGNED does not add this attribute, and some of the
>>> variables
>>> > defined with it are const only used in inline assembly, and therefore
>>> > appeared dead.
>>> >
>>> > This change makes FFMPEG linkable with Clang's ThinLTO.
>>> >
>>> > ---
>>> >  libavcodec/cabac.c | 2 +-
>>> >  libavutil/mem.h| 2 +-
>>> >  2 files changed, 2 insertions(+), 2 deletions(-)
>>> >
>>> > diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
>>> > index dd2b057c6d..7321b48901 100644
>>> > --- a/libavcodec/cabac.c
>>> > +++ b/libavcodec/cabac.c
>>> > @@ -32,7 +32,7 @@
>>> >  #include "cabac.h"
>>> >  #include "cabac_functions.h"
>>> >
>>> > -const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
>>> > +DECLARE_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 +
>>> > 4*64 + 63] = {
>>> >  9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
>>> >  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
>>> >  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
>>> > diff --git a/libavutil/mem.h b/libavutil/mem.h
>>> > index 527cd03191..db06e109be 100644
>>> > --- a/libavutil/mem.h
>>> > +++ b/libavutil/mem.h
>>> > @@ -98,7 +98,7 @@
>>> >  AV_PRAGMA(DATA_ALIGN(v,n))  \
>>> >  static const t __attribute__((aligned(n))) v
>>> >  #elif defined(__DJGPP__)
>>>
>>> Is your intention to change the DJGPP path, or the gnuc/clang path?
>>>
>>> > -#define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned
>>> > (FFMIN(n, 16)))) v
>>> > +#define DECLARE_ALIGNED(n,t,v)  t av_used __attribute__
>>> ((aligned
>>> > (FFMIN(n, 16)))) v
>>> >  #define DECLARE_ASM_CONST(n,t,v)static const t av_used
>>> > __attribute__ ((aligned (FFMIN(n, 16)))) v
>>> >  #elif defined(__GNUC__) || defined(__clang__)
>>> >  #define DECLARE_ALIGNED(n,t,v)  t __attribute__ ((aligned
>>> (n))) v
>>> >
>>>
>>> ___
>>> ffmpeg-devel mailing list
>>> ffmpeg-devel@ffmpeg.org
>>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>
>>
>>
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-10-30 Thread Mironov, Mikhail
> > +static void AMF_CDECL_CALL AMFTraceWriter_Write(AMFTraceWriter
> *pThis,
> > +const wchar_t *scope, const wchar_t *message)
> > +{
> > +AmfTraceWriter *tracer = (AmfTraceWriter*)pThis;
> > +av_log(tracer->avctx, AV_LOG_DEBUG, "%ls: %ls", scope, message);
> 
> Does the message necessarily include a newline already?

Yes.

> > +init_fun = (AMFInit_Fn)dlsym(ctx->library,
> AMF_INIT_FUNCTION_NAME);
> > +AMF_RETURN_IF_FALSE(ctx, init_fun != NULL, AVERROR_UNKNOWN,
> "DLL %s failed to find function %s. \n", AMF_DLL_NAMEA,
> AMF_INIT_FUNCTION_NAME);
> 
> I think do s/ \n/\n/ for all of these messages.

Sorry, didn't get this.

> 
> > +
> > +version_fun = (AMFQueryVersion_Fn)dlsym(ctx->library,
> AMF_QUERY_VERSION_FUNCTION_NAME);
> > +AMF_RETURN_IF_FALSE(ctx, version_fun != NULL,
> AVERROR_UNKNOWN, "DLL %s failed to find function %s. \n",
> AMF_DLL_NAMEA, AMF_QUERY_VERSION_FUNCTION_NAME);
> > +
> > +res = version_fun(&ctx->version);
> > +AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s
> failed with error %d. \n", AMF_QUERY_VERSION_FUNCTION_NAME, res);
> > +res = init_fun(AMF_FULL_VERSION, &ctx->factory);
> > +AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s
> failed with error %d. \n", AMF_INIT_FUNCTION_NAME, res);
> > +res = ctx->factory->pVtbl->GetTrace(ctx->factory, &ctx->trace);
> > +AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN,
> "GetTrace() failed with error %d. \n", res);
> > +res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug);
> > +AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN,
> "GetDebug() failed with error %d. \n", res);
> > +return 0;
> > +}
> > +
> > +static int amf_init_context(AVCodecContext *avctx)
> > +{
> > +AmfContext *ctx = avctx->priv_data;
> > +AMF_RESULT  res = AMF_OK;
> > +
> > +// the return of these functions indicates old state and do not affect
> behaviour
> > +ctx->trace->pVtbl->EnableWriter(ctx->trace,
> AMF_TRACE_WRITER_CONSOLE, 0);
> > +#if AMF_DEBUG_TRACE
> > +ctx->trace->pVtbl->EnableWriter(ctx->trace,
> AMF_TRACE_WRITER_DEBUG_OUTPUT, 1);
> > +ctx->trace->pVtbl->SetWriterLevel(ctx->trace,
> AMF_TRACE_WRITER_DEBUG_OUTPUT, AMF_TRACE_TRACE);
> > +ctx->trace->pVtbl->SetGlobalLevel(ctx->trace, AMF_TRACE_TRACE);
> > +#else
> > +ctx->trace->pVtbl->EnableWriter(ctx->trace,
> AMF_TRACE_WRITER_DEBUG_OUTPUT, 0);
> > +#endif
> 
> I don't much like this compile-time option.  What sort of messages does the
> trace writer actually give you?  Will a user ever want to enable it?

Two points:
1. There is extensive AMF logging that can help diagnose a problem. Do we want 
to have it all the time in AV_LOG_DEBUG?
2. AMD can trace to debug output and this is useful but for normal ffmpeg 
operation it is under #ifdef.

> 
> > +
> > +static GUID  AMFTextureArrayIndexGUID =
> AMFTextureArrayIndexGUIDDef;
> 
> GUID is a Windows type, should this be AMFGuid?  (I tried removing the
> check and compiling on Linux, other than the D3D11 stuff this is the only
> error.)
> 

This is a Windows type and is used with the Windows interface ID3D11Texture2D.
When Linux support is added all this section will be under #ifdef.

> > +
> > +int ff_amf_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
> > +const AVFrame *frame, int *got_packet)
> > +{
> > +int ret = 0;
> > +AMF_RESULT  res = AMF_OK;
> > +AmfContext *ctx = avctx->priv_data;
> > +AMFSurface *surface = NULL;
> > +AMFData*data = NULL;
> > +amf_bool   submitted = 0;
> > +
> > +while (!submitted) {
> > +if (!frame) { // submit drain
> > +if (!ctx->eof) { // submit drain onre time only
> > +res = ctx->encoder->pVtbl->Drain(ctx->encoder);
> > +if (res == AMF_INPUT_FULL) {
> > +av_usleep(1000); // input queue is full: wait, poll 
> > and submit
> Drain again
> > + // need to get some output and try 
> > again
> > +} else if (res == AMF_OK) {
> > +ctx->eof = 1; // drain started
> > +submitted = 1;
> > +}
> > +}
> > +} else { // submit frame
> > +if (surface == NULL) { // prepare surface from frame one time 
> > only
> > +if (frame->hw_frames_ctx && ( // HW frame detected
> > +  // check if the same 
> > hw_frames_ctx as used in
> initialization
> > +(ctx->hw_frames_ctx && frame->hw_frames_ctx->data == 
> > ctx-
> >hw_frames_ctx->data) ||
> > +// check if the same hw_device_ctx as used in 
> > initialization
> > +(ctx->hw_device_ctx && ((AVHWFramesContext*)frame-
> >hw_frames_ctx->data)->device_ctx ==
> > +(AVHWDeviceContext*)ctx->hw_device_ctx->data)
> > +)) {

Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-10-30 Thread Carl Eugen Hoyos
2017-10-30 18:56 GMT+01:00 mmironov :

[...]

> +const enum AVPixelFormat ff_amf_pix_fmts[] = {
> +AV_PIX_FMT_NV12,
> +AV_PIX_FMT_0RGB32,
> +AV_PIX_FMT_0BGR32,
> +AV_PIX_FMT_YUV420P,
> +AV_PIX_FMT_D3D11,
> +AV_PIX_FMT_NONE
> +};
> +
> +typedef struct FormatMap {
> +enum AVPixelFormat   av_format;
> +enum AMF_SURFACE_FORMAT  amf_format;
> +} FormatMap;
> +
> +static const FormatMap format_map[] =
> +{
> +{ AV_PIX_FMT_NONE,   AMF_SURFACE_UNKNOWN },
> +{ AV_PIX_FMT_NV12,   AMF_SURFACE_NV12 },

> +{ AV_PIX_FMT_0BGR32,   AMF_SURFACE_BGRA },
> +{ AV_PIX_FMT_0RGB32,   AMF_SURFACE_RGBA },

On little-endian, this is different from what you originally sent:
Which one is correct? (Visually)

> +{ AV_PIX_FMT_GRAY8,  AMF_SURFACE_GRAY8 },

> +{ AV_PIX_FMT_BGR0,   AMF_SURFACE_BGRA },

Please remove this line to reduce the confusion.
(Or fix above if this line is correct.)

> +{ AV_PIX_FMT_YUV420P,AMF_SURFACE_YV12 },
> +{ AV_PIX_FMT_YUV420P,AMF_SURFACE_YUV420P },

Do you agree that it is impossible that both lines are correct?

> +{ AV_PIX_FMT_YUYV422,AMF_SURFACE_YUY2 },
> +{ AV_PIX_FMT_D3D11,  AMF_SURFACE_NV12 },

Carl Eugen

[...]
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/2] avcodec/xan: Improve overlapping check

2017-10-30 Thread Michael Niedermayer
Fixes: memcpy-param-overlap
Fixes: 3612/clusterfuzz-testcase-minimized-6393461273001984

Found-by: continuous fuzzing process 
https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
Signed-off-by: Michael Niedermayer 
---
 libavcodec/xan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/xan.c b/libavcodec/xan.c
index 4c01c0013f..8b4ec82405 100644
--- a/libavcodec/xan.c
+++ b/libavcodec/xan.c
@@ -263,7 +263,7 @@ static inline void xan_wc3_copy_pixel_run(XanContext *s, 
AVFrame *frame,
 prevframe_index = (y + motion_y) * stride + x + motion_x;
 prevframe_x = x + motion_x;
 
-if (prev_palette_plane == palette_plane && FFABS(curframe_index - 
prevframe_index) < pixel_count) {
+if (prev_palette_plane == palette_plane && FFABS(motion_x + 
width*motion_y) < pixel_count) {
  avpriv_request_sample(s->avctx, "Overlapping copy");
  return ;
 }
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/2] avcodec/h264idct_template: Fix integer overflows in ff_h264_idct8_add()

2017-10-30 Thread Michael Niedermayer
Fixes: runtime error: signed integer overflow: 924846844 + 1457520640 cannot be 
represented in type 'int'
Fixes: 3416/clusterfuzz-testcase-minimized-6125587682820096

Found-by: continuous fuzzing process 
https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
Signed-off-by: Michael Niedermayer 
---
 libavcodec/h264idct_template.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index 3ad58c4a11..fbd07cb8d5 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -107,10 +107,10 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t 
*_block, int stride){
 }
 for( i = 0; i < 8; i++ )
 {
-const unsigned a0 =  block[0+i*8] + block[4+i*8];
-const unsigned a2 =  block[0+i*8] - block[4+i*8];
-const unsigned a4 = (block[2+i*8]>>1) - block[6+i*8];
-const unsigned a6 = (block[6+i*8]>>1) + block[2+i*8];
+const unsigned a0 =  block[0+i*8] + (unsigned)block[4+i*8];
+const unsigned a2 =  block[0+i*8] - (unsigned)block[4+i*8];
+const unsigned a4 = (block[2+i*8]>>1) - (unsigned)block[6+i*8];
+const unsigned a6 = (block[6+i*8]>>1) + (unsigned)block[2+i*8];
 
 const unsigned b0 = a0 + a6;
 const unsigned b2 = a2 + a4;
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-10-30 Thread Mark Thompson
On 30/10/17 21:30, Mironov, Mikhail wrote:
>>> +static void AMF_CDECL_CALL AMFTraceWriter_Write(AMFTraceWriter
>> *pThis,
>>> +const wchar_t *scope, const wchar_t *message)
>>> +{
>>> +AmfTraceWriter *tracer = (AmfTraceWriter*)pThis;
>>> +av_log(tracer->avctx, AV_LOG_DEBUG, "%ls: %ls", scope, message);
>>
>> Does the message necessarily include a newline already?
> 
> Yes.
> 
>>> +init_fun = (AMFInit_Fn)dlsym(ctx->library,
>> AMF_INIT_FUNCTION_NAME);
>>> +AMF_RETURN_IF_FALSE(ctx, init_fun != NULL, AVERROR_UNKNOWN,
>> "DLL %s failed to find function %s. \n", AMF_DLL_NAMEA,
>> AMF_INIT_FUNCTION_NAME);
>>
>> I think do s/ \n/\n/ for all of these messages.
> 
> Sorry, didn't get this.

Most of your messages end with a space before the newline, the space probably 
shouldn't be there.

>>> +
>>> +version_fun = (AMFQueryVersion_Fn)dlsym(ctx->library,
>> AMF_QUERY_VERSION_FUNCTION_NAME);
>>> +AMF_RETURN_IF_FALSE(ctx, version_fun != NULL,
>> AVERROR_UNKNOWN, "DLL %s failed to find function %s. \n",
>> AMF_DLL_NAMEA, AMF_QUERY_VERSION_FUNCTION_NAME);
>>> +
>>> +res = version_fun(&ctx->version);
>>> +AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s
>> failed with error %d. \n", AMF_QUERY_VERSION_FUNCTION_NAME, res);
>>> +res = init_fun(AMF_FULL_VERSION, &ctx->factory);
>>> +AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s
>> failed with error %d. \n", AMF_INIT_FUNCTION_NAME, res);
>>> +res = ctx->factory->pVtbl->GetTrace(ctx->factory, &ctx->trace);
>>> +AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN,
>> "GetTrace() failed with error %d. \n", res);
>>> +res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug);
>>> +AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN,
>> "GetDebug() failed with error %d. \n", res);
>>> +return 0;
>>> +}
>>> +
>>> +static int amf_init_context(AVCodecContext *avctx)
>>> +{
>>> +AmfContext *ctx = avctx->priv_data;
>>> +AMF_RESULT  res = AMF_OK;
>>> +
>>> +// the return of these functions indicates old state and do not affect
>> behaviour
>>> +ctx->trace->pVtbl->EnableWriter(ctx->trace,
>> AMF_TRACE_WRITER_CONSOLE, 0);
>>> +#if AMF_DEBUG_TRACE
>>> +ctx->trace->pVtbl->EnableWriter(ctx->trace,
>> AMF_TRACE_WRITER_DEBUG_OUTPUT, 1);
>>> +ctx->trace->pVtbl->SetWriterLevel(ctx->trace,
>> AMF_TRACE_WRITER_DEBUG_OUTPUT, AMF_TRACE_TRACE);
>>> +ctx->trace->pVtbl->SetGlobalLevel(ctx->trace, AMF_TRACE_TRACE);
>>> +#else
>>> +ctx->trace->pVtbl->EnableWriter(ctx->trace,
>> AMF_TRACE_WRITER_DEBUG_OUTPUT, 0);
>>> +#endif
>>
>> I don't much like this compile-time option.  What sort of messages does the
>> trace writer actually give you?  Will a user ever want to enable it?
> 
> Two points:
> 1. There is extensive AMF logging that can help diagnose a problem. Do we 
> want to have it all time in AV_LOG_DEBUG?
> 2. AMD can trace to debug output and this is useful but for normal ffmpeg 
> operation it is under #ifdef.

Help who diagnose a problem?  Either it is useful to a user, in which case put 
it behind a real option, or it isn't, in which case don't include it at all.  A 
compile-time option just encourages bitrot on whichever side is not default.

>>> +
>>> +static GUID  AMFTextureArrayIndexGUID =
>> AMFTextureArrayIndexGUIDDef;
>>
>> GUID is a Windows type, should this be AMFGuid?  (I tried removing the
>> check and compiling on Linux, other than the D3D11 stuff this is the only
>> error.)
>>
> 
> This is Windows type and used with Windows interface ID3D11Texture2D.
> When Linux support is added all this section will be under #ifdef.

It might be cleaner to put it inside the function (see below).  Also, it should 
be const.

>>> +
>>> +int ff_amf_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
>>> +const AVFrame *frame, int *got_packet)
>>> +{
>>> +int ret = 0;
>>> +AMF_RESULT  res = AMF_OK;
>>> +AmfContext *ctx = avctx->priv_data;
>>> +AMFSurface *surface = NULL;
>>> +AMFData*data = NULL;
>>> +amf_bool   submitted = 0;
>>> +
>>> +while (!submitted) {
>>> +if (!frame) { // submit drain
>>> +if (!ctx->eof) { // submit drain onre time only
>>> +res = ctx->encoder->pVtbl->Drain(ctx->encoder);
>>> +if (res == AMF_INPUT_FULL) {
>>> +av_usleep(1000); // input queue is full: wait, poll 
>>> and submit
>> Drain again
>>> + // need to get some output and try 
>>> again
>>> +} else if (res == AMF_OK) {
>>> +ctx->eof = 1; // drain started
>>> +submitted = 1;
>>> +}
>>> +}
>>> +} else { // submit frame
>>> +if (surface == NULL) { // prepare surface from frame one time 
>>> only
>>> +if (frame->hw_frames_ctx && ( // HW frame detected
>>> +   

Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-10-30 Thread Carl Eugen Hoyos
2017-10-30 23:35 GMT+01:00 Mark Thompson :
> On 30/10/17 21:30, Mironov, Mikhail wrote:
>>>> +static void AMF_CDECL_CALL AMFTraceWriter_Write(AMFTraceWriter
>>> *pThis,
>>>> +const wchar_t *scope, const wchar_t *message)
>>>> +{
>>>> +AmfTraceWriter *tracer = (AmfTraceWriter*)pThis;
>>>> +av_log(tracer->avctx, AV_LOG_DEBUG, "%ls: %ls", scope, message);
>>>
>>> Does the message necessarily include a newline already?
>>
>> Yes.
>>
>>>> +init_fun = (AMFInit_Fn)dlsym(ctx->library,
>>> AMF_INIT_FUNCTION_NAME);
>>>> +AMF_RETURN_IF_FALSE(ctx, init_fun != NULL, AVERROR_UNKNOWN,
>>> "DLL %s failed to find function %s. \n", AMF_DLL_NAMEA,
>>> AMF_INIT_FUNCTION_NAME);
>>>
>>> I think do s/ \n/\n/ for all of these messages.
>>
>> Sorry, didn't get this.
>
> Most of your messages end with a space before the newline,
> the space probably shouldn't be there.

Correct: While I like the space, it shouldn't be there for
consistency.

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 0/8] AVX-512 support (v.1)

2017-10-30 Thread Carl Eugen Hoyos
2017-10-30 14:08 GMT+01:00 James Darnley :

> There are 2 commits here that I don't intend to be applied (now).

> The first is the alignment increase reported by avutil.

Why is that bad?

> The second is the v210enc function, it passes checkasm but
> it is not any faster.  It is there to show that all the previous
> commits work correctly, namely: configure checks, cpuid
> detection, x86inc changes, checkasm.

Sounds like very good reasons to actually commit.

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH]lavf/latmenc: Error out for invalid codecs

2017-10-30 Thread Carl Eugen Hoyos
Hi!

Attached patch makes sure the loas muxer does not try to write
anything but aac and latm.

Please comment, Carl Eugen
From 9f8f39b402f77b53613a395129f96feee5e873ba Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos 
Date: Mon, 30 Oct 2017 23:49:29 +0100
Subject: [PATCH] lavf/latmenc: Error out for invalid codecs.

---
 libavformat/latmenc.c |4 
 1 file changed, 4 insertions(+)

diff --git a/libavformat/latmenc.c b/libavformat/latmenc.c
index c919976..29a74e3 100644
--- a/libavformat/latmenc.c
+++ b/libavformat/latmenc.c
@@ -89,6 +89,10 @@ static int latm_write_header(AVFormatContext *s)
 
 if (par->codec_id == AV_CODEC_ID_AAC_LATM)
 return 0;
+if (par->codec_id != AV_CODEC_ID_AAC) {
+av_log(ctx, AV_LOG_ERROR, "Only AAC and LATM are supported\n");
+return AVERROR_INVALIDDATA;
+}
 
 if (par->extradata_size > 0 &&
 latm_decode_extradata(ctx, par->extradata, par->extradata_size) < 0)
-- 
1.7.10.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-10-30 Thread Mironov, Mikhail


Mikhail

> -Original Message-
> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf
> Of Carl Eugen Hoyos
> Sent: October 30, 2017 6:19 PM
> To: FFmpeg development discussions and patches  de...@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD
> GPUs based on AMF SDK
> 
> 2017-10-30 18:56 GMT+01:00 mmironov :
> 
> [...]
> 
> > +const enum AVPixelFormat ff_amf_pix_fmts[] = {
> > +AV_PIX_FMT_NV12,
> > +AV_PIX_FMT_0RGB32,
> > +AV_PIX_FMT_0BGR32,
> > +AV_PIX_FMT_YUV420P,
> > +AV_PIX_FMT_D3D11,
> > +AV_PIX_FMT_NONE
> > +};
> > +
> > +typedef struct FormatMap {
> > +enum AVPixelFormat   av_format;
> > +enum AMF_SURFACE_FORMAT  amf_format; } FormatMap;
> > +
> > +static const FormatMap format_map[] = {
> > +{ AV_PIX_FMT_NONE,   AMF_SURFACE_UNKNOWN },
> > +{ AV_PIX_FMT_NV12,   AMF_SURFACE_NV12 },
> 
> > +{ AV_PIX_FMT_0BGR32,   AMF_SURFACE_BGRA },
> > +{ AV_PIX_FMT_0RGB32,   AMF_SURFACE_RGBA },
> 
> On little-endian, this is different from what you originally sent:
> Which one is correct? (Visually)

It should be RGB0 and BGR0 all the time. I will correct.

> 
> > +{ AV_PIX_FMT_GRAY8,  AMF_SURFACE_GRAY8 },
> 
> > +{ AV_PIX_FMT_BGR0,   AMF_SURFACE_BGRA },
> 
> Please remove this line to reduce the confusion.
> (Or fix above if this line is correct.)


OK

> 
> > +{ AV_PIX_FMT_YUV420P,AMF_SURFACE_YV12 },
> > +{ AV_PIX_FMT_YUV420P,AMF_SURFACE_YUV420P },
> 
> Do you agree that it is impossible that both lines are correct?

Agree. Will fix.

> 
> > +{ AV_PIX_FMT_YUYV422,AMF_SURFACE_YUY2 },
> > +{ AV_PIX_FMT_D3D11,  AMF_SURFACE_NV12 },
> 
> Carl Eugen
> 
> [...]
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-10-30 Thread Mironov, Mikhail
> >>> +
> >>> +int ff_amf_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
> >>> +const AVFrame *frame, int *got_packet) {
> >>> +int ret = 0;
> >>> +AMF_RESULT  res = AMF_OK;
> >>> +AmfContext *ctx = avctx->priv_data;
> >>> +AMFSurface *surface = NULL;
> >>> +AMFData*data = NULL;
> >>> +amf_bool   submitted = 0;
> >>> +
> >>> +while (!submitted) {
> >>> +if (!frame) { // submit drain
> >>> +if (!ctx->eof) { // submit drain onre time only
> >>> +res = ctx->encoder->pVtbl->Drain(ctx->encoder);
> >>> +if (res == AMF_INPUT_FULL) {
> >>> +av_usleep(1000); // input queue is full: wait,
> >>> + poll and submit
> >> Drain again
> >>> + // need to get some output and try 
> >>> again
> >>> +} else if (res == AMF_OK) {
> >>> +ctx->eof = 1; // drain started
> >>> +submitted = 1;
> >>> +}
> >>> +}
> >>> +} else { // submit frame
> >>> +if (surface == NULL) { // prepare surface from frame one time
> only
> >>> +if (frame->hw_frames_ctx && ( // HW frame detected
> >>> +  // check if the same
> >>> + hw_frames_ctx as used in
> >> initialization
> >>> +(ctx->hw_frames_ctx &&
> >>> + frame->hw_frames_ctx->data == ctx-
> >>> hw_frames_ctx->data) ||
> >>> +// check if the same hw_device_ctx as used in 
> >>> initialization
> >>> +(ctx->hw_device_ctx &&
> >>> + ((AVHWFramesContext*)frame-
> >>> hw_frames_ctx->data)->device_ctx ==
> >>> +(AVHWDeviceContext*)ctx->hw_device_ctx->data)
> >>> +)) {
> 
> (Here.)
> 
> >>> +ID3D11Texture2D* texture =
> >>> + (ID3D11Texture2D*)frame-
> >>> data[0]; // actual texture
> >>> +int index = (int)(size_t)frame->data[1]; //
> >>> + index is a slice in
> >> texture array is - set to tell AMF which slice to use
> >>
> >> (int)(intptr_t)frame->data[1];
> >>
> >>> +texture->lpVtbl->SetPrivateData(texture,
> >> &AMFTextureArrayIndexGUID, sizeof(index), &index);
> >>> +
> >>> +res =
> >>> + ctx->context->pVtbl->CreateSurfaceFromDX11Native(ctx-
> >>> context, texture, &surface, NULL); // wrap to AMF surface
> >>> +surface->pVtbl->SetCrop(surface, 0, 0,
> >>> + frame->width, frame-
> >>> height); // decode surfaces are vertically aligned by 16 tell AMF
> >>> real size
> >>
> >> "decode surfaces"?  These need not come from a decoder.  Does it work
> >> with hwupload?
> >>
> >>> +surface->pVtbl->SetPts(surface, frame->pts);
> >>> +} else {
> >>> +res =
> >>> + ctx->context->pVtbl->AllocSurface(ctx->context,
> >> AMF_MEMORY_HOST, ctx->format, avctx->width, avctx->height,
> &surface);
> >>> +AMF_RETURN_IF_FALSE(ctx, res == AMF_OK,
> >>> + AVERROR_BUG,
> >> "AllocSurface() failed  with error %d \n", res);
> >>> +amf_copy_surface(avctx, frame, surface);
> >>> +}
> >>> +}
> >>> +// encode
> >>> +res = ctx->encoder->pVtbl->SubmitInput(ctx->encoder,
> >> (AMFData*)surface);
> >>> +if (res == AMF_INPUT_FULL) { // handle full queue
> >>> +av_usleep(1000); // input queue is full: wait, poll
> >>> + and submit
> >> surface again
> >>> +} else {
> >>> +surface->pVtbl->Release(surface);
> >>> +surface = NULL;
> >>> +AMF_RETURN_IF_FALSE(ctx, res == AMF_OK,
> >> AVERROR_UNKNOWN, "SubmitInput() failed with error %d \n", res);
> >>> +submitted = 1;
> >>> +}
> >>> +}
> >>> +// poll results
> >>> +if (!data) {
> >>> +res = ctx->encoder->pVtbl->QueryOutput(ctx->encoder, &data);
> >>> +if (data) {
> >>> +AMFBuffer* buffer;
> >>> +AMFGuid guid = IID_AMFBuffer();
> >>> +data->pVtbl->QueryInterface(data, &guid,
> >>> + (void**)&buffer); //
> >> query for buffer interface
> >>> +ret = amf_copy_buffer(avctx, pkt, buffer);
> >>> +if (!ret)
> >>> +*got_packet = 1;
> >>> +buffer->pVtbl->Release(buffer);
> >>> +data->pVtbl->Release(data);
> >>> +if (ctx->eof) {
> >>> +submitted = 1; // we are in the drain state - no 
> >>> submissions
> >>> +}
> >>> +} else if (res == AMF_EOF) {
> >>> +submitted = 1; // drain complete
> >>> +} else {
> >>> +if (!submitted) {
> >>> +av_usleep(1000); // wait and poll again
> >>> +   

[FFmpeg-devel] [PATCH]lavf/mxfenc: Support 60fps output

2017-10-30 Thread Carl Eugen Hoyos
Hi!

Kasper Folman has reported that FFmpeg does not write 60fps mxf output
and has provided a 60fps mxf file on -users.

Attached patch allows 60fps output.

Please comment, Carl Eugen
From bb5b071db9a8977bd1a7dea579b4d244e17ca3a3 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos 
Date: Tue, 31 Oct 2017 00:19:38 +0100
Subject: [PATCH] lavf/mxfenc: Support 60fps output.

---
 libavformat/mxf.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/mxf.c b/libavformat/mxf.c
index bfc3218..5994b09 100644
--- a/libavformat/mxf.c
+++ b/libavformat/mxf.c
@@ -137,6 +137,7 @@ static const MXFSamplesPerFrame mxf_spf[] = {
 { { 1001, 6 }, { 801,  801,  801,  801,  800,  0 } }, // NTSC 59.94
 { { 1, 25 },   { 1920, 0,0,0,0,0 } }, // PAL 25
 { { 1, 50 },   { 960,  0,0,0,0,0 } }, // PAL 50
+{ { 1, 60 },   { 800,  0,0,0,0,0 } },
 };
 
 static const AVRational mxf_time_base[] = {
@@ -146,6 +147,7 @@ static const AVRational mxf_time_base[] = {
 { 1001, 6 },
 { 1, 25 },
 { 1, 50 },
+{ 1, 60 },
 { 0, 0}
 };
 
-- 
1.7.10.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [RFC]lswr/rematrix: Support s32p

2017-10-30 Thread Carl Eugen Hoyos
2017-10-30 9:47 GMT+01:00 Muhammad Faiz :
> On Sun, Oct 29, 2017 at 3:55 AM, Carl Eugen Hoyos  wrote:
>> Hi!
>>
>> Attached patch fixes a random testcase for ticket #6785 here but I
>> don't know if this is the correct fix.
>>
>> Please review, Carl Eugen
>>
>> From a93b9309d74f5eadece371ee1e682d266af6cd83 Mon Sep 17 00:00:00 2001
>> From: Carl Eugen Hoyos 
>> Date: Sat, 28 Oct 2017 22:52:02 +0200
>> Subject: [PATCH] lswr/rematrix: Support s32p.
>>
>> Fixes ticket #6785.
>> ---
>>  libswresample/rematrix.c |   21 +++--
>>  1 file changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
>> index 66a43c1..a6aa6b0 100644
>> --- a/libswresample/rematrix.c
>> +++ b/libswresample/rematrix.c
>> @@ -445,14 +445,23 @@ av_cold int swri_rematrix_init(SwrContext *s){
>>  s->mix_2_1_f = (mix_2_1_func_type*)sum2_double;
>>  s->mix_any_f = (mix_any_func_type*)get_mix_any_func_double(s);
>>  }else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){
>> -// Only for dithering currently
>> -// s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double));
>> -s->native_one= av_mallocz(sizeof(int));
>> +s->native_one= av_mallocz(sizeof(int64_t));
>>  if (!s->native_one)
>>  return AVERROR(ENOMEM);
>> -// for (i = 0; i < nb_out; i++)
>> -// for (j = 0; j < nb_in; j++)
>> -// ((double*)s->native_matrix)[i * nb_in + j] = 
>> s->matrix[i][j];
>> +s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int));
>> +if (!s->native_matrix) {
>> +av_freep(&s->native_one);
>> +return AVERROR(ENOMEM);
>> +}
>> +for (i = 0; i < nb_out; i++) {
>> +double rem = 0;
>> +
>> +for (j = 0; j < nb_in; j++) {
>> +double target = s->matrix[i][j] * 32768 + rem;
>> +((int*)s->native_matrix)[i * nb_in + j] = lrintf(target);
>> +rem += target - ((int64_t*)s->native_matrix)[i * nb_in + j];
>> +}
>> +}
>>  *((int*)s->native_one) = 32768;
>>  s->mix_1_1_f = (mix_1_1_func_type*)copy_s32;
>>  s->mix_2_1_f = (mix_2_1_func_type*)sum2_s32;
>
> The code is confusing.
> What is the type of native_matrix and native_one: int or int64_t?

New patch attached.

Please review, Carl Eugen
From a93b9309d74f5eadece371ee1e682d266af6cd83 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos 
Date: Sat, 28 Oct 2017 22:52:02 +0200
Subject: [PATCH] lswr/rematrix: Support s32p.

Fixes ticket #6785.
---
 libswresample/rematrix.c |   21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
index 66a43c1..a6aa6b0 100644
--- a/libswresample/rematrix.c
+++ b/libswresample/rematrix.c
@@ -445,14 +445,23 @@ av_cold int swri_rematrix_init(SwrContext *s){
 s->mix_2_1_f = (mix_2_1_func_type*)sum2_double;
 s->mix_any_f = (mix_any_func_type*)get_mix_any_func_double(s);
 }else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){
-// Only for dithering currently
-// s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double));
-s->native_one= av_mallocz(sizeof(int));
+s->native_one= av_mallocz(sizeof(int));
 if (!s->native_one)
 return AVERROR(ENOMEM);
-// for (i = 0; i < nb_out; i++)
-// for (j = 0; j < nb_in; j++)
-// ((double*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j];
+s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int));
+if (!s->native_matrix) {
+av_freep(&s->native_one);
+return AVERROR(ENOMEM);
+}
+for (i = 0; i < nb_out; i++) {
+double rem = 0;
+
+for (j = 0; j < nb_in; j++) {
+double target = s->matrix[i][j] * 32768 + rem;
+((int*)s->native_matrix)[i * nb_in + j] = lrintf(target);
+rem += target - ((int*)s->native_matrix)[i * nb_in + j];
+}
+}
 *((int*)s->native_one) = 32768;
 s->mix_1_1_f = (mix_1_1_func_type*)copy_s32;
 s->mix_2_1_f = (mix_2_1_func_type*)sum2_s32;
-- 
1.7.10.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 0/8] AVX-512 support (v.1)

2017-10-30 Thread Martin Vignali
Hello,


> > The second is the v210enc function, it passes checkasm but
> > it is not any faster.  It is there to show that all the previous
> > commits work correctly, namely: configure checks, cpuid
> > detection, x86inc changes, checkasm.
>
> 
>

If the goal is to check the other patches, you can probably use a simpler
function, but one that is "really" adapted for AVX512,
like blockdsp and/or losslessvideodsp->addbytes, which can probably be switched
to AVX512 without a lot of work.

For losslessvideodsp (see the "libavcodec/lossless_videodsp : add add_bytes
AVX2" discussion, not yet applied),
the mmsize is basically enough to make a basic test.

Both already have checkasm tests written, and can really test the "zmm part".

Martin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] lavfi/testsrc2: fix hang with very small sizes.

2017-10-30 Thread Martin Vignali
2017-10-30 10:14 GMT+01:00 Nicolas George :

> Move a subtraction to the other side of the equal to avoid overflows.
>
> Signed-off-by: Nicolas George 
> ---
>  libavfilter/vsrc_testsrc.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/libavfilter/vsrc_testsrc.c b/libavfilter/vsrc_testsrc.c
> index fe0d50aa41..a790974d14 100644
> --- a/libavfilter/vsrc_testsrc.c
> +++ b/libavfilter/vsrc_testsrc.c
> @@ -857,8 +857,8 @@ static void test2_fill_picture(AVFilterContext *ctx,
> AVFrame *frame)
>  uint8_t alpha[256];
>
>  r = s->pts;
> -for (y = ymin; y < ymax - 15; y += 16) {
> -for (x = xmin; x < xmax - 15; x += 16) {
> +for (y = ymin; y + 15 < ymax; y += 16) {
> +for (x = xmin; x + 15 < xmax; x += 16) {
>  if ((x ^ y) & 16)
>  continue;
>  for (i = 0; i < 256; i++) {
> --
> 2.14.2


lgtm
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 4/8] avutil: add alignment needed for AVX-512

2017-10-30 Thread James Almer
On 10/30/2017 10:08 AM, James Darnley wrote:
> ---
>  libavutil/x86/cpu.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
> index b22a950b79..c56f2a8754 100644
> --- a/libavutil/x86/cpu.c
> +++ b/libavutil/x86/cpu.c
> @@ -247,6 +247,8 @@ size_t ff_get_cpu_max_align_x86(void)
>  {
>  int flags = av_get_cpu_flags();
>  
> +if (flags & AV_CPU_FLAG_AVX512)
> +return 64;
>  if (flags & (AV_CPU_FLAG_AVX2  |
>   AV_CPU_FLAG_AVX   |
>   AV_CPU_FLAG_XOP   |
> 

av_get_cpu_max_align() is not currently being used to align buffers in
libavutil/libavcodec, so you also need to change the STRIDE_ALIGN
constant to 64 before you can feasibly start using avx512 functions.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] On in-tree external headers

2017-10-30 Thread Stephen Hutchinson

On 10/30/2017 4:16 PM, Jan Ekstrom wrote:

On Mon, Oct 30, 2017 at 9:51 PM, Mark Thompson  wrote:

PPS:

The position stated above would imply removing the avisynth headers.  Can 
anyone who uses it comment on what would be required for that?


Avisynth headers are available from the Avisynth SDK that comes with
each installer of Avisynth. The reason why some projects made their
own forks of the header is because of the useful features added during
the (rather long) development phase of the 2.6 release, during which
the GPL exceptions regarding the header were removed. And thus people
re-implemented them basing on the 2.5.8 stable header. Not sure if the
GPL exceptions were re-instated with the final release of Avisynth
2.6, and what is the status with the headers that come with the
Avisynth+ project. In any case, the headers are readily available from
various places.

Otherwise the only difference with the included header would be
Avxsynth compatibility? Which is quite a bit less used than original
Avisynth on wine, or Vapoursynth natively on *nix at this point. It
was one of those things where a company forked an old version of a
project and made it compile on *nix, while most of the plugins etc
were quite Windows-specific. The project still does exist and is
available, though.



The short version: it's a mess.  The slightly longer version:
there's a regression in the current version of the AviSynth+
headers that would mean requiring users to jump through some
additional hoops.

The flimsiest argument would be that AviSynth support, from the
time the first demuxer was added in 2006, was always just a
simple --enable-avisynth and it just worked.  The first demuxer
didn't require external headers (because it was accessed via VfW),
so to preserve the behavior behind --enable-avisynth (and for
reasons I detail below), the correct headers were included in
compat/avisynth/.

AviSynth has two different API interfaces: the C++ interface as
implemented in avisynth.h, and the C interface as implemented in
avisynth_c.h.  FFmpeg's support of AviSynth (and x264's) use the
C interface header.

The licensing mess that occurred during AviSynth 2.6's development
only affected the C++ header: the GPL exception was never taken
out of avisynth_c.h, but upstream AviSynth 2.6 also never added
equal features to the C header until late in 2.6's dev cycle (the
proper non-alpha/beta/RC release of 2.6 didn't occur until May 2016,
and yes, by that point the GPL exception had been added back to the
C++ header). This meant that in 2011(?), when x264 wanted to add support
for 2.6's additional pixel formats (YV16/yuv422p, YV24/yuv444p), it
needed to ship a modified version of avisynth_c.h that didn't exist
upstream.

When the AviSynth demuxer in libavformat was rewritten in 2012-2013,
it used x264's forked headers.  In both x264's and FFmpeg's cases,
the local copies of the headers were provided so that users wouldn't
have to manually copy the headers around and hope they get the right
one (since 2.5's headers would not work correctly with the addition
of yuv422p, yuv444p, and gray).  AvxSynth's were included mostly so that 
the inclusion behavior is the same - AviSynth as a Windows project is
MSVC-specific and didn't have a standard installation Makefile for
cross-compiling in its source tree, so the easiest way to ensure users
grabbed the correct header was by putting them in compat.

AviSynth+ altered this dynamic in some ways.  Notably, AviSynth+ 
supports *tons* of new pixfmts (support for these was added to FFmpeg
in 2016, for the most part), and it also includes a GNUmakefile so
that the headers can be installed under a normal FHS directory
structure.  And AviSynth+ did integrate the compatible changes from
2.6's RCs and Final releases, including the restoration of the GPL
exception to the C++ header - so the licensing for AviSynth+'s headers
is the same as classic AviSynth 2.6.

But there's another problem now: AviSynth+'s source was made compliant 
with MinGW-w64 and GCC in August/September 2016, but the changes to the
capi.h header made 32-bit MSVC builds of the library incompatible
with a program built with GCC, like most times when building FFmpeg.
I never attempted to see if the newer capi.h allowed MSVC-built
AviSynth.dll to work with MSVC-built FFmpeg, but that's not a workable
solution because the AviSynth demuxer doesn't link AviSynth.dll to
FFmpeg, it dynamically loads the library through the dlopen compat
(dynamic loading is also the AviSynth-approved way of using the
library), and it still screws over those of us that cross-compile
FFmpeg for win32 with MinGW-w64 and GCC.

Since capi.h in upstream AviSynth+ causes that mess, using the
GNUmakefile in AviSynth+'s source tree will get you technically
wrong headers unless you do a git checkout first.  It would mean
FFmpeg would fall into the same trap Libav did, since they've always
required AviSynth's headers to be installed to the system rather
than provided in c

Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-10-30 Thread Marton Balint



On Mon, 30 Oct 2017, Mironov, Mikhail wrote:

[...]


I still think this would be much better off using the
send_frame()/receive_packet() API.  Even if your API doesn't expose
any information about the queue length, you only need to hold a
single input frame transiently to get around that (the user is not
allowed to call
send_frame() twice in a row without calling receive_packet()).



So to implement this I would have to:
- in the send_frame() if AMF_INPUT_FULL is returned - store input
frame (or copy?)
- In the next receive_frame() check if frame is stored
- Wait till some output is produced
- resubmit stored frame


Sounds about right.


Issues I see:
- Doesn't this logic defeat the purpose of independent send()/receive()?
- How can I report an error if receive() produced a compressed frame but
the delayed submission failed?

Since this is asynchronous anyway, just report it at the next available
opportunity.


- This logic depends on the particular logic in the calling code.


The API requires this behaviour of the caller.  See the documentation in
avcodec.h.


- This logic depends on the particular HW behaviour.


How so?


- In the future, we would like to output individual slices of a compressed
frame.
When this is added, receive_frame() must be called several times to clear
space in the HW queue.

Granted, current implementation also does not cover this case but
truly independent send/receive implementation would.


Note that the user is required to call receive_packet() repeatedly until it
returns EAGAIN, and only then are they allowed to call send_frame() again.


The implementation will be cumbersome at least. Note that calling Drain()
may also return AMF_INPUT_FULL and therefore will have to be remembered and
called again in receive(). But I will implement it as you suggest. It is not a
huge change.



I see some confusion. The user can call send_frame/receive_packet in 
any order, and you can implement send_frame and receive_packet any way you 
want, the only thing you have to guarantee is that you cannot return 
EAGAIN for both send_frame and receive_packet. Not even temporarily.


If you returned EAGAIN in send_frame, you must return success or a 
normal error in receive_packet. If you returned EAGAIN in 
receive_packet, you must return success or a normal error in 
send_frame.


By returning EAGAIN in receive_packet you make sure that the API user 
submits as many frames as needed to fill your pipeline.


The simplest solution really seems to me what Mark proposed:

send_frame:

if (have_stored_frame)
  return EAGAIN;
if (amd_send_frame() == INPUT_FULL)
  store_frame;
return 0;

receive_packet:

if (have_stored_frame) {
  if (amd_send_frame() == OK)
 unstore_frame;
  block_until_have_packet
  return packet
} else {
  return EAGAIN
}

I hope I did not mess it up, proper draining and error handling obviously 
needs some minor changes.


Regards,
Marton
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix quadratic memory use in ff_h2645_extract_rbsp() when multiple NALUs exist in packet.

2017-10-30 Thread Kieran Kunhya
On Tue, 24 Oct 2017 at 00:56 Carl Eugen Hoyos  wrote:

> 2017-10-24 1:43 GMT+02:00 Niki Bowe :
> > On Thu, Oct 19, 2017 at 3:39 PM, Carl Eugen Hoyos 
> > wrote:
>
> >> Does the patch have a measurable speed impact?
> >>
> > Is there a standard set of benchmarks I can run?
> >
> > For typical videos the speed impact is small, due to NALU fitting in
> cache,
> > but for videos with many large NALUs there can be some slowdown.
>
> (5% overall slowdown would make every patch unacceptable
> but I doubt this is the case.)
>
> Use the TIMER macros from libavutil/timer.h, put them around all
> calls to ff_h2645_extract_rbsp().
>

Related to https://trac.ffmpeg.org/ticket/6789, we see huge memory
allocations in this code, so if this patch fixes them, it should be committed
irrespective of any speed loss.

Kieran
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix quadratic memory use in ff_h2645_extract_rbsp() when multiple NALUs exist in packet.

2017-10-30 Thread Kieran Kunhya
On Tue, 31 Oct 2017 at 01:42 Kieran Kunhya  wrote:

> On Tue, 24 Oct 2017 at 00:56 Carl Eugen Hoyos  wrote:
>
>> 2017-10-24 1:43 GMT+02:00 Niki Bowe :
>> > On Thu, Oct 19, 2017 at 3:39 PM, Carl Eugen Hoyos 
>> > wrote:
>>
>> >> Does the patch have a measurable speed impact?
>> >>
>> > Is there a standard set of benchmarks I can run?
>> >
>> > For typical videos the speed impact is small, due to NALU fitting in
>> cache,
>> > but for videos with many large NALUs there can be some slowdown.
>>
>> (5% overall slowdown would make every patch unacceptable
>> but I doubt this is the case.)
>>
>> Use the TIMER macros from libavutil/timer.h, put them around all
>> calls to ff_h2645_extract_rbsp().
>>
>
> Related to https://trac.ffmpeg.org/ticket/6789, we see huge memory
> allocations in this code, so if this patch fixes them, it should be committed
> irrespective of any speed loss.
>
> Kieran
>

I confirm this patch fixes ticket 6789.

Kieran
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix quadratic memory use in ff_h2645_extract_rbsp() when multiple NALUs exist in packet.

2017-10-30 Thread Michael Niedermayer
Hi

On Mon, Oct 23, 2017 at 04:43:55PM -0700, Niki Bowe wrote:
> On Thu, Oct 19, 2017 at 3:39 PM, Carl Eugen Hoyos 
> wrote:
> 
> > 2017-10-19 20:46 GMT+02:00 Nikolas Bowe :
> > > Found via fuzzing.
> > > /tmp/poc is a 1 MB mpegts file generated via fuzzing, where 1 packet has
> > many NALUs
> > > Before this change:
> > >   $ /usr/bin/time -f "\t%M Max Resident Set Size (Kb)"  ./ffprobe
> > /tmp/poc 2>&1 | tail -n 1
> > > 2158192 Max Resident Set Size (Kb)
> > > After this change:
> > >   $ /usr/bin/time -f "\t%M Max Resident Set Size (Kb)"  ./ffprobe
> > /tmp/poc 2>&1 | tail -n 1
> > > 1046812 Max Resident Set Size (Kb)
> >
> > This does not look like a fix for a "quadratic" memory consumption or
> > do I misunderstand?
> >
> 
> Before this patch, for each NALU in the packet, rbsp_buffer would be sized
> from the start of the NALU to the end of the packet, not the end of the
> NALU.
> This would occur for each NALU in the packet. Total memory allocated in all
> the rbsp_buffers for all the NALUs in the packet would be N + (N+x1) +
> (N+x2) + ...
> This is quadratic in the number of NALUs in the packet.

A better solution would be to allocate a buffer sized based on the
whole packet and then have all the NALs point into that.
That way there's no need to know the size of the first NAL during
allocation.
This should have even lower memory overhead than your code
fewer alloc/free calls, and no speed loss

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I am the wisest man alive, for I know one thing, and that is that I know
nothing. -- Socrates


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH]lavc/dnxhddata: Do not print frame rates with supported profiles

2017-10-30 Thread Carl Eugen Hoyos
Hi!

Attached patch is meant to fix ticket #4815: Nobody can maintain the
list of frame-rates, the current output primarily leads to confusion
instead of helping users.

Please comment, Carl Eugen
From ab2371ace24d4e2ed99b4f5beb2676433cf15e88 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos 
Date: Tue, 31 Oct 2017 02:56:31 +0100
Subject: [PATCH] lavc/dnxhddata: Do not print frame rates with supported
 profiles.

The list is unmaintained, misleading and the frame rates are
ignored by all compatible players.
---
 libavcodec/dnxhddata.c |   50 ++--
 libavcodec/dnxhddata.h |1 -
 2 files changed, 19 insertions(+), 32 deletions(-)

diff --git a/libavcodec/dnxhddata.c b/libavcodec/dnxhddata.c
index 4462df3..e995449 100644
--- a/libavcodec/dnxhddata.c
+++ b/libavcodec/dnxhddata.c
@@ -939,96 +939,84 @@ const CIDEntry ff_dnxhd_cid_table[] = {
   dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
   dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
   dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
-  { 175, 185, 365, 440 },
-  { { 24000, 1001 }, { 25, 1 }, { 50, 1 }, { 6, 1001 } } },
+  { 175, 185, 365, 440 } },
 { 1237, 1920, 1080, 606208, 606208,
   0, 4, 8, 3,
   dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
   dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
   dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
   dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-  { 115, 120, 145, 240, 290 },
-  { { 24000, 1001 }, { 25, 1 }, { 3, 1001 }, { 50, 1 }, { 6, 1001 } } },
+  { 115, 120, 145, 240, 290 } },
 { 1238, 1920, 1080, 917504, 917504,
   0, 4, 8, 4,
   dnxhd_1238_luma_weight, dnxhd_1238_chroma_weight,
   dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
   dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_info,
   dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1238_run,
-  { 175, 185, 220, 365, 440 },
-  { { 24000, 1001 }, { 25, 1 }, { 3, 1001 }, { 50, 1 }, { 6, 1001 } } },
+  { 175, 185, 220, 365, 440 } },
 { 1241, 1920, 1080, 917504, 458752,
   DNXHD_INTERLACED, 6, 10, 4,
   dnxhd_1241_luma_weight, dnxhd_1241_chroma_weight,
   dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
   dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
   dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
-  { 185, 220 },
-  { { 25, 1 }, { 3, 1001 } } },
+  { 185, 220 } },
 { 1242, 1920, 1080, 606208, 303104,
   DNXHD_INTERLACED, 4, 8, 3,
   dnxhd_1242_luma_weight, dnxhd_1242_chroma_weight,
   dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
   dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
   dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-  { 120, 145 },
-  { { 25, 1 }, { 3, 1001 } } },
+  { 120, 145 } },
 { 1243, 1920, 1080, 917504, 458752,
   DNXHD_INTERLACED, 4, 8, 4,
   dnxhd_1243_luma_weight, dnxhd_1243_chroma_weight,
   dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
   dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_info,
   dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1238_run,
-  { 185, 220 },
-  { { 25, 1 }, { 3, 1001 } } },
+  { 185, 220 } },
 { 1244, 1440, 1080, 606208, 303104,
   DNXHD_INTERLACED, 4, 8, 3,
   dnxhd_1260_luma_weight, dnxhd_1260_chroma_weight,
   dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
   dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
   dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-  { 120, 145 },
-  { { 25, 1 }, { 3, 1001 } } },
+  { 120, 145 } },
 { 1250, 1280,  720, 458752, 458752,
   0, 6, 10, 4,
   dnxhd_1250_luma_weight, dnxhd_1250_chroma_weight,
   dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
   dnxhd_1250_ac_codes, dnxhd_1250_ac_bits, dnxhd_1250_ac_info,
   dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
-  { 90, 90, 180, 220 },
-  { { 24000, 1001 }, { 25, 1 }, { 50, 1 }, { 6, 1001 } } },
+  { 90, 180, 220 } },
 { 1251, 1280,  720, 458752, 458752,
   0, 4, 8, 4,
   dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight,
   dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
   dnxhd_1251_ac_codes, dnxhd_1251_ac_bits, dnxhd_1251_ac_info,
   dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
-  { 90, 90, 110, 180, 220 },
-  { { 24000, 1001 }, { 25, 1 }, { 3, 1001 }, { 50, 1 }, { 6, 1001 } } },
+  { 90, 110, 180, 220 } },
 { 1252, 1280,  720, 303104, 303104,
   0, 4, 8, 5,
   dnxhd_1252_luma_weight, dnxhd_1252_chroma_weight,
   dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
   dnxhd_1252_ac_codes, dnxhd_1252_ac_bits, dnxhd_1252_ac_info,
   dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
-  { 60, 60, 75, 120, 145 },
-  { { 24000, 1001 }, { 25, 1 }, { 3, 1001 }, { 50, 1 }, { 60

Re: [FFmpeg-devel] [PATCH] Fix quadratic memory use in ff_h2645_extract_rbsp() when multiple NALUs exist in packet.

2017-10-30 Thread Michael Niedermayer
On Thu, Oct 19, 2017 at 11:46:47AM -0700, Nikolas Bowe wrote:
> Found via fuzzing.
> /tmp/poc is a 1 MB mpegts file generated via fuzzing, where 1 packet has many 
> NALUs
> Before this change:
>   $ /usr/bin/time -f "\t%M Max Resident Set Size (Kb)"  ./ffprobe /tmp/poc 
> 2>&1 | tail -n 1
>   2158192 Max Resident Set Size (Kb)
> After this change:
>   $ /usr/bin/time -f "\t%M Max Resident Set Size (Kb)"  ./ffprobe /tmp/poc 
> 2>&1 | tail -n 1
>   1046812 Max Resident Set Size (Kb)
> ---
>  libavcodec/h2645_parse.c | 13 +++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c
> index b0d9ff66f0..e77689f347 100644
> --- a/libavcodec/h2645_parse.c
> +++ b/libavcodec/h2645_parse.c
> @@ -32,7 +32,7 @@
>  int ff_h2645_extract_rbsp(const uint8_t *src, int length,
>H2645NAL *nal, int small_padding)
>  {
> -int i, si, di;
> +int i, si, di, nsc;
>  uint8_t *dst;
>  int64_t padding = small_padding ? 0 : MAX_MBPAIR_SIZE;
>  
> @@ -91,8 +91,17 @@ int ff_h2645_extract_rbsp(const uint8_t *src, int length,
>  } else if (i > length)
>  i = length;
>  
> +// Find next NAL start code, if present, to reduce rbsp_buffer size when
> +// multiple NALUs.
> +for (nsc = i; nsc + 2 < length; nsc++) {
> +if (src[nsc] == 0 && src[nsc + 1] == 0 && src[nsc + 2] == 1)
> +  break;
> +}
> +if (nsc + 2 == length)
> +nsc = length;
> +
>  av_fast_padded_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
> -  length + padding);
> +  nsc + padding);
>  if (!nal->rbsp_buffer)
>  return AVERROR(ENOMEM);

This reduces memory consumption from quadratic to linear, but I think
it can still be made to allocate very large amounts of memory.
That is, with many small NAL units, MAX_MBPAIR_SIZE would be allocated
for each in the worst case.
So this does fix the quadratic issue but not the OOM issue.
Using the same buffer for all would fix it, unless I am missing something.
Using the same buffer avoids the padding needs for all but the last.
So it's a lot less memory for many small NAL units.

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

When the tyrant has disposed of foreign enemies by conquest or treaty, and
there is nothing more to fear from them, then he is always stirring up
some war or other, in order that the people may require a leader. -- Plato


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix quadratic memory use in ff_h2645_extract_rbsp() when multiple NALUs exist in packet.

2017-10-30 Thread Michael Niedermayer
On Thu, Oct 19, 2017 at 11:46:47AM -0700, Nikolas Bowe wrote:
> Found via fuzzing.
> /tmp/poc is a 1 MB mpegts file generated via fuzzing, where 1 packet has many 
> NALUs
> Before this change:
>   $ /usr/bin/time -f "\t%M Max Resident Set Size (Kb)"  ./ffprobe /tmp/poc 
> 2>&1 | tail -n 1
>   2158192 Max Resident Set Size (Kb)
> After this change:
>   $ /usr/bin/time -f "\t%M Max Resident Set Size (Kb)"  ./ffprobe /tmp/poc 
> 2>&1 | tail -n 1
>   1046812 Max Resident Set Size (Kb)
> ---
>  libavcodec/h2645_parse.c | 13 +++--
>  1 file changed, 11 insertions(+), 2 deletions(-)

This patch also fixes 2145/clusterfuzz-testcase-minimized-5866217724182528;
that should be added to the commit message.

(Though, as said, this fix is not ideal or complete; I would very much
 prefer if this would be fixed by using a single buffer or any other
 solution that avoids the speed loss.)

Also please tell me in case you choose not to work on this further.

thx

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Answering whether a program halts or runs forever is
On a turing machine, in general impossible (turings halting problem).
On any real computer, always possible as a real computer has a finite number
of states N, and will either halt in less than N cycles or never halt.


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/2] lavc/vaapi_encode: correct the HRD buffer size calculate.

2017-10-30 Thread Jun Zhao

From 7eef9be1c8a92bf625d62a0f97f762f1342c6d78 Mon Sep 17 00:00:00 2001
From: Jun Zhao 
Date: Tue, 31 Oct 2017 10:13:42 +0800
Subject: [PATCH 1/2] lavc/vaapi_encode: correct the HRD buffer size calculate.

When rc_buffer_size is not set, always use the max bit rate
per second as the HRD buffer size.

Signed-off-by: Jun Zhao 
Signed-off-by: Wang, Yi A 
---
 libavcodec/vaapi_encode.c | 21 ++---
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 590f4be4ed..d5f89ef346 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -1144,19 +1144,9 @@ static av_cold int 
vaapi_encode_init_rate_control(AVCodecContext *avctx)
 return AVERROR(EINVAL);
 }
 
-if (avctx->rc_buffer_size)
-hrd_buffer_size = avctx->rc_buffer_size;
-else
-hrd_buffer_size = avctx->bit_rate;
-if (avctx->rc_initial_buffer_occupancy)
-hrd_initial_buffer_fullness = avctx->rc_initial_buffer_occupancy;
-else
-hrd_initial_buffer_fullness = hrd_buffer_size * 3 / 4;
-
 if (ctx->va_rc_mode == VA_RC_CBR) {
 rc_bits_per_second   = avctx->bit_rate;
 rc_target_percentage = 100;
-rc_window_size   = 1000;
 } else {
 if (avctx->rc_max_rate < avctx->bit_rate) {
 // Max rate is unset or invalid, just use the normal bitrate.
@@ -1166,8 +1156,17 @@ static av_cold int 
vaapi_encode_init_rate_control(AVCodecContext *avctx)
 rc_bits_per_second   = avctx->rc_max_rate;
 rc_target_percentage = (avctx->bit_rate * 100) / 
rc_bits_per_second;
 }
-rc_window_size = (hrd_buffer_size * 1000) / avctx->bit_rate;
 }
+rc_window_size = (rc_bits_per_second * 1000) / avctx->bit_rate;
+
+if (avctx->rc_buffer_size)
+hrd_buffer_size = avctx->rc_buffer_size;
+else
+hrd_buffer_size = rc_bits_per_second;
+if (avctx->rc_initial_buffer_occupancy)
+hrd_initial_buffer_fullness = avctx->rc_initial_buffer_occupancy;
+else
+hrd_initial_buffer_fullness = hrd_buffer_size * 3 / 4;
 
 ctx->rc_params.misc.type = VAEncMiscParameterTypeRateControl;
 ctx->rc_params.rc = (VAEncMiscParameterRateControl) {
-- 
2.14.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/2] lavc/vaapi_encode_h264: correct bit_rate_scale setting.

2017-10-30 Thread Jun Zhao

From d1e105057e93e7c2788d6d684292db9008fbf3ac Mon Sep 17 00:00:00 2001
From: Jun Zhao 
Date: Tue, 31 Oct 2017 10:19:08 +0800
Subject: [PATCH 2/2] lavc/vaapi_encode_h264: correct bit_rate_scale setting.

As H264 Spec 2012 E.2.2, bit_rate_scale means the max input bit rate.

Signed-off-by: Jun Zhao 
Signed-off-by: Wang, Yi A 
---
 libavcodec/vaapi_encode_h264.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
index 1d43e934ef..27a810c64e 100644
--- a/libavcodec/vaapi_encode_h264.c
+++ b/libavcodec/vaapi_encode_h264.c
@@ -406,7 +406,7 @@ static int 
vaapi_encode_h264_init_sequence_params(AVCodecContext *avctx)
 // Try to scale these to a sensible range so that the
 // golomb encode of the value is not overlong.
 hrd->bit_rate_scale =
-av_clip_uintp2(av_log2(avctx->bit_rate) - 15 - 6, 4);
+av_clip_uintp2(av_log2(FFMAX(avctx->bit_rate, avctx->rc_max_rate)) 
- 15 - 6, 4);
 hrd->bit_rate_value_minus1[0] =
 (avctx->bit_rate >> hrd->bit_rate_scale + 6) - 1;
 
-- 
2.14.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 6/8] x86inc: reduce difference to x264 upstream

2017-10-30 Thread Michael Niedermayer
On Mon, Oct 30, 2017 at 02:08:33PM +0100, James Darnley wrote:
> These changes were committed to x264 in b568a256 "Experimental nasm
> support"
> ---
>  libavutil/x86/x86inc.asm | 16 ++--
>  1 file changed, 14 insertions(+), 2 deletions(-)

breaks build:

libavcodec/x86/rv34dsp.asm:211: error: parser: instruction expected
libavcodec/x86/rv34dsp.asm:211: error: label or instruction expected at start 
of line
libavcodec/x86/rv34dsp.asm:213: error: parser: instruction expected
libavcodec/x86/rv34dsp.asm:213: error: label or instruction expected at start 
of line
make: *** [libavcodec/x86/rv34dsp.o] Error 1
make: *** Waiting for unfinished jobs

NASM version 2.10.09 compiled on Dec 29 2013


[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Let us carefully observe those good qualities wherein our enemies excel us
and endeavor to excel them, by avoiding what is faulty, and imitating what
is excellent in them. -- Plutarch


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec/dca: return standard error codes in avpriv_dca_parse_core_frame_header()

2017-10-30 Thread James Almer
This prevents making the DCAParseError enum part of the ABI.

Signed-off-by: James Almer 
---
 libavcodec/dca.c | 11 ---
 libavcodec/dca.h | 12 ++--
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 942fe6c3c9..799c414259 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -149,9 +149,14 @@ int ff_dca_parse_core_frame_header(DCACoreFrameHeader *h, 
GetBitContext *gb)
 int avpriv_dca_parse_core_frame_header(DCACoreFrameHeader *h, const uint8_t 
*buf, int size)
 {
 GetBitContext gb;
+int ret;
 
-if (init_get_bits8(&gb, buf, size) < 0)
-return DCA_PARSE_ERROR_INVALIDDATA;
+ret = init_get_bits8(&gb, buf, size);
+if (ret < 0)
+return ret;
 
-return ff_dca_parse_core_frame_header(h, &gb);
+if (ff_dca_parse_core_frame_header(h, &gb) < 0)
+return AVERROR_INVALIDDATA;
+
+return 0;
 }
diff --git a/libavcodec/dca.h b/libavcodec/dca.h
index c70598af92..b05e5f896e 100644
--- a/libavcodec/dca.h
+++ b/libavcodec/dca.h
@@ -46,7 +46,6 @@ enum DCAParseError {
 DCA_PARSE_ERROR_RESERVED_BIT= -7,
 DCA_PARSE_ERROR_LFE_FLAG= -8,
 DCA_PARSE_ERROR_PCM_RES = -9,
-DCA_PARSE_ERROR_INVALIDDATA = -10,
 };
 
 typedef struct DCACoreFrameHeader {
@@ -211,10 +210,19 @@ int avpriv_dca_convert_bitstream(const uint8_t *src, int 
src_size, uint8_t *dst,
 
 /**
  * Parse and validate core frame header
- * @return 0 on success, negative DCA_PARSE_ERROR_ code on failure
+ * @param[out] h    Pointer to struct where header info is written.
+ * @param[in]  buf  Pointer to the data buffer
+ * @param[in]  size Size of the data buffer
+ * @return 0 on success, negative AVERROR code on failure
  */
 int avpriv_dca_parse_core_frame_header(DCACoreFrameHeader *h, const uint8_t 
*buf, int size);
 
+/**
+ * Parse and validate core frame header
+ * @param[out] h   Pointer to struct where header info is written.
+ * @param[in]  gbc BitContext containing the first 54 bits of the frame.
+ * @return 0 on success, negative DCA_PARSE_ERROR_ code on failure
+ */
 int ff_dca_parse_core_frame_header(DCACoreFrameHeader *h, GetBitContext *gb);
 
 #endif /* AVCODEC_DCA_H */
-- 
2.14.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avformat/movenc: let avpriv_ac3_parse_header() allocate the AC3HeaderInfo struct

2017-10-30 Thread James Almer
This removes sizeof(AC3HeaderInfo) from the ABI.

Signed-off-by: James Almer 
---
 libavformat/movenc.c | 72 +++-
 1 file changed, 43 insertions(+), 29 deletions(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index a920eb7c8f..cc3fc19d9b 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -345,9 +345,9 @@ struct eac3_info {
 #if CONFIG_AC3_PARSER
 static int handle_eac3(MOVMuxContext *mov, AVPacket *pkt, MOVTrack *track)
 {
-AC3HeaderInfo tmp, *hdr = &tmp;
+AC3HeaderInfo *hdr = NULL;
 struct eac3_info *info;
-int num_blocks;
+int num_blocks, ret;
 
 if (!track->eac3_priv && !(track->eac3_priv = av_mallocz(sizeof(*info
 return AVERROR(ENOMEM);
@@ -357,9 +357,10 @@ static int handle_eac3(MOVMuxContext *mov, AVPacket *pkt, 
MOVTrack *track)
 /* drop the packets until we see a good one */
 if (!track->entry) {
 av_log(mov, AV_LOG_WARNING, "Dropping invalid packet from start of 
the stream\n");
-return 0;
-}
-return AVERROR_INVALIDDATA;
+ret = 0;
+} else
+ret = AVERROR_INVALIDDATA;
+goto end;
 }
 
 info->data_rate = FFMAX(info->data_rate, hdr->bit_rate / 1000);
@@ -367,20 +368,25 @@ static int handle_eac3(MOVMuxContext *mov, AVPacket *pkt, 
MOVTrack *track)
 
 if (!info->ec3_done) {
 /* AC-3 substream must be the first one */
-if (hdr->bitstream_id <= 10 && hdr->substreamid != 0)
-return AVERROR(EINVAL);
+if (hdr->bitstream_id <= 10 && hdr->substreamid != 0) {
+ret = AVERROR(EINVAL);
+goto end;
+}
 
 /* this should always be the case, given that our AC-3 parser
  * concatenates dependent frames to their independent parent */
 if (hdr->frame_type == EAC3_FRAME_TYPE_INDEPENDENT) {
 /* substream ids must be incremental */
-if (hdr->substreamid > info->num_ind_sub + 1)
-return AVERROR(EINVAL);
+if (hdr->substreamid > info->num_ind_sub + 1) {
+ret = AVERROR(EINVAL);
+goto end;
+}
 
 if (hdr->substreamid == info->num_ind_sub + 1) {
 //info->num_ind_sub++;
 avpriv_request_sample(track->par, "Multiple independent 
substreams");
-return AVERROR_PATCHWELCOME;
+ret = AVERROR_PATCHWELCOME;
+goto end;
 } else if (hdr->substreamid < info->num_ind_sub ||
hdr->substreamid == 0 && info->substream[0].bsid) {
 info->ec3_done = 1;
@@ -402,12 +408,14 @@ static int handle_eac3(MOVMuxContext *mov, AVPacket *pkt, 
MOVTrack *track)
 
 while (cumul_size != pkt->size) {
 GetBitContext gbc;
-int i, ret;
+int i;
 ret = avpriv_ac3_parse_header(&hdr, pkt->data + cumul_size, 
pkt->size - cumul_size);
 if (ret < 0)
-return AVERROR_INVALIDDATA;
-if (hdr->frame_type != EAC3_FRAME_TYPE_DEPENDENT)
-return AVERROR(EINVAL);
+goto end;
+if (hdr->frame_type != EAC3_FRAME_TYPE_DEPENDENT) {
+ret = AVERROR(EINVAL);
+goto end;
+}
 info->substream[parent].num_dep_sub++;
 ret /= 8;
 
@@ -433,37 +441,43 @@ static int handle_eac3(MOVMuxContext *mov, AVPacket *pkt, 
MOVTrack *track)
 }
 
 concatenate:
-if (!info->num_blocks && num_blocks == 6)
-return pkt->size;
-else if (info->num_blocks + num_blocks > 6)
-return AVERROR_INVALIDDATA;
+if (!info->num_blocks && num_blocks == 6) {
+ret = pkt->size;
+goto end;
+}
+else if (info->num_blocks + num_blocks > 6) {
+ret = AVERROR_INVALIDDATA;
+goto end;
+}
 
 if (!info->num_blocks) {
-int ret = av_packet_ref(&info->pkt, pkt);
-if (ret < 0)
-return ret;
-info->num_blocks = num_blocks;
-return 0;
+ret = av_packet_ref(&info->pkt, pkt);
+if (!ret)
+info->num_blocks = num_blocks;
+goto end;
 } else {
-int ret;
 if ((ret = av_grow_packet(&info->pkt, pkt->size)) < 0)
-return ret;
+goto end;
 memcpy(info->pkt.data + info->pkt.size - pkt->size, pkt->data, 
pkt->size);
 info->num_blocks += num_blocks;
 info->pkt.duration += pkt->duration;
 if ((ret = av_copy_packet_side_data(&info->pkt, pkt)) < 0)
-return ret;
+goto end;
 if (info->num_blocks != 6)
-return 0;
+goto end;
 av_packet_unref(pkt);
 ret = av_packet_ref(pkt, &info->pkt);
 if (ret < 0)
-return ret;
+goto end;
 av_packet_un