Also disable the mmx/iwht optimization when the bitexact flag is set. With synthetically coded coefficients (i.e. those that lead to a residual well outside the [-255,255] range), the optimized MMX iwht implementation will overflow. It doesn't make sense to fix the overflows, since they can only occur on synthetic input, not on real fwht-generated input. Thus, pass a bitexact argument to the DSP init functions and skip this optimization when it is set. --- libavcodec/vp9.c | 2 +- libavcodec/vp9dsp.c | 4 ++-- libavcodec/vp9dsp.h | 4 ++-- libavcodec/x86/vp9dsp_init.c | 12 +++++++----- libavcodec/x86/vp9itxfm.asm | 4 ++++ 5 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index 98fd8a3..fcdd6e1 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -360,7 +360,7 @@ static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt av_freep(&s->block_base); if (s->bpp != s->last_bpp) { - ff_vp9dsp_init(&s->dsp, s->bpp); + ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT); ff_videodsp_init(&s->vdsp, s->bpp); s->last_bpp = s->bpp; } diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c index 018cb61..54e77e2 100644 --- a/libavcodec/vp9dsp.c +++ b/libavcodec/vp9dsp.c @@ -25,7 +25,7 @@ #include "libavutil/common.h" #include "vp9dsp.h" -av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp) +av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact) { if (bpp == 8) { ff_vp9dsp_init_8(dsp); @@ -36,6 +36,6 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp) ff_vp9dsp_init_12(dsp); } - if (ARCH_X86) ff_vp9dsp_init_x86(dsp, bpp); + if (ARCH_X86) ff_vp9dsp_init_x86(dsp, bpp, bitexact); if (ARCH_MIPS) ff_vp9dsp_init_mips(dsp, bpp); } diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h index 28903b3..016a9bb 100644 --- a/libavcodec/vp9dsp.h +++ b/libavcodec/vp9dsp.h @@ -120,13 +120,13 @@ typedef struct VP9DSPContext { vp9_scaled_mc_func smc[5][4][2]; } VP9DSPContext; -void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp); +void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact); void ff_vp9dsp_init_8(VP9DSPContext *dsp); void ff_vp9dsp_init_10(VP9DSPContext *dsp); void ff_vp9dsp_init_12(VP9DSPContext *dsp); -void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp); +void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact); void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp); #endif /* AVCODEC_VP9DSP_H */ diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index 00e7125..f24cb67 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -307,7 +307,7 @@ ipred_func(32, tm, avx2); #endif /* 
HAVE_YASM */ -av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp) +av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact) { #if HAVE_YASM int cpu_flags; @@ -388,10 +388,12 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp) if (EXTERNAL_MMX(cpu_flags)) { init_fpel(4, 0, 4, put, mmx); init_fpel(3, 0, 8, put, mmx); - dsp->itxfm_add[4 /* lossless */][DCT_DCT] = - dsp->itxfm_add[4 /* lossless */][ADST_DCT] = - dsp->itxfm_add[4 /* lossless */][DCT_ADST] = - dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx; + if (!bitexact) { + dsp->itxfm_add[4 /* lossless */][DCT_DCT] = + dsp->itxfm_add[4 /* lossless */][ADST_DCT] = + dsp->itxfm_add[4 /* lossless */][DCT_ADST] = + dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx; + } init_ipred(8, mmx, v, VERT); } diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm index 02d4d0a..54462a6 100644 --- a/libavcodec/x86/vp9itxfm.asm +++ b/libavcodec/x86/vp9itxfm.asm @@ -1127,10 +1127,14 @@ IADST8_FN iadst, IADST8, iadst, IADST8, avx, 16 pmulhrsw m7, m4, [pw_16069x2] ; t6-7 pmulhrsw m4, [pw_3196x2] ; t4-5 +%if 0 ; overflows :( paddw m6, m7, m4 psubw m5, m7, m4 pmulhrsw m5, [pw_11585x2] ; t5 pmulhrsw m6, [pw_11585x2] ; t6 +%else + VP9_UNPACK_MULSUB_2W_4X 5, 6, 7, 4, 11585, 11585, [pd_8192], 0, 1 ; t5, t6 +%endif psubw m0, m3, m7 paddw m7, m3 -- 2.1.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel