ffmpeg | branch: master | James Almer <jamr...@gmail.com> | Tue Jan 31 14:49:29 2017 -0300| [ca8a3978e57c7c8f6abab8547f47483e407469b7] | committer: James Almer
Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5' * commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5': x86: hpeldsp: Split off VP3-specific bits into a separate file Merged-by: James Almer <jamr...@gmail.com> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ca8a3978e57c7c8f6abab8547f47483e407469b7 --- libavcodec/x86/Makefile | 2 + libavcodec/x86/hpeldsp.asm | 89 ------------------------------ libavcodec/x86/hpeldsp.h | 4 ++ libavcodec/x86/hpeldsp_init.c | 25 ++------- libavcodec/x86/hpeldsp_vp3.asm | 111 ++++++++++++++++++++++++++++++++++++++ libavcodec/x86/hpeldsp_vp3_init.c | 56 +++++++++++++++++++ 6 files changed, 176 insertions(+), 111 deletions(-) diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 2f0354a..2864952 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -67,6 +67,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o +OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o \ x86/vp9dsp_init_10bpp.o \ @@ -169,6 +170,7 @@ YASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o +YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \ x86/vp9intrapred_16bpp.o \ diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm index 82fb893..ce5d7a4 100644 --- a/libavcodec/x86/hpeldsp.asm +++ b/libavcodec/x86/hpeldsp.asm @@ -175,53 +175,6 @@ INIT_MMX 3dnow PUT_NO_RND_PIXELS8_X2 -; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_X2_EXACT 0 -cglobal put_no_rnd_pixels8_x2_exact, 4,5 - lea r4, [r2*3] - pcmpeqb m6, m6 -.loop: - mova m0, [r1] - mova m2, [r1+r2] - mova m1, [r1+1] - mova m3, [r1+r2+1] - pxor m0, m6 - pxor m2, m6 - pxor m1, m6 - pxor m3, m6 - PAVGB m0, m1 - PAVGB m2, m3 - pxor m0, m6 - pxor m2, m6 - mova [r0], m0 - mova [r0+r2], m2 - mova m0, [r1+r2*2] - mova m1, [r1+r2*2+1] - mova m2, [r1+r4] - mova m3, [r1+r4+1] - pxor m0, m6 - pxor m1, m6 - pxor m2, m6 - pxor m3, m6 - PAVGB m0, m1 - PAVGB m2, m3 - pxor m0, m6 - pxor m2, m6 - mova [r0+r2*2], m0 - mova [r0+r4], m2 - lea r1, [r1+r2*4] - lea r0, [r0+r2*4] - sub r3d, 4 - jg .loop - REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_X2_EXACT -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_X2_EXACT - - ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) %macro PUT_PIXELS8_Y2 0 %if cpuflag(sse2) @@ -300,48 +253,6 @@ INIT_MMX 3dnow PUT_NO_RND_PIXELS8_Y2 -; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 -cglobal put_no_rnd_pixels8_y2_exact, 4,5 - lea r4, [r2*3] - mova m0, [r1] - pcmpeqb m6, m6 - add r1, r2 - pxor m0, m6 -.loop: - mova m1, [r1] - mova m2, [r1+r2] - pxor m1, m6 - pxor m2, m6 - PAVGB m0, m1 - PAVGB m1, m2 - pxor m0, m6 - pxor m1, m6 - mova [r0], m0 - mova [r0+r2], m1 - mova m1, [r1+r2*2] - mova m0, [r1+r4] - pxor m1, m6 - pxor m0, m6 - PAVGB m2, m1 - PAVGB m1, m0 - pxor m2, m6 - pxor m1, m6 - mova [r0+r2*2], m2 - mova [r0+r4], m1 - lea r1, [r1+r2*4] - lea r0, [r0+r2*4] - sub r3d, 4 - jg .loop - REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_Y2_EXACT -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_Y2_EXACT - - ; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) %macro AVG_PIXELS8 0 cglobal avg_pixels8, 4,5 diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h index 5fae990..bf97029 100644 --- a/libavcodec/x86/hpeldsp.h +++ b/libavcodec/x86/hpeldsp.h @@ -22,6 +22,8 @@ #include <stddef.h> #include <stdint.h> +#include "libavcodec/hpeldsp.h" + void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); @@ -50,4 +52,6 @@ void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags); + #endif /* AVCODEC_X86_HPELDSP_H */ diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index f1ba4be..e8da184 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -51,12 +51,6 @@ void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, @@ -65,12 +59,6 @@ void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, @@ -242,11 +230,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags) c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext; c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext; } - - if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) { - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; - } #endif /* HAVE_MMXEXT_EXTERNAL */ } @@ -278,11 +261,6 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags) c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow; c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow; } - - if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) { - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; - } #endif /* HAVE_AMD3DNOW_EXTERNAL */ } @@ -332,4 +310,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags) if (EXTERNAL_SSSE3(cpu_flags)) hpeldsp_init_ssse3(c, flags, cpu_flags); + + if (CONFIG_VP3_DECODER) + ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags); } diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm new file mode 100644 index 0000000..cba96d0 --- /dev/null +++ b/libavcodec/x86/hpeldsp_vp3.asm @@ -0,0 +1,111 @@ +;****************************************************************************** +;* SIMD-optimized halfpel functions for VP3 +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +%macro PUT_NO_RND_PIXELS8_X2_EXACT 0 +cglobal put_no_rnd_pixels8_x2_exact, 4,5 + lea r4, [r2*3] + pcmpeqb m6, m6 +.loop: + mova m0, [r1] + mova m2, [r1+r2] + mova m1, [r1+1] + mova m3, [r1+r2+1] + pxor m0, m6 + pxor m2, m6 + pxor m1, m6 + pxor m3, m6 + PAVGB m0, m1 + PAVGB m2, m3 + pxor m0, m6 + pxor m2, m6 + mova [r0], m0 + mova [r0+r2], m2 + mova m0, [r1+r2*2] + mova m1, [r1+r2*2+1] + mova m2, [r1+r4] + mova m3, [r1+r4+1] + pxor m0, m6 + pxor m1, m6 + pxor m2, m6 + pxor m3, m6 + PAVGB m0, m1 + PAVGB m2, m3 + pxor m0, m6 + pxor m2, m6 + mova [r0+r2*2], m0 + mova [r0+r4], m2 + lea r1, [r1+r2*4] + lea r0, [r0+r2*4] + sub r3d, 4 + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PUT_NO_RND_PIXELS8_X2_EXACT +INIT_MMX 3dnow +PUT_NO_RND_PIXELS8_X2_EXACT + + +; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 +cglobal put_no_rnd_pixels8_y2_exact, 4,5 + lea r4, [r2*3] + mova m0, [r1] + pcmpeqb m6, m6 + add r1, r2 + pxor m0, m6 +.loop: + mova m1, [r1] + mova m2, [r1+r2] + pxor m1, m6 + pxor m2, m6 + PAVGB m0, m1 + PAVGB m1, m2 + pxor m0, m6 + pxor m1, m6 + mova [r0], m0 + mova [r0+r2], m1 + mova m1, [r1+r2*2] + mova m0, [r1+r4] + pxor m1, m6 + pxor m0, m6 + PAVGB m2, m1 + PAVGB m1, m0 + pxor m2, m6 + pxor m1, m6 + mova [r0+r2*2], m2 + mova [r0+r4], m1 + lea r1, [r1+r2*4] + lea r0, [r0+r2*4] + sub r3d, 4 + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PUT_NO_RND_PIXELS8_Y2_EXACT +INIT_MMX 3dnow +PUT_NO_RND_PIXELS8_Y2_EXACT diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c new file mode 100644 index 0000000..5979f41 --- /dev/null +++ b/libavcodec/x86/hpeldsp_vp3_init.c @@ -0,0 +1,56 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" + +#include "libavcodec/avcodec.h" +#include "libavcodec/hpeldsp.h" + +#include "hpeldsp.h" + +void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); + +av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags) +{ + if (EXTERNAL_AMD3DNOW(cpu_flags)) { + if (flags & AV_CODEC_FLAG_BITEXACT) { + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; + } + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + if (flags & AV_CODEC_FLAG_BITEXACT) { + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; + } + } +} ====================================================================== diff --cc libavcodec/x86/Makefile index 2f0354a,3208699..2864952 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@@ -57,22 -46,14 +57,23 @@@ OBJS-$(CONFIG_MLP_DECODER) OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct_init.o OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o +OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o -OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o -OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o +OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o +OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o +OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o +OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o +OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o +OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o + OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o -OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o +OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o \ + x86/vp9dsp_init_10bpp.o \ + x86/vp9dsp_init_12bpp.o \ + x86/vp9dsp_init_16bpp.o +OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp_init.o # GCC inline assembly optimizations @@@ -133,49 -111,18 +134,50 @@@ YASM-OBJS-$(CONFIG_VP8DSP) x86/vp8dsp_loopfilter.o # decoders/encoders -YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o -YASM-OBJS-$(CONFIG_APE_DECODER) += x86/apedsp.o -YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o +YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/aacpsdsp.o \ + x86/sbrdsp.o +YASM-OBJS-$(CONFIG_AAC_ENCODER) += x86/aacencdsp.o +YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o +YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o +YASM-OBJS-$(CONFIG_ALAC_DECODER) += x86/alacdsp.o +YASM-OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp.o +YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o x86/synth_filter.o +YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp.o \ + x86/dirac_dwt.o YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o -YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_deblock.o \ - x86/hevc_mc.o \ - x86/hevc_idct.o +YASM-OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp.o +ifdef CONFIG_GPL +YASM-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flac_dsp_gpl.o +endif +YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_mc.o \ + x86/hevc_deblock.o \ + x86/hevc_idct.o \ + x86/hevc_res_add.o \ + x86/hevc_sao.o \ + x86/hevc_sao_10bit.o +YASM-OBJS-$(CONFIG_JPEG2000_DECODER) += x86/jpeg2000dsp.o +YASM-OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o +YASM-OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct.o YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o +YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o +YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o +YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o +YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o +YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o +YASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o +YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o + YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o -YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp.o +YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \ + x86/vp9intrapred_16bpp.o \ + x86/vp9itxfm.o \ + x86/vp9itxfm_16bpp.o \ + x86/vp9lpf.o \ + x86/vp9lpf_16bpp.o \ + x86/vp9mc.o \ + x86/vp9mc_16bpp.o +YASM-OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp.o diff --cc libavcodec/x86/hpeldsp.asm index 82fb893,8e21114..ce5d7a4 --- a/libavcodec/x86/hpeldsp.asm +++ b/libavcodec/x86/hpeldsp.asm @@@ -175,66 -142,15 +175,19 @@@ INIT_MMX 3dno PUT_NO_RND_PIXELS8_X2 - ; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) - %macro PUT_NO_RND_PIXELS8_X2_EXACT 0 - cglobal put_no_rnd_pixels8_x2_exact, 4,5 - lea r4, [r2*3] - pcmpeqb m6, m6 - .loop: - mova m0, [r1] - mova m2, [r1+r2] - mova m1, [r1+1] - mova m3, [r1+r2+1] - pxor m0, m6 - pxor m2, m6 - pxor m1, m6 - pxor m3, m6 - PAVGB m0, m1 - PAVGB m2, m3 - pxor m0, m6 - pxor m2, m6 - mova [r0], m0 - mova [r0+r2], m2 - mova m0, [r1+r2*2] - mova m1, [r1+r2*2+1] - mova m2, [r1+r4] - mova m3, [r1+r4+1] - pxor m0, m6 - pxor m1, m6 - pxor m2, m6 - pxor m3, m6 - PAVGB m0, m1 - PAVGB m2, m3 - pxor m0, m6 - pxor m2, m6 - mova [r0+r2*2], m0 - mova [r0+r4], m2 - lea r1, [r1+r2*4] - lea r0, [r0+r2*4] - sub r3d, 4 - jg .loop - REP_RET - %endmacro - - INIT_MMX mmxext - PUT_NO_RND_PIXELS8_X2_EXACT - INIT_MMX 3dnow - PUT_NO_RND_PIXELS8_X2_EXACT - - ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) %macro PUT_PIXELS8_Y2 0 +%if cpuflag(sse2) +cglobal put_pixels16_y2, 4,5,3 +%else cglobal put_pixels8_y2, 4,5 +%endif lea r4, [r2*2] - mova m0, [r1] + movu m0, [r1] sub r0, r2 .loop: - mova m1, [r1+r2] - mova m2, [r1+r4] + movu m1, [r1+r2] + movu m2, [r1+r4] add r1, r4 PAVGB m0, m1 PAVGB m1, m2 diff --cc libavcodec/x86/hpeldsp.h index 5fae990,d624ed9..bf97029 --- a/libavcodec/x86/hpeldsp.h +++ b/libavcodec/x86/hpeldsp.h @@@ -41,13 -34,9 +43,15 @@@ void ff_avg_pixels16_xy2_ssse3(uint8_t void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); + void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags); + #endif /* AVCODEC_X86_HPELDSP_H */ diff --cc libavcodec/x86/hpeldsp_init.c index f1ba4be,6731428..e8da184 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@@ -239,14 -194,9 +227,9 @@@ static void hpeldsp_init_mmxext(HpelDSP c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext; c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext; - c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext; + c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext; + c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext; } - - if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) { - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; - } #endif /* HAVE_MMXEXT_EXTERNAL */ } @@@ -275,14 -223,9 +258,9 @@@ static void hpeldsp_init_3dnow(HpelDSPC c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow; c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; - c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow; + c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow; + c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow; } - - if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) { - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; - } #endif /* HAVE_AMD3DNOW_EXTERNAL */ } @@@ -330,6 -257,6 +308,9 @@@ av_cold void ff_hpeldsp_init_x86(HpelDS if (EXTERNAL_SSE2(cpu_flags)) hpeldsp_init_sse2(c, flags, cpu_flags); + if (EXTERNAL_SSSE3(cpu_flags)) + hpeldsp_init_ssse3(c, flags, cpu_flags); ++ + if (CONFIG_VP3_DECODER) + ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags); } diff --cc libavcodec/x86/hpeldsp_vp3.asm index 0000000,513f14e..cba96d0 mode 000000,100644..100644 --- a/libavcodec/x86/hpeldsp_vp3.asm +++ b/libavcodec/x86/hpeldsp_vp3.asm @@@ -1,0 -1,111 +1,111 @@@ + ;****************************************************************************** + ;* SIMD-optimized halfpel functions for VP3 + ;* -;* This file is part of Libav. ++;* This file is part of FFmpeg. + ;* -;* Libav is free software; you can redistribute it and/or ++;* FFmpeg is free software; you can redistribute it and/or + ;* modify it under the terms of the GNU Lesser General Public + ;* License as published by the Free Software Foundation; either + ;* version 2.1 of the License, or (at your option) any later version. + ;* -;* Libav is distributed in the hope that it will be useful, ++;* FFmpeg is distributed in the hope that it will be useful, + ;* but WITHOUT ANY WARRANTY; without even the implied warranty of + ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + ;* Lesser General Public License for more details. + ;* + ;* You should have received a copy of the GNU Lesser General Public -;* License along with Libav; if not, write to the Free Software ++;* License along with FFmpeg; if not, write to the Free Software + ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ;****************************************************************************** + + %include "libavutil/x86/x86util.asm" + + SECTION .text + + ; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) + %macro PUT_NO_RND_PIXELS8_X2_EXACT 0 + cglobal put_no_rnd_pixels8_x2_exact, 4,5 + lea r4, [r2*3] + pcmpeqb m6, m6 + .loop: + mova m0, [r1] + mova m2, [r1+r2] + mova m1, [r1+1] + mova m3, [r1+r2+1] + pxor m0, m6 + pxor m2, m6 + pxor m1, m6 + pxor m3, m6 + PAVGB m0, m1 + PAVGB m2, m3 + pxor m0, m6 + pxor m2, m6 + mova [r0], m0 + mova [r0+r2], m2 + mova m0, [r1+r2*2] + mova m1, [r1+r2*2+1] + mova m2, [r1+r4] + mova m3, [r1+r4+1] + pxor m0, m6 + pxor m1, m6 + pxor m2, m6 + pxor m3, m6 + PAVGB m0, m1 + PAVGB m2, m3 + pxor m0, m6 + pxor m2, m6 + mova [r0+r2*2], m0 + mova [r0+r4], m2 + lea r1, [r1+r2*4] + lea r0, [r0+r2*4] + sub r3d, 4 + jg .loop + REP_RET + %endmacro + + INIT_MMX mmxext + PUT_NO_RND_PIXELS8_X2_EXACT + INIT_MMX 3dnow + PUT_NO_RND_PIXELS8_X2_EXACT + + + ; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) + %macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 + cglobal put_no_rnd_pixels8_y2_exact, 4,5 + lea r4, [r2*3] + mova m0, [r1] + pcmpeqb m6, m6 + add r1, r2 + pxor m0, m6 + .loop: + mova m1, [r1] + mova m2, [r1+r2] + pxor m1, m6 + pxor m2, m6 + PAVGB m0, m1 + PAVGB m1, m2 + pxor m0, m6 + pxor m1, m6 + mova [r0], m0 + mova [r0+r2], m1 + mova m1, [r1+r2*2] + mova m0, [r1+r4] + pxor m1, m6 + pxor m0, m6 + PAVGB m2, m1 + PAVGB m1, m0 + pxor m2, m6 + pxor m1, m6 + mova [r0+r2*2], m2 + mova [r0+r4], m1 + lea r1, [r1+r2*4] + lea r0, [r0+r2*4] + sub r3d, 4 + jg .loop + REP_RET + %endmacro + + INIT_MMX mmxext + PUT_NO_RND_PIXELS8_Y2_EXACT + INIT_MMX 3dnow + PUT_NO_RND_PIXELS8_Y2_EXACT diff --cc libavcodec/x86/hpeldsp_vp3_init.c index 0000000,2510c11..5979f41 mode 000000,100644..100644 --- a/libavcodec/x86/hpeldsp_vp3_init.c +++ b/libavcodec/x86/hpeldsp_vp3_init.c @@@ -1,0 -1,56 +1,56 @@@ + /* - * This file is part of Libav. ++ * This file is part of FFmpeg. + * - * Libav is free software; you can redistribute it and/or ++ * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * - * Libav is distributed in the hope that it will be useful, ++ * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software ++ * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + #include "libavutil/attributes.h" + #include "libavutil/cpu.h" + #include "libavutil/x86/cpu.h" + + #include "libavcodec/avcodec.h" + #include "libavcodec/hpeldsp.h" + + #include "hpeldsp.h" + + void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); + void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); + void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); + void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); + + av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags) + { + if (EXTERNAL_AMD3DNOW(cpu_flags)) { + if (flags & AV_CODEC_FLAG_BITEXACT) { + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; + } + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + if (flags & AV_CODEC_FLAG_BITEXACT) { + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; + } + } + } _______________________________________________ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog