--- configure | 2 + doc/APIchanges | 3 ++ libavutil/Makefile | 2 + libavutil/pixelutils.c | 85 ++++++++++++++++++++++++++++++ libavutil/pixelutils.h | 76 +++++++++++++++++++++++++++ libavutil/version.h | 2 +- libavutil/x86/Makefile | 4 ++ libavutil/x86/pixelutils.asm | 111 ++++++++++++++++++++++++++++++++++++++++ libavutil/x86/pixelutils.h | 26 ++++++++++ libavutil/x86/pixelutils_init.c | 54 +++++++++++++++++++ 10 files changed, 364 insertions(+), 1 deletion(-) create mode 100644 libavutil/pixelutils.c create mode 100644 libavutil/pixelutils.h create mode 100644 libavutil/x86/pixelutils.asm create mode 100644 libavutil/x86/pixelutils.h create mode 100644 libavutil/x86/pixelutils_init.c
diff --git a/configure b/configure index 9c3af50..57edd1d 100755 --- a/configure +++ b/configure @@ -144,6 +144,7 @@ Component options: --disable-mdct disable MDCT code --disable-rdft disable RDFT code --disable-fft disable FFT code + --disable-pixelutils disable pixel utils in libavutil Hardware accelerators: --disable-dxva2 disable DXVA2 code [autodetect] @@ -1451,6 +1452,7 @@ SUBSYSTEM_LIST=" lsp lzo mdct + pixelutils network rdft " diff --git a/doc/APIchanges b/doc/APIchanges index abca377..69ca682 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,9 @@ libavutil: 2012-10-22 API changes, most recent first: +2014-08-02 - xxxxxxx - lavu 52.95.100 - pixelutils.h + Add pixelutils API with SAD functions + 2014-07-30 - ba3e331 - lavu 52.94.100 - frame.h Add av_frame_side_data_name() diff --git a/libavutil/Makefile b/libavutil/Makefile index 91751dc..6114cc9 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -44,6 +44,7 @@ HEADERS = adler32.h \ opt.h \ parseutils.h \ pixdesc.h \ + pixelutils.h \ pixfmt.h \ random_seed.h \ replaygain.h \ @@ -113,6 +114,7 @@ OBJS = adler32.o \ opt.o \ parseutils.o \ pixdesc.o \ + pixelutils.o \ random_seed.o \ rational.o \ rc4.o \ diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c new file mode 100644 index 0000000..146a882 --- /dev/null +++ b/libavutil/pixelutils.c @@ -0,0 +1,85 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "common.h"
+#include "pixelutils.h"
+
+#if CONFIG_PIXELUTILS
+
+#include "x86/pixelutils.h"
+/* Plain C SAD: sum of abs(src1[x] - src2[x]) over w columns and h rows. */
+static av_always_inline int sad_wxh(const uint8_t *src1, ptrdiff_t stride1,
+                                    const uint8_t *src2, ptrdiff_t stride2,
+                                    int w, int h)
+{
+    int x, y, sum = 0;
+
+    for (y = 0; y < h; y++) {
+        for (x = 0; x < w; x++)
+            sum += abs(src1[x] - src2[x]);
+        src1 += stride1; /* each source steps to its next row by its own stride */
+        src2 += stride2;
+    }
+    return sum;
+}
+/* Define block_sad_<size>x<size>_c(): sad_wxh() called with w = h = size. */
+#define DECLARE_BLOCK_FUNCTIONS(size)                                               \
+static int block_sad_##size##x##size##_c(const uint8_t *src1, ptrdiff_t stride1,    \
+                                         const uint8_t *src2, ptrdiff_t stride2)    \
+{                                                                                   \
+    return sad_wxh(src1, stride1, src2, stride2, size, size);                       \
+}
+
+DECLARE_BLOCK_FUNCTIONS(2)
+DECLARE_BLOCK_FUNCTIONS(4)
+DECLARE_BLOCK_FUNCTIONS(8)
+DECLARE_BLOCK_FUNCTIONS(16)
+
+#endif /* CONFIG_PIXELUTILS */
+/* Return the result of av_mallocz(sizeof(AVPixelUtils)); no function pointer is set here. */
+av_cold AVPixelUtils *av_pixelutils_alloc(void)
+{
+    return av_mallocz(sizeof(AVPixelUtils));
+}
+/* Fill s->sad[] and s->sad_u[]; without CONFIG_PIXELUTILS, log an error and return AVERROR(EINVAL). */
+av_cold int av_pixelutils_init(AVPixelUtils *s, void *log_ctx)
+{
+#if !CONFIG_PIXELUTILS
+    av_log(log_ctx, AV_LOG_ERROR, "pixelutils support is required "
+           "but libavutil is not compiled with it\n");
+    return AVERROR(EINVAL);
+#else
+    /* Point both the sad and sad_u entry of one block size at the C implementation. */
+#define ASSIGN_BLOCK_FUNCTIONS(size) do {                                               \
+    s->sad  [AV_PIXEL_BLOCKSZ_##size##x##size] =                                        \
+    s->sad_u[AV_PIXEL_BLOCKSZ_##size##x##size] = block_sad_##size##x##size##_c;         \
+} while (0)
+
+    ASSIGN_BLOCK_FUNCTIONS(2);
+    ASSIGN_BLOCK_FUNCTIONS(4);
+    ASSIGN_BLOCK_FUNCTIONS(8);
+    ASSIGN_BLOCK_FUNCTIONS(16);
+    /* Runs after the C defaults are in place, so it can replace individual entries. */
+#if ARCH_X86
+    ff_pixelutils_init_x86(s);
+#endif
+
+    return 0;
+#endif /* CONFIG_PIXELUTILS */
+}
diff --git a/libavutil/pixelutils.h b/libavutil/pixelutils.h
new file mode 100644
index 0000000..862d607
--- /dev/null
+++ b/libavutil/pixelutils.h
@@ -0,0 +1,76 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PIXELUTILS_H
+#define AVUTIL_PIXELUTILS_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "common.h"
+/** Index into the AVPixelUtils function tables; AV_PIXEL_BLOCKSZ_NB is the number of entries. */
+enum AVPixelUtilsBlockSize {
+    // Warning: only add entries at the end for ABI compatibility.
+    // Ideally, keep only square dimensions in this enum.
+    AV_PIXEL_BLOCKSZ_2x2,
+    AV_PIXEL_BLOCKSZ_4x4,
+    AV_PIXEL_BLOCKSZ_8x8,
+    AV_PIXEL_BLOCKSZ_16x16,
+    AV_PIXEL_BLOCKSZ_NB
+};
+
+/**
+ * Sum of abs(src1[x] - src2[x]) over one block; each source has its own stride.
+ */
+typedef int (*av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1,
+                                    const uint8_t *src2, ptrdiff_t stride2);
+
+typedef struct AVPixelUtils {
+    /**
+     * Sum of absolute differences block functions, indexed by AVPixelUtilsBlockSize.
+     * src1 and src2 addresses need to be aligned to the block size.
+     */
+    av_pixelutils_sad_fn sad[AV_PIXEL_BLOCKSZ_NB];
+
+    /**
+     * Sum of absolute differences block functions, (half) unaligned version.
+     * src1 address needs to be aligned to the block size.
+     * src2 has no alignment requirement; the 'u' refers to this second source.
+     */
+    av_pixelutils_sad_fn sad_u[AV_PIXEL_BLOCKSZ_NB];
+} AVPixelUtils;
+
+/**
+ * Allocate a pixel utils context.
+ *
+ * The context remains uninitialized and you need to use av_pixelutils_init()
+ * on it.
+ */ +AVPixelUtils *av_pixelutils_alloc(void); + +/** + * Initialize the pixel utils context. + * + * @param s a pointer to an AVPixelUtils structure to initialize + * @param log_ctx context used for logging + * @return >= 0 on success, AVERROR(EINVAL) if libavutil pixelutils code wasn't + * compiled in (an explicit error message will be automatically emitted in this + * case), and another value < 0 for any other error. + */ +int av_pixelutils_init(AVPixelUtils *s, void *log_ctx); + +#endif /* AVUTIL_PIXELUTILS_H */ diff --git a/libavutil/version.h b/libavutil/version.h index 42e2db5..9af8f5f 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -56,7 +56,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 52 -#define LIBAVUTIL_VERSION_MINOR 94 +#define LIBAVUTIL_VERSION_MINOR 95 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile index 1e19082..ad3bdfc 100644 --- a/libavutil/x86/Makefile +++ b/libavutil/x86/Makefile @@ -2,7 +2,11 @@ OBJS += x86/cpu.o \ x86/float_dsp_init.o \ x86/lls_init.o \ +OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \ + YASM-OBJS += x86/cpuid.o \ x86/emms.o \ x86/float_dsp.o \ x86/lls.o \ + +YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \ diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm new file mode 100644 index 0000000..b4db2cd --- /dev/null +++ b/libavutil/x86/pixelutils.asm @@ -0,0 +1,111 @@ +;****************************************************************************** +;* Pixel utilities SIMD +;* +;* Copyright (C) 2002-2004 Michael Niedermayer <michae...@gmx.at> +;* Copyright (C) 2014 Clément Bœsch <u pkh me> +;* +;* This file is part of FFmpeg. 
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86util.asm"
+
+SECTION_TEXT
+
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
+;                               const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+INIT_MMX mmx
+cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
+    pxor        m7, m7                      ; m7 = 0, used to zero-extend bytes to words
+    pxor        m6, m6                      ; m6 = running sum in four 16-bit lanes
+%rep 4
+    mova        m0, [src1q]                 ; two rows per repetition: 4 x 2 = 8 rows
+    mova        m2, [src1q + stride1q]
+    mova        m1, [src2q]
+    mova        m3, [src2q + stride2q]
+    psubusb     m4, m0, m1                  ; m4 = src1 - src2, saturated at 0
+    psubusb     m5, m2, m3
+    psubusb     m1, m0                      ; m1 = src2 - src1, saturated at 0
+    psubusb     m3, m2
+    por         m1, m4                      ; one of the two differences is 0, so OR gives |src1 - src2|
+    por         m3, m5
+    punpcklbw   m0, m1, m7                  ; widen the low 4 absolute differences to words
+    punpcklbw   m2, m3, m7
+    punpckhbw   m1, m7                      ; widen the high 4 absolute differences to words
+    punpckhbw   m3, m7
+    paddw       m0, m1
+    paddw       m2, m3
+    paddw       m0, m2
+    paddw       m6, m0                      ; accumulate both rows
+    lea         src1q, [src1q + 2*stride1q]
+    lea         src2q, [src2q + 2*stride2q]
+%endrep
+    psrlq       m0, m6, 32                  ; horizontal add: fold lanes 2,3 onto lanes 0,1
+    paddw       m6, m0
+    psrlq       m0, m6, 16                  ; then fold lane 1 onto lane 0
+    paddw       m6, m0
+    movd        eax, m6
+    movzx       eax, ax                     ; only the low word holds the total; drop the partial sum above it
+    RET
+
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
+;                                  const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+INIT_MMX mmxext
+cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
+    pxor        m2, m2                      ; m2 = running sum
+%rep 4
+    mova        m0, [src1q]                 ; two rows per repetition: 4 x 2 = 8 rows
+    mova        m1, [src1q + stride1q]
+    psadbw      m0, [src2q]                 ; SAD of the 8 bytes of one row
+    psadbw      m1, [src2q + stride2q]
+    paddw       m2, m0
+    paddw       m2, m1
+    lea         src1q, [src1q + 2*stride1q]
+    lea         src2q, [src2q + 2*stride2q]
+%endrep
+    movd        eax, m2
+    RET
+
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_[au]_16x16_sse(const uint8_t *src1, ptrdiff_t stride1,
+;                                      const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+%macro SAD_16x16 1
+INIT_XMM sse
+cglobal pixelutils_sad_%1_16x16, 4,4,3, src1, stride1, src2, stride2
+    pxor        m2, m2                      ; m2 = running sum (one partial sum per 64-bit half)
+%rep 8
+    mov%1       m0, [src2q]                 ; %1 = a/u selects the aligned or unaligned load of src2
+    mov%1       m1, [src2q + stride2q]      ; next src2 row: must use stride2, matching the lea below
+    psadbw      m0, [src1q]                 ; NOTE(review): psadbw on xmm registers is SSE2, not SSE
+    psadbw      m1, [src1q + stride1q]      ; next src1 row: must use stride1, matching the lea below
+    paddw       m2, m0
+    paddw       m2, m1
+    lea         src1q, [src1q + 2*stride1q] ; two rows per repetition: 8 x 2 = 16 rows
+    lea         src2q, [src2q + 2*stride2q]
+%endrep
+    movhlps     m0, m2                      ; bring the high-half partial sum down
+    paddw       m2, m0                      ; low word = total SAD
+    movd        eax, m2
+    RET
+%endmacro
+
+SAD_16x16 a
+SAD_16x16 u
diff --git a/libavutil/x86/pixelutils.h b/libavutil/x86/pixelutils.h
new file mode 100644
index 0000000..386b490
--- /dev/null
+++ b/libavutil/x86/pixelutils.h
@@ -0,0 +1,26 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_X86_PIXELUTILS_H
+#define AVUTIL_X86_PIXELUTILS_H
+
+#include "libavutil/pixelutils.h"
+/* Replace entries of s->sad[] and s->sad_u[] with x86 versions selected from the CPU flags. */
+void ff_pixelutils_init_x86(AVPixelUtils *s);
+
+#endif /* AVUTIL_X86_PIXELUTILS_H */
diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c
new file mode 100644
index 0000000..701492f
--- /dev/null
+++ b/libavutil/x86/pixelutils_init.c
@@ -0,0 +1,54 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "pixelutils.h" +#include "cpu.h" + +int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_a_16x16_sse(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_u_16x16_sse(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +void ff_pixelutils_init_x86(AVPixelUtils *s) +{ + int cpu_flags = av_get_cpu_flags(); + + //TODO: add 16x16 mmx + //TODO: add 32x32 + + if (EXTERNAL_MMX(cpu_flags)) { + s->sad [AV_PIXEL_BLOCKSZ_8x8] = + s->sad_u[AV_PIXEL_BLOCKSZ_8x8] = ff_pixelutils_sad_8x8_mmx; + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + s->sad [AV_PIXEL_BLOCKSZ_8x8] = + s->sad_u[AV_PIXEL_BLOCKSZ_8x8] = ff_pixelutils_sad_8x8_mmxext; + } + + if (EXTERNAL_SSE(cpu_flags)) { + s->sad [AV_PIXEL_BLOCKSZ_16x16] = ff_pixelutils_sad_a_16x16_sse; + s->sad_u[AV_PIXEL_BLOCKSZ_16x16] = ff_pixelutils_sad_u_16x16_sse; + } +} -- 2.0.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel