--- This is 100% untested and probably doesn't even compile. Could anyone with PPC/AltiVec hardware test it, or provide access to such a machine?
After the altivec optims are ported we can drop the duplicated version in libavcodec entirely. The fate-pixelutils tests should cover the alignment checks; there might be some fixes in this area as I'm not sure about the requirement. About the copyright, I put Brian Foley only, because of 59925ef2044e3ba2b5be49a35d37929550e3d6bc. If someone feels like some more people should be added, please tell me. --- libavutil/pixelutils.c | 6 +-- libavutil/ppc/Makefile | 3 ++ libavutil/ppc/pixelutils.h | 26 +++++++++ libavutil/ppc/pixelutils_altivec.c | 106 +++++++++++++++++++++++++++++++++++++ libavutil/ppc/pixelutils_altivec.h | 30 +++++++++++ libavutil/ppc/pixelutils_init.c | 33 ++++++++++++ 6 files changed, 201 insertions(+), 3 deletions(-) create mode 100644 libavutil/ppc/pixelutils.h create mode 100644 libavutil/ppc/pixelutils_altivec.c create mode 100644 libavutil/ppc/pixelutils_altivec.h create mode 100644 libavutil/ppc/pixelutils_init.c diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c index 10ff7e8..9a7b7f1 100644 --- a/libavutil/pixelutils.c +++ b/libavutil/pixelutils.c @@ -23,6 +23,7 @@ #if CONFIG_PIXELUTILS #include "x86/pixelutils.h" +#include "ppc/pixelutils.h" static av_always_inline int sad_wxh(const uint8_t *src1, ptrdiff_t stride1, const uint8_t *src2, ptrdiff_t stride2, @@ -77,9 +78,8 @@ av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligne if (w_bits != h_bits) // only squared sad for now return NULL; -#if ARCH_X86 - ff_pixelutils_sad_init_x86(sad, aligned); -#endif + if (ARCH_X86) ff_pixelutils_sad_init_x86(sad, aligned); + if (ARCH_PPC) ff_pixelutils_sad_init_ppc(sad, aligned); return sad[w_bits - 1]; #endif diff --git a/libavutil/ppc/Makefile b/libavutil/ppc/Makefile index 4fd8d6d..295e4e5 100644 --- a/libavutil/ppc/Makefile +++ b/libavutil/ppc/Makefile @@ -1,4 +1,7 @@ OBJS += ppc/cpu.o \ ppc/float_dsp_init.o \ +OBJS-$(CONFIG_PIXELUTILS) += ppc/pixelutils_init.o + ALTIVEC-OBJS += ppc/float_dsp_altivec.o \ 
+ALTIVEC-OBJS-$(CONFIG_PIXELUTILS) += ppc/pixelutils_altivec.o \ diff --git a/libavutil/ppc/pixelutils.h b/libavutil/ppc/pixelutils.h new file mode 100644 index 0000000..c737a69 --- /dev/null +++ b/libavutil/ppc/pixelutils.h @@ -0,0 +1,26 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_PPC_PIXELUTILS_H +#define AVUTIL_PPC_PIXELUTILS_H + +#include "libavutil/pixelutils.h" + +void ff_pixelutils_sad_init_ppc(av_pixelutils_sad_fn *sad, int aligned); + +#endif /* AVUTIL_PPC_PIXELUTILS_H */ diff --git a/libavutil/ppc/pixelutils_altivec.c b/libavutil/ppc/pixelutils_altivec.c new file mode 100644 index 0000000..5cf338a --- /dev/null +++ b/libavutil/ppc/pixelutils_altivec.c @@ -0,0 +1,106 @@ +/* + * This file is part of FFmpeg. + * + * Copyright (c) 2002 Brian Foley + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "util_altivec.h" +#include "pixelutils_altivec.h" + +int ff_pixelutils_sad_8x8_altivec(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2) +{ /* Returns the sum of absolute differences over 8 rows, using the first 8 bytes of each row of src1 and src2; rows advance by stride1 and stride2. Each row of either source is assembled from two vec_ld loads combined through vec_perm. */ + int i, s; + const vector unsigned int zero = + (const vector unsigned int) vec_splat_u32(0); + const vector unsigned char permclear = + (vector unsigned char) + { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 }; + vector unsigned char perm1 = vec_lvsl(0, src1); + vector unsigned char perm2 = vec_lvsl(0, src2); /* NOTE(review): perm1 and perm2 are computed once, before the row loop, while src1 and src2 move by their strides on every row; this presumably assumes both strides are multiples of 16 -- confirm. */ + vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); + vector signed int sumdiffs; + + for (i = 0; i < 8; i++) { + /* Read potentially unaligned pixels into t1 and t2. + * Since we're reading 16 pixels, and actually only want 8, + * mask out the last 8 pixels. The 0s don't change the sum. */ + vector unsigned char src1l = vec_ld(0, src1); + vector unsigned char src1r = vec_ld(7, src1); + vector unsigned char src2l = vec_ld(0, src2); + vector unsigned char src2r = vec_ld(7, src2); + vector unsigned char t1 = vec_and(vec_perm(src1l, src1r, perm1), + permclear); + vector unsigned char t2 = vec_and(vec_perm(src2l, src2r, perm2), + permclear); + + /* Calculate a sum of abs differences vector: max - min per byte gives the absolute difference without leaving unsigned arithmetic. */ + vector unsigned char t3 = vec_max(t1, t2); + vector unsigned char t4 = vec_min(t1, t2); + vector unsigned char t5 = vec_sub(t3, t4); + + /* Add each 4 pixel group together and put 4 results into sad. */ + sad = vec_sum4s(t5, sad); + + src1 += stride1; + src2 += stride2; + } + + /* Sum up the four partial sums, and put the result into s.
*/ + sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); + sumdiffs = vec_splat(sumdiffs, 3); + vec_ste(sumdiffs, 0, &s); + + return s; +} + +int ff_pixelutils_sad_16x16_altivec(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2) +{ /* Returns the sum of absolute differences over 16 rows of 16 bytes; rows advance by stride1 and stride2. Only src2 goes through the vec_lvsl/vec_perm realignment; src1 is read with a single vec_ld. NOTE(review): this presumably requires src1 to be 16-byte aligned -- confirm against what callers guarantee. */ + int i, s; + const vector unsigned int zero = + (const vector unsigned int) vec_splat_u32(0); + vector unsigned char perm = vec_lvsl(0, src2); + vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); + vector signed int sumdiffs; + + for (i = 0; i < 16; i++) { + /* Read potentially unaligned src2 pixels into t2; t1 is loaded straight from src1 without a permute. */ + vector unsigned char src2l = vec_ld(0, src2); + vector unsigned char src2r = vec_ld(15, src2); + vector unsigned char t1 = vec_ld(0, src1); + vector unsigned char t2 = vec_perm(src2l, src2r, perm); + + /* Calculate a sum of abs differences vector: max - min per byte gives the absolute difference without leaving unsigned arithmetic. */ + vector unsigned char t3 = vec_max(t1, t2); + vector unsigned char t4 = vec_min(t1, t2); + vector unsigned char t5 = vec_sub(t3, t4); + + /* Add each 4 pixel group together and put 4 results into sad. */ + sad = vec_sum4s(t5, sad); + + src1 += stride1; + src2 += stride2; + } + + /* Sum up the four partial sums, and put the result into s. */ + sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); + sumdiffs = vec_splat(sumdiffs, 3); + vec_ste(sumdiffs, 0, &s); + + return s; +} diff --git a/libavutil/ppc/pixelutils_altivec.h b/libavutil/ppc/pixelutils_altivec.h new file mode 100644 index 0000000..435ba36 --- /dev/null +++ b/libavutil/ppc/pixelutils_altivec.h @@ -0,0 +1,30 @@ +/* + * This file is part of FFmpeg. + * + * Copyright (c) 2002 Brian Foley + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version.
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_PPC_PIXELUTILS_ALTIVEC_H +#define AVUTIL_PPC_PIXELUTILS_ALTIVEC_H + +int ff_pixelutils_sad_8x8_altivec(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +int ff_pixelutils_sad_16x16_altivec(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +#endif /* AVUTIL_PPC_PIXELUTILS_ALTIVEC_H */ diff --git a/libavutil/ppc/pixelutils_init.c b/libavutil/ppc/pixelutils_init.c new file mode 100644 index 0000000..ed69232 --- /dev/null +++ b/libavutil/ppc/pixelutils_init.c @@ -0,0 +1,33 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/ppc/cpu.h" +#include "pixelutils.h" +#include "pixelutils_altivec.h" + +void ff_pixelutils_sad_init_ppc(av_pixelutils_sad_fn *sad, int aligned) +{ /* Stores the AltiVec SAD routines into sad[2] (8x8) and sad[3] (16x16) when PPC_ALTIVEC() accepts the flags from av_get_cpu_flags(); otherwise sad[] is left untouched. */ + if (PPC_ALTIVEC(av_get_cpu_flags())) { + /* XXX: the aligned argument is never consulted, so both routines are installed for every alignment mode. NOTE(review): the 16x16 routine loads src1 with a single vec_ld and no permute, so it presumably should only be installed when the caller guarantees an aligned src1 -- confirm before merging. */ + sad[2] = ff_pixelutils_sad_8x8_altivec; + sad[3] = ff_pixelutils_sad_16x16_altivec; + } +} -- 2.1.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel