On Fri, Jun 23, 2017 at 05:01:35PM +0200, Matthieu Bouron wrote: > On Thu, Jun 22, 2017 at 12:53:52PM -0300, James Almer wrote: > > On 6/22/2017 9:56 AM, Matthieu Bouron wrote: > > > --- > > > > > > The following patchset applies on top of Clément's aacpsdsp patchset. > > > > > > --- > > > tests/checkasm/Makefile | 3 +- > > > tests/checkasm/checkasm.c | 1 + > > > tests/checkasm/checkasm.h | 1 + > > > tests/checkasm/sbrdsp.c | 297 > > > ++++++++++++++++++++++++++++++++++++++++++++++ > > > 4 files changed, 301 insertions(+), 1 deletion(-) > > > create mode 100644 tests/checkasm/sbrdsp.c > > > > > > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile > > > index 638e811931..60e80ab738 100644 > > > --- a/tests/checkasm/Makefile > > > +++ b/tests/checkasm/Makefile > > > @@ -13,7 +13,8 @@ AVCODECOBJS-$(CONFIG_VP8DSP) += vp8dsp.o > > > AVCODECOBJS-$(CONFIG_VIDEODSP) += videodsp.o > > > > > > # decoders/encoders > > > -AVCODECOBJS-$(CONFIG_AAC_DECODER) += aacpsdsp.o > > > +AVCODECOBJS-$(CONFIG_AAC_DECODER) += aacpsdsp.o \ > > > + sbrdsp.o > > > AVCODECOBJS-$(CONFIG_ALAC_DECODER) += alacdsp.o > > > AVCODECOBJS-$(CONFIG_DCA_DECODER) += synth_filter.o > > > AVCODECOBJS-$(CONFIG_JPEG2000_DECODER) += jpeg2000dsp.o > > > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c > > > index e66744b162..29f201b1b3 100644 > > > --- a/tests/checkasm/checkasm.c > > > +++ b/tests/checkasm/checkasm.c > > > @@ -67,6 +67,7 @@ static const struct { > > > #if CONFIG_AVCODEC > > > #if CONFIG_AAC_DECODER > > > { "aacpsdsp", checkasm_check_aacpsdsp }, > > > + { "sbrdsp", checkasm_check_sbrdsp }, > > > #endif > > > #if CONFIG_ALAC_DECODER > > > { "alacdsp", checkasm_check_alacdsp }, > > > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h > > > index dfb0ce561c..fa51e71e4b 100644 > > > --- a/tests/checkasm/checkasm.h > > > +++ b/tests/checkasm/checkasm.h > > > @@ -50,6 +50,7 @@ void checkasm_check_hevc_idct(void); > > > void checkasm_check_jpeg2000dsp(void); 
> > > void checkasm_check_llviddsp(void); > > > void checkasm_check_pixblockdsp(void); > > > +void checkasm_check_sbrdsp(void); > > > void checkasm_check_synth_filter(void); > > > void checkasm_check_v210enc(void); > > > void checkasm_check_vp8dsp(void); > > > diff --git a/tests/checkasm/sbrdsp.c b/tests/checkasm/sbrdsp.c > > > new file mode 100644 > > > index 0000000000..8333510c6b > > > --- /dev/null > > > +++ b/tests/checkasm/sbrdsp.c > > > @@ -0,0 +1,297 @@ > > > +/* > > > + * This file is part of FFmpeg. > > > + * > > > + * FFmpeg is free software; you can redistribute it and/or modify > > > + * it under the terms of the GNU General Public License as published by > > > + * the Free Software Foundation; either version 2 of the License, or > > > + * (at your option) any later version. > > > + * > > > + * FFmpeg is distributed in the hope that it will be useful, > > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > > + * GNU General Public License for more details. > > > + * > > > + * You should have received a copy of the GNU General Public License > > > along > > > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > > > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
> > > + */ > > > + > > > +#include "libavcodec/sbrdsp.h" > > > + > > > +#include "checkasm.h" > > > + > > > +#define randomize(buf, len) do { \ > > > + int i; \ > > > + for (i = 0; i < len; i++) { \ > > > + const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX; \ > > > + (buf)[i] = f; \ > > > + } \ > > > +} while (0) > > > + > > > +#define EPS 0.0001 > > > + > > > +static void test_sum64x5(void) > > > +{ > > > + LOCAL_ALIGNED_16(INTFLOAT, dst0, [64 + 256]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst1, [64 + 256]); > > > + > > > + declare_func(void, INTFLOAT *z); > > > + > > > + randomize((INTFLOAT *)dst0, 64 + 256); > > > + memcpy(dst1, dst0, (64 + 256) * sizeof(INTFLOAT)); > > > + call_ref(dst0); > > > + call_new(dst1); > > > + if (!float_near_abs_eps_array(dst0, dst1, EPS, 64 + 256)) > > > + fail(); > > > + bench_new(dst1); > > > +} > > > + > > > +static void test_sum_square(void) > > > +{ > > > + INTFLOAT res0; > > > + INTFLOAT res1; > > > + LOCAL_ALIGNED_16(INTFLOAT, src, [256], [2]); > > > + > > > + declare_func(INTFLOAT, INTFLOAT (*x)[2], int n); > > > + > > > + randomize((INTFLOAT *)src, 256 * 2); > > > + res0 = call_ref(src, 256); > > > + res1 = call_new(src, 256); > > > > This one is failing on x86 because the second argument has garbage in > > the upper 32 bits of the GPR. > > > > The solution is to either change n from int to ptrdiff_t, or zero the > > upper bits of n in ff_sbr_sum_square_sse(), which can be done implicitly > > in the first instruction. 
> > > > > + if (!float_near_abs_eps(res0, res1, EPS)) > > > + fail(); > > > + bench_new(src, 256); > > > +} > > > + > > > +static void test_neg_odd_64(void) > > > +{ > > > + LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]); > > > + > > > + declare_func(void, INTFLOAT *x); > > > + > > > + randomize((INTFLOAT *)dst0, 64); > > > + memcpy(dst1, dst0, (64) * sizeof(INTFLOAT)); > > > + call_ref(dst0); > > > + call_new(dst1); > > > + if (!float_near_abs_eps_array(dst0, dst1, EPS, 64)) > > > + fail(); > > > + bench_new(dst1); > > > +} > > > + > > > +static void test_qmf_pre_shuffle(void) > > > +{ > > > + LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]); > > > + > > > + declare_func(void, INTFLOAT *z); > > > + > > > + randomize((INTFLOAT *)dst0, 128); > > > + memcpy(dst1, dst0, (128) * sizeof(INTFLOAT)); > > > + call_ref(dst0); > > > + call_new(dst1); > > > + if (!float_near_abs_eps_array(dst0, dst1, EPS, 128)) > > > + fail(); > > > + bench_new(dst1); > > > +} > > > + > > > +static void test_qmf_post_shuffle(void) > > > +{ > > > + LOCAL_ALIGNED_16(INTFLOAT, src, [64]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst0, [32], [2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst1, [32], [2]); > > > + > > > + declare_func(void, INTFLOAT W[32][2], const INTFLOAT *z); > > > + > > > + randomize((INTFLOAT *)src, 64); > > > + call_ref(dst0, src); > > > + call_new(dst1, src); > > > + if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, > > > EPS, 64)) > > > + fail(); > > > + bench_new(dst1, src); > > > +} > > > + > > > +static void test_qmf_deint_neg(void) > > > +{ > > > + LOCAL_ALIGNED_16(INTFLOAT, src, [64]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]); > > > + > > > + declare_func(void, INTFLOAT *v, const INTFLOAT *src); > > > + > > > + randomize((INTFLOAT *)src, 64); > > > + call_ref(dst0, src); > > > + call_new(dst1, src); > > > + if 
(!float_near_abs_eps_array(dst0, dst1, EPS, 64)) > > > + fail(); > > > + bench_new(dst1, src); > > > +} > > > + > > > +static void test_qmf_deint_bfly(void) > > > +{ > > > + LOCAL_ALIGNED_16(INTFLOAT, src0, [64]); > > > + LOCAL_ALIGNED_16(INTFLOAT, src1, [64]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]); > > > + > > > + declare_func(void, INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT > > > *src1); > > > + > > > + memset(dst0, 0, 128 * sizeof(INTFLOAT)); > > > + memset(dst1, 0, 128 * sizeof(INTFLOAT)); > > > + > > > + randomize((INTFLOAT *)src0, 64); > > > + randomize((INTFLOAT *)src1, 64); > > > + call_ref(dst0, src0, src1); > > > + call_new(dst1, src0, src1); > > > + if (!float_near_abs_eps_array(dst0, dst1, EPS, 128)) > > > + fail(); > > > + bench_new(dst1, src0, src1); > > > +} > > > + > > > +static void test_autocorrelate(void) > > > +{ > > > + LOCAL_ALIGNED_16(INTFLOAT, src, [40], [2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst0, [3], [2][2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst1, [3], [2][2]); > > > + > > > + declare_func(void, const INTFLOAT x[40][2], INTFLOAT phi[3][2][2]); > > > + > > > + memset(dst0, 0, 3 * 2 * 2 * sizeof(INTFLOAT)); > > > + memset(dst1, 0, 3 * 2 * 2 * sizeof(INTFLOAT)); > > > + > > > + randomize((INTFLOAT *)src, 80); > > > + call_ref(src, dst0); > > > + call_new(src, dst1); > > > + if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, > > > EPS, 3 * 2 * 2)) > > > + fail(); > > > + bench_new(src, dst1); > > > +} > > > + > > > +static void test_hf_gen(void) > > > +{ > > > + LOCAL_ALIGNED_16(INTFLOAT, low, [128], [2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, alpha0, [2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, alpha1, [2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]); > > > + INTFLOAT bw = (INTFLOAT)rnd() / UINT_MAX; > > > + int i; > > > + > > > + declare_func(void, INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2], > 
> > + const INTFLOAT alpha0[2], const INTFLOAT > > > alpha1[2], > > > + INTFLOAT bw, int start, int end); > > > + > > > + randomize((INTFLOAT *)low, 128 * 2); > > > + randomize((INTFLOAT *)alpha0, 2); > > > + randomize((INTFLOAT *)alpha1, 2); > > > + for (i = 2; i < 64; i += 2) { > > > + memset(dst0, 0, 128 * 2 * sizeof(INTFLOAT)); > > > + memset(dst1, 0, 128 * 2 * sizeof(INTFLOAT)); > > > + call_ref(dst0, low, alpha0, alpha1, 0.0, i, 128); > > > + call_new(dst1, low, alpha0, alpha1, 0.0, i, 128); > > > + if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT > > > *)dst1, EPS, 128 * 2)) > > > + fail(); > > > + bench_new(dst1, low, alpha0, alpha1, bw, i, 128); > > > + } > > > +} > > > + > > > +static void test_hf_g_filt(void) > > > +{ > > > + LOCAL_ALIGNED_16(INTFLOAT, high, [128], [40][2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, g_filt, [128]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]); > > > + > > > + declare_func(void, INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2], > > > + const INTFLOAT *g_filt, int m_max, intptr_t ixh); > > > + > > > + randomize((INTFLOAT *)high, 128 * 40 * 2); > > > + randomize((INTFLOAT *)g_filt, 128); > > > + > > > + call_ref(dst0, high, g_filt, 128, 20); > > > + call_new(dst1, high, g_filt, 128, 20); > > > + if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, > > > EPS, 128 * 2)) > > > + fail(); > > > + bench_new(dst1, high, g_filt, 128, 20); > > > +} > > > + > > > +static void test_hf_apply_noise(void) > > > +{ > > > + LOCAL_ALIGNED_16(AAC_FLOAT, s_m, [128]); > > > + LOCAL_ALIGNED_16(AAC_FLOAT, q_filt, [128]); > > > + LOCAL_ALIGNED_16(INTFLOAT, ref, [128], [2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]); > > > + LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]); > > > + int noise = 0x2a; > > > + int i; > > > + > > > + declare_func(void, INTFLOAT (*Y)[2], const AAC_FLOAT *s_m, > > > + const AAC_FLOAT *q_filt, int noise, > > > + int kx, int m_max); > > > 
+ > > > + randomize((INTFLOAT *)ref, 128 * 2); > > > + randomize((INTFLOAT *)s_m, 128); > > > + randomize((INTFLOAT *)q_filt, 128); > > > + for (i = 0; i < 2; i++) { > > > + memcpy(dst0, ref, 128 * 2 * sizeof(INTFLOAT)); > > > + memcpy(dst1, ref, 128 * 2 * sizeof(INTFLOAT)); > > > + call_ref(dst0, s_m, q_filt, noise, i, 128); > > > + call_new(dst1, s_m, q_filt, noise, i, 128); > > > + if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT > > > *)dst1, EPS, 128 * 2)) > > > + fail(); > > > + bench_new(dst1, s_m, q_filt, noise, i, 128); > > > + } > > > +} > > > + > > > +void checkasm_check_sbrdsp(void) > > > +{ > > > + int i; > > > + SBRDSPContext sbrdsp; > > > + > > > + ff_sbrdsp_init(&sbrdsp); > > > + > > > + if (check_func(sbrdsp.sum64x5, "sum64x5")) > > > + test_sum64x5(); > > > + report("sum64x5"); > > > + > > > + if (check_func(sbrdsp.sum_square, "sum_square")) > > > + test_sum_square(); > > > + report("sum_square"); > > > + > > > + if (check_func(sbrdsp.neg_odd_64, "neg_odd_64")) > > > + test_neg_odd_64(); > > > + report("neg_odd_64"); > > > + > > > + if (check_func(sbrdsp.qmf_pre_shuffle, "qmf_pre_shuffle")) > > > + test_qmf_pre_shuffle(); > > > + report("qmf_pre_shuffle"); > > > + > > > + if (check_func(sbrdsp.qmf_post_shuffle, "qmf_post_shuffle")) > > > + test_qmf_post_shuffle(); > > > + report("qmf_post_shuffle"); > > > + > > > + if (check_func(sbrdsp.qmf_deint_neg, "qmf_deint_neg")) > > > + test_qmf_deint_neg(); > > > + report("qmf_deint_neg"); > > > + > > > + if (check_func(sbrdsp.qmf_deint_bfly, "qmf_deint_bfly")) > > > + test_qmf_deint_bfly(); > > > + report("qmf_deint_bfly"); > > > + > > > + if (check_func(sbrdsp.autocorrelate, "autocorrelate")) > > > + test_autocorrelate(); > > > + report("autocorrelate"); > > > + > > > + if (check_func(sbrdsp.hf_gen, "hf_gen")) > > > + test_hf_gen(); > > > + report("hf_gen"); > > > + > > > + if (check_func(sbrdsp.hf_g_filt, "hf_g_filt")) > > > + test_hf_g_filt(); > > > + report("hf_g_filt"); > > > + > > > + for 
(i = 0; i < 4; i++) { > > > > You could instead add this loop inside test_hf_apply_noise(). It would > > save you a bunch of unnecessary calls to randomize(). > > Done in the following version of the patch.
I will apply the patchset in two days if there are no objections. [...] -- Matthieu B. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel