James Almer: > quant_bands_signed_sse2: 417.0 > quant_bands_signed_avx: 202.0
The benchmark numbers for the C code are missing (only the SSE2 and AVX timings are given). > > Signed-off-by: James Almer <jamr...@gmail.com> > --- > libavcodec/aacenc.h | 2 +- > libavcodec/x86/aacencdsp.asm | 27 ++++++++++++++++++++++++--- > libavcodec/x86/aacencdsp_init.c | 6 ++++++ > tests/checkasm/aacencdsp.c | 4 ++-- > 4 files changed, 33 insertions(+), 6 deletions(-) > > diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h > index d07960620e..ae15f91e06 100644 > --- a/libavcodec/aacenc.h > +++ b/libavcodec/aacenc.h > @@ -242,7 +242,7 @@ typedef struct AACEncContext { > enum RawDataBlockType cur_type; ///< channel group type > cur_channel belongs to > > AudioFrameQueue afq; > - DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients > + DECLARE_ALIGNED(32, int, qcoefs)[96]; ///< quantized coefficients > DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients > > uint16_t quantize_band_cost_cache_generation; > diff --git a/libavcodec/x86/aacencdsp.asm b/libavcodec/x86/aacencdsp.asm > index 0d3ba4b89d..99be2d87f5 100644 > --- a/libavcodec/x86/aacencdsp.asm > +++ b/libavcodec/x86/aacencdsp.asm > @@ -53,8 +53,19 @@ cglobal abs_pow34, 3, 3, 3, out, in, size > ; int size, int is_signed, int maxval, const float > Q34, > ; const float rounding) > ;******************************************************************* > -INIT_XMM sse2 > +%macro AAC_QUANTIZE_BANDS 0 > cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, > maxval, Q34, rounding > +%if mmsize == 32 > + vbroadcastss m0, Q34m > + vbroadcastss m1, roundingm > +%if UNIX64 == 0 > + cvtsi2ss xm3, dword maxvalm > +%else > + cvtsi2ss xm3, maxvald > +%endif > + shufps xm3, xm3, xm3, 0 > + vinsertf128 m3, m3, xm3, 1 > +%else ; mmsize == 16 > %if UNIX64 == 0 > movss m0, Q34m > movss m1, roundingm > @@ -65,9 +76,13 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, > size, is_signed, maxval, Q > shufps m0, m0, 0 > shufps m1, m1, 0 > shufps m3, m3, 0 > +%endif > shl is_signedd, 31 > - movd m4, is_signedd > - shufps m4, m4, 
0 > + movd xm4, is_signedd > + shufps xm4, xm4, xm4, 0 > +%if mmsize == 32 > + vinsertf128 m4, m4, xm4, 1 > +%endif > shl sized, 2 > add inq, sizeq > add outq, sizeq > @@ -84,3 +99,9 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, > is_signed, maxval, Q > add sizeq, mmsize > jl .loop > RET > +%endmacro > + > +INIT_XMM sse2 > +AAC_QUANTIZE_BANDS > +INIT_YMM avx > +AAC_QUANTIZE_BANDS > diff --git a/libavcodec/x86/aacencdsp_init.c b/libavcodec/x86/aacencdsp_init.c > index e0d8dec4f8..cf17dbf91d 100644 > --- a/libavcodec/x86/aacencdsp_init.c > +++ b/libavcodec/x86/aacencdsp_init.c > @@ -30,6 +30,9 @@ void ff_abs_pow34_sse(float *out, const float *in, const > int size); > void ff_aac_quantize_bands_sse2(int *out, const float *in, const float > *scaled, > int size, int is_signed, int maxval, const > float Q34, > const float rounding); > +void ff_aac_quantize_bands_avx(int *out, const float *in, const float > *scaled, > + int size, int is_signed, int maxval, const > float Q34, > + const float rounding); > > av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s) > { > @@ -40,4 +43,7 @@ av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s) > > if (EXTERNAL_SSE2(cpu_flags)) > s->quant_bands = ff_aac_quantize_bands_sse2; The commit message seems to be wrong: you are not adding an SSE2 version (ff_aac_quantize_bands_sse2 already exists); this patch adds the AVX version. > + > + if (EXTERNAL_AVX_FAST(cpu_flags)) > + s->quant_bands = ff_aac_quantize_bands_avx; > } > diff --git a/tests/checkasm/aacencdsp.c b/tests/checkasm/aacencdsp.c > index 791dd30320..5308a2ac03 100644 > --- a/tests/checkasm/aacencdsp.c > +++ b/tests/checkasm/aacencdsp.c > @@ -81,8 +81,8 @@ static void test_quant_bands(AACEncDSPContext *s) > for (int sign = 0; sign <= 1; sign++) { > if (check_func(s->quant_bands, "quant_bands_%s", > sign ? 
"signed" : "unsigned")) { > - LOCAL_ALIGNED_16(int, out, [BUF_SIZE]); > - LOCAL_ALIGNED_16(int, out2, [BUF_SIZE]); > + LOCAL_ALIGNED_32(int, out, [BUF_SIZE]); > + LOCAL_ALIGNED_32(int, out2, [BUF_SIZE]); > > call_ref(out, in, scaled, BUF_SIZE, sign, maxval, q34, rounding); > call_new(out2, in, scaled, BUF_SIZE, sign, maxval, q34, > rounding); _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".