This builds the cube-root table using an approach based on the sieve of Eratosthenes, a popular method for generating prime numbers.
The tables are identical to the previous ones. Tested with FATE.

This does not yet work with --enable-hardcoded-tables, because of the union and the lack of a proper WRITE_ARRAY for it. I would like feedback on this; if we always initialize the table dynamically, this won't need addressing.

Sample benchmark (Haswell, GNU/Linux+gcc):

prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips

new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips

Both small and large run counts are given: the function is called only once, so a small run count may give a better picture. The small-run numbers are fairly consistent, and there is a consistent downward trend from small to large run counts, at which point the timing stabilizes at a new value.
Signed-off-by: Ganesh Ajjanagadde <gajjanaga...@gmail.com> --- libavcodec/aacdec_fixed.c | 4 ++-- libavcodec/aacdec_template.c | 2 +- libavcodec/cbrt_tablegen.h | 53 +++++++++++++++++++++++++++++++------------- 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c index 923fbe0..ebc585e 100644 --- a/libavcodec/aacdec_fixed.c +++ b/libavcodec/aacdec_fixed.c @@ -154,9 +154,9 @@ static void vector_pow43(int *coefs, int len) for (i=0; i<len; i++) { coef = coefs[i]; if (coef < 0) - coef = -(int)cbrt_tab[-coef]; + coef = -(int)cbrt_tab[-coef].i; else - coef = (int)cbrt_tab[coef]; + coef = (int)cbrt_tab[coef].i; coefs[i] = coef; } } diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c index 620600c..b3ec9e6 100644 --- a/libavcodec/aacdec_template.c +++ b/libavcodec/aacdec_template.c @@ -1791,7 +1791,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024], v = -v; *icf++ = v; #else - *icf++ = cbrt_tab[n] | (bits & 1U<<31); + *icf++ = cbrt_tab[n].i | (bits & 1U<<31); #endif /* USE_FIXED */ bits <<= 1; } else { diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h index 59b5a1d..f5ae03e 100644 --- a/libavcodec/cbrt_tablegen.h +++ b/libavcodec/cbrt_tablegen.h @@ -26,14 +26,9 @@ #include <stdint.h> #include <math.h> #include "libavutil/attributes.h" +#include "libavutil/intfloat.h" #include "libavcodec/aac_defines.h" -#if USE_FIXED -#define CBRT(x) lrint((x).f * 8192) -#else -#define CBRT(x) x.i -#endif - #if CONFIG_HARDCODED_TABLES #if USE_FIXED #define cbrt_tableinit_fixed() @@ -43,20 +38,46 @@ #include "libavcodec/cbrt_tables.h" #endif #else -static uint32_t cbrt_tab[1 << 13]; +union ff_int32float64 { + uint32_t i; + double f; +}; +static union ff_int32float64 cbrt_tab[1 << 13]; static av_cold void AAC_RENAME(cbrt_tableinit)(void) { - if (!cbrt_tab[(1<<13) - 1]) { - int i; - for (i = 0; i < 1<<13; i++) { - union { - float f; - uint32_t i; - } f; - f.f = 
cbrt(i) * i; - cbrt_tab[i] = CBRT(f); + int i, j, k; + double cbrt_val; + + if (!cbrt_tab[(1<<13) - 1].i) { + cbrt_tab[0].f = 0; + for (i = 1; i < 1<<13; i++) + cbrt_tab[i].f = 1; + + /* have to worry about non-squarefree numbers */ + for (i = 2; i < 90; i++) { + if (cbrt_tab[i].f == 1) { + cbrt_val = i * cbrt(i); + for (k = i; k < (1<<13); k*= i) + for (j = k; j < (1<<13); j+=k) + cbrt_tab[j].f *= cbrt_val; + } } + + for (i = 91; i <= 8191; i+=2) { + if (cbrt_tab[i].f == 1) { + cbrt_val = i * cbrt(i); + for (j = i; j < (1<<13); j+=i) + cbrt_tab[j].f *= cbrt_val; + } + } +#if USE_FIXED + for (i = 0; i < 1<<13; i++) + cbrt_tab[i].i = lrint(cbrt_tab[i].f * 8192); +#else + for (i = 0; i < 1<<13; i++) + cbrt_tab[i].i = av_float2int((float)cbrt_tab[i].f); +#endif } } #endif /* CONFIG_HARDCODED_TABLES */ -- 2.6.4 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel