ffmpeg | branch: master | Michael Niedermayer <mich...@niedermayer.cc> | Fri Mar 4 15:39:55 2016 +0100| [305344d89e21ed11c74274167cf597f151778c42] | committer: Michael Niedermayer
avcodec/fft: Add revtab32 for FFTs with more than 65536 samples

x86 optimizations are used only for the cases they support (<=65536 samples)

Signed-off-by: Michael Niedermayer <mich...@niedermayer.cc>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=305344d89e21ed11c74274167cf597f151778c42
---

 libavcodec/fft.h          |  1 +
 libavcodec/fft_template.c | 31 ++++++++++++++++++++++++++-----
 libavcodec/x86/fft_init.c |  3 +++
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 60df239..c858570 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -110,6 +110,7 @@ struct FFTContext {
     void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
     enum fft_permutation_type fft_permutation;
     enum mdct_permutation_type mdct_permutation;
+    uint32_t *revtab32;
 };
 
 #if CONFIG_HARDCODED_TABLES
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 2781a33..480557f 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -143,14 +143,23 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
 {
     int i, j, n;
 
+    s->revtab = NULL;
+    s->revtab32 = NULL;
+
     if (nbits < 2 || nbits > 17)
         goto fail;
     s->nbits = nbits;
     n = 1 << nbits;
 
-    s->revtab = av_malloc(n * sizeof(uint16_t));
-    if (!s->revtab)
-        goto fail;
+    if (nbits <= 16) {
+        s->revtab = av_malloc(n * sizeof(uint16_t));
+        if (!s->revtab)
+            goto fail;
+    } else {
+        s->revtab32 = av_malloc(n * sizeof(uint32_t));
+        if (!s->revtab32)
+            goto fail;
+    }
     s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
     if (!s->tmp_buf)
         goto fail;
@@ -192,16 +201,22 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
         fft_perm_avx(s);
     } else {
         for(i=0; i<n; i++) {
+            int k;
             j = i;
             if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
                 j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
-            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
+            k = -split_radix_permutation(i, n, s->inverse) & (n-1);
+            if (s->revtab)
+                s->revtab[k] = j;
+            if (s->revtab32)
+                s->revtab32[k] = j;
         }
     }
 
     return 0;
  fail:
     av_freep(&s->revtab);
+    av_freep(&s->revtab32);
     av_freep(&s->tmp_buf);
     return -1;
 }
@@ -210,15 +225,21 @@ static void fft_permute_c(FFTContext *s, FFTComplex *z)
 {
     int j, np;
     const uint16_t *revtab = s->revtab;
+    const uint32_t *revtab32 = s->revtab32;
     np = 1 << s->nbits;
     /* TODO: handle split-radix permute in a more optimal way, probably in-place */
-    for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
+    if (revtab) {
+        for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
+    } else
+        for(j=0;j<np;j++) s->tmp_buf[revtab32[j]] = z[j];
+
     memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
 }
 
 av_cold void ff_fft_end(FFTContext *s)
 {
     av_freep(&s->revtab);
+    av_freep(&s->revtab32);
     av_freep(&s->tmp_buf);
 }
 
diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c
index 5085f11..337f32d 100644
--- a/libavcodec/x86/fft_init.c
+++ b/libavcodec/x86/fft_init.c
@@ -26,6 +26,9 @@ av_cold void ff_fft_init_x86(FFTContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
+    if (s->nbits > 16)
+        return;
+
 #if ARCH_X86_32
     if (EXTERNAL_AMD3DNOW(cpu_flags)) {
         /* 3DNow! for K6-2/3 */

_______________________________________________
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog