sintable is redundant with costable: the first half of sintable is identical to the second half of costable, and the second half of sintable is the negation of its own first half.
The computation is changed to handle sign of sin values. Signed-off-by: Muhammad Faiz <mfc...@gmail.com> --- libavcodec/Makefile | 3 +- libavcodec/arm/rdft_neon.S | 20 ++++++++------ libavcodec/rdft.c | 68 ++++++++++++++++------------------------------ libavcodec/rdft.h | 26 ++---------------- 4 files changed, 39 insertions(+), 78 deletions(-) diff --git a/libavcodec/Makefile b/libavcodec/Makefile index b440a00..59029a8 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV) += qsv.o OBJS-$(CONFIG_QSVDEC) += qsvdec.o OBJS-$(CONFIG_QSVENC) += qsvenc.o OBJS-$(CONFIG_RANGECODER) += rangecoder.o -RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o -OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) +OBJS-$(CONFIG_RDFT) += rdft.o OBJS-$(CONFIG_RV34DSP) += rv34dsp.o OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S index 781d976..3bea8b4 100644 --- a/libavcodec/arm/rdft_neon.S +++ b/libavcodec/arm/rdft_neon.S @@ -22,7 +22,7 @@ #include "libavutil/arm/asm.S" function ff_rdft_calc_neon, export=1 - push {r4-r8,lr} + push {r4-r9,lr} ldr r6, [r0, #4] @ inverse mov r4, r0 @@ -30,9 +30,9 @@ function ff_rdft_calc_neon, export=1 lsls r6, r6, #31 bne 1f - add r0, r4, #20 + add r0, r4, #24 bl X(ff_fft_permute_neon) - add r0, r4, #20 + add r0, r4, #24 mov r1, r5 bl X(ff_fft_calc_neon) 1: @@ -46,8 +46,10 @@ function ff_rdft_calc_neon, export=1 sub r12, r12, #2 ldr r3, [r4, #16] @ tsin mov r7, r0 + ldr r9, [r4, #20] @ negative_sin sub r1, r1, #8 mov lr, r1 + lsl r9, r9, #31 mov r8, #-8 vld1.32 {d0}, [r0,:64]! @ d1[0,1] vld1.32 {d1}, [r1,:64], r8 @ d2[0,1] @@ -61,8 +63,10 @@ function ff_rdft_calc_neon, export=1 vmov.i32 d17, #1<<31 pld [r1, #-32] vtrn.32 d16, d17 + vdup.32 d16, r9 pld [r2, #32] - vrev64.32 d16, d16 @ d16=1,0 d17=0,1 + veor d17, d16, d17 + vrev64.32 d16, d17 @ negative_sin ? 
d16=0,1 d17=1,0 : d16=1,0 d17=0,1 pld [r3, #32] 2: veor q1, q0, q8 @ -d1[0],d1[1], d2[0],-d2[1] @@ -136,15 +140,15 @@ function ff_rdft_calc_neon, export=1 cmp r6, #0 it eq - popeq {r4-r8,pc} + popeq {r4-r9,pc} vmul.f32 d22, d22, d18 vst1.32 {d22}, [r5,:64] - add r0, r4, #20 + add r0, r4, #24 mov r1, r5 bl X(ff_fft_permute_neon) - add r0, r4, #20 + add r0, r4, #24 mov r1, r5 - pop {r4-r8,lr} + pop {r4-r9,lr} b X(ff_fft_calc_neon) endfunc diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c index c318aa8..194e0bc 100644 --- a/libavcodec/rdft.c +++ b/libavcodec/rdft.c @@ -28,28 +28,6 @@ * (Inverse) Real Discrete Fourier Transforms. */ -/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */ -#if !CONFIG_HARDCODED_TABLES -SINTABLE(16); -SINTABLE(32); -SINTABLE(64); -SINTABLE(128); -SINTABLE(256); -SINTABLE(512); -SINTABLE(1024); -SINTABLE(2048); -SINTABLE(4096); -SINTABLE(8192); -SINTABLE(16384); -SINTABLE(32768); -SINTABLE(65536); -#endif -static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = { - NULL, NULL, NULL, NULL, - ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, ff_sin_1024, - ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, ff_sin_65536, -}; - /** Map one real FFT into two parallel real even and odd FFTs. Then interleave * the two real FFTs into one complex FFT. Unmangle the results. 
* ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM @@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample *data) ev.re = data[0]; data[0] = ev.re+data[1]; data[1] = ev.re-data[1]; - for (i = 1; i < (n>>2); i++) { - i1 = 2*i; - i2 = n-i1; - /* Separate even and odd FFTs */ - ev.re = k1*(data[i1 ]+data[i2 ]); - od.im = -k2*(data[i1 ]-data[i2 ]); - ev.im = k1*(data[i1+1]-data[i2+1]); - od.re = k2*(data[i1+1]+data[i2+1]); - /* Apply twiddle factors to the odd FFT and add to the even FFT */ - data[i1 ] = ev.re + od.re*tcos[i] - od.im*tsin[i]; - data[i1+1] = ev.im + od.im*tcos[i] + od.re*tsin[i]; - data[i2 ] = ev.re - od.re*tcos[i] + od.im*tsin[i]; - data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i]; + +#define RDFT_UNMANGLE(sign0, sign1) \ + for (i = 1; i < (n>>2); i++) { \ + i1 = 2*i; \ + i2 = n-i1; \ + /* Separate even and odd FFTs */ \ + ev.re = k1*(data[i1 ]+data[i2 ]); \ + od.im = -k2*(data[i1 ]-data[i2 ]); \ + ev.im = k1*(data[i1+1]-data[i2+1]); \ + od.re = k2*(data[i1+1]+data[i2+1]); \ + /* Apply twiddle factors to the odd FFT and add to the even FFT */ \ + data[i1 ] = ev.re + od.re*tcos[i] sign0 od.im*tsin[i]; \ + data[i1+1] = ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \ + data[i2 ] = ev.re - od.re*tcos[i] sign1 od.im*tsin[i]; \ + data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \ + } + + if (s->negative_sin) { + RDFT_UNMANGLE(+,-) + } else { + RDFT_UNMANGLE(-,+) } + data[2*i+1]=s->sign_convention*data[2*i+1]; if (s->inverse) { data[0] *= k1; @@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans) s->nbits = nbits; s->inverse = trans == IDFT_C2R || trans == DFT_C2R; s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 
1 : -1; + s->negative_sin = trans == DFT_C2R || trans == DFT_R2C; if (nbits < 4 || nbits > 16) return AVERROR(EINVAL); @@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans) ff_init_ff_cos_tabs(nbits); s->tcos = ff_cos_tabs[nbits]; - s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2); -#if !CONFIG_HARDCODED_TABLES - { - int i; - const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) * 2 * M_PI / n; - for (i = 0; i < (n >> 2); i++) - s->tsin[i] = sin(i * theta); - } -#endif + s->tsin = ff_cos_tabs[nbits] + (n >> 2); s->rdft_calc = rdft_calc_c; if (ARCH_ARM) ff_rdft_init_arm(s); diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h index 37c40e7..ffafca7 100644 --- a/libavcodec/rdft.h +++ b/libavcodec/rdft.h @@ -25,29 +25,6 @@ #include "config.h" #include "fft.h" -#if CONFIG_HARDCODED_TABLES -# define SINTABLE_CONST const -#else -# define SINTABLE_CONST -#endif - -#define SINTABLE(size) \ - SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2] - -extern SINTABLE(16); -extern SINTABLE(32); -extern SINTABLE(64); -extern SINTABLE(128); -extern SINTABLE(256); -extern SINTABLE(512); -extern SINTABLE(1024); -extern SINTABLE(2048); -extern SINTABLE(4096); -extern SINTABLE(8192); -extern SINTABLE(16384); -extern SINTABLE(32768); -extern SINTABLE(65536); - struct RDFTContext { int nbits; int inverse; @@ -55,7 +32,8 @@ struct RDFTContext { /* pre/post rotation tables */ const FFTSample *tcos; - SINTABLE_CONST FFTSample *tsin; + const FFTSample *tsin; + int negative_sin; FFTContext fft; void (*rdft_calc)(struct RDFTContext *s, FFTSample *z); }; -- 2.9.3 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel