On Thu, Jul 6, 2017 at 4:18 PM, Muhammad Faiz <mfc...@gmail.com> wrote: > It is redundant with costable. The first half of sintable is > identical with the second half of costable. The second half > of sintable is negative value of the first half of sintable. > > The computation is changed to handle sign of sin values. > > Signed-off-by: Muhammad Faiz <mfc...@gmail.com> > --- > libavcodec/Makefile | 3 +- > libavcodec/arm/rdft_neon.S | 20 ++++++++------ > libavcodec/rdft.c | 68 > ++++++++++++++++------------------------------ > libavcodec/rdft.h | 26 ++---------------- > 4 files changed, 39 insertions(+), 78 deletions(-) > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index b440a00..59029a8 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV) += qsv.o > OBJS-$(CONFIG_QSVDEC) += qsvdec.o > OBJS-$(CONFIG_QSVENC) += qsvenc.o > OBJS-$(CONFIG_RANGECODER) += rangecoder.o > -RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o > -OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) > +OBJS-$(CONFIG_RDFT) += rdft.o > OBJS-$(CONFIG_RV34DSP) += rv34dsp.o > OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o > OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o > diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S > index 781d976..3bea8b4 100644 > --- a/libavcodec/arm/rdft_neon.S > +++ b/libavcodec/arm/rdft_neon.S > @@ -22,7 +22,7 @@ > #include "libavutil/arm/asm.S" > > function ff_rdft_calc_neon, export=1 > - push {r4-r8,lr} > + push {r4-r9,lr} > > ldr r6, [r0, #4] @ inverse > mov r4, r0 > @@ -30,9 +30,9 @@ function ff_rdft_calc_neon, export=1 > > lsls r6, r6, #31 > bne 1f > - add r0, r4, #20 > + add r0, r4, #24 > bl X(ff_fft_permute_neon) > - add r0, r4, #20 > + add r0, r4, #24 > mov r1, r5 > bl X(ff_fft_calc_neon) > 1: > @@ -46,8 +46,10 @@ function ff_rdft_calc_neon, export=1 > sub r12, r12, #2 > ldr r3, [r4, #16] @ tsin > mov r7, r0 > + ldr r9, [r4, #20] @ negative_sin > sub r1, r1, #8 > mov lr, r1 > + 
lsl r9, r9, #31 > mov r8, #-8 > vld1.32 {d0}, [r0,:64]! @ d1[0,1] > vld1.32 {d1}, [r1,:64], r8 @ d2[0,1] > @@ -61,8 +63,10 @@ function ff_rdft_calc_neon, export=1 > vmov.i32 d17, #1<<31 > pld [r1, #-32] > vtrn.32 d16, d17 > + vdup.32 d16, r9 > pld [r2, #32] > - vrev64.32 d16, d16 @ d16=1,0 d17=0,1 > + veor d17, d16, d17 > + vrev64.32 d16, d17 @ negative_sin ? d16=0,1 > d17=1,0 : d16=1,0 d17=0,1 > pld [r3, #32] > 2: > veor q1, q0, q8 @ -d1[0],d1[1], d2[0],-d2[1] > @@ -136,15 +140,15 @@ function ff_rdft_calc_neon, export=1 > > cmp r6, #0 > it eq > - popeq {r4-r8,pc} > + popeq {r4-r9,pc} > > vmul.f32 d22, d22, d18 > vst1.32 {d22}, [r5,:64] > - add r0, r4, #20 > + add r0, r4, #24 > mov r1, r5 > bl X(ff_fft_permute_neon) > - add r0, r4, #20 > + add r0, r4, #24 > mov r1, r5 > - pop {r4-r8,lr} > + pop {r4-r9,lr} > b X(ff_fft_calc_neon) > endfunc > diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c > index c318aa8..194e0bc 100644 > --- a/libavcodec/rdft.c > +++ b/libavcodec/rdft.c > @@ -28,28 +28,6 @@ > * (Inverse) Real Discrete Fourier Transforms. > */ > > -/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */ > -#if !CONFIG_HARDCODED_TABLES > -SINTABLE(16); > -SINTABLE(32); > -SINTABLE(64); > -SINTABLE(128); > -SINTABLE(256); > -SINTABLE(512); > -SINTABLE(1024); > -SINTABLE(2048); > -SINTABLE(4096); > -SINTABLE(8192); > -SINTABLE(16384); > -SINTABLE(32768); > -SINTABLE(65536); > -#endif > -static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = { > - NULL, NULL, NULL, NULL, > - ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, > ff_sin_1024, > - ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, > ff_sin_65536, > -}; > - > /** Map one real FFT into two parallel real even and odd FFTs. Then > interleave > * the two real FFTs into one complex FFT. Unmangle the results. 
> * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM > @@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample *data) > ev.re = data[0]; > data[0] = ev.re+data[1]; > data[1] = ev.re-data[1]; > - for (i = 1; i < (n>>2); i++) { > - i1 = 2*i; > - i2 = n-i1; > - /* Separate even and odd FFTs */ > - ev.re = k1*(data[i1 ]+data[i2 ]); > - od.im = -k2*(data[i1 ]-data[i2 ]); > - ev.im = k1*(data[i1+1]-data[i2+1]); > - od.re = k2*(data[i1+1]+data[i2+1]); > - /* Apply twiddle factors to the odd FFT and add to the even FFT */ > - data[i1 ] = ev.re + od.re*tcos[i] - od.im*tsin[i]; > - data[i1+1] = ev.im + od.im*tcos[i] + od.re*tsin[i]; > - data[i2 ] = ev.re - od.re*tcos[i] + od.im*tsin[i]; > - data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i]; > + > +#define RDFT_UNMANGLE(sign0, sign1) \ > + for (i = 1; i < (n>>2); i++) { \ > + i1 = 2*i; \ > + i2 = n-i1; \ > + /* Separate even and odd FFTs */ \ > + ev.re = k1*(data[i1 ]+data[i2 ]); \ > + od.im = -k2*(data[i1 ]-data[i2 ]); \ > + ev.im = k1*(data[i1+1]-data[i2+1]); \ > + od.re = k2*(data[i1+1]+data[i2+1]); \ > + /* Apply twiddle factors to the odd FFT and add to the even FFT */ \ > + data[i1 ] = ev.re + od.re*tcos[i] sign0 od.im*tsin[i]; \ > + data[i1+1] = ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \ > + data[i2 ] = ev.re - od.re*tcos[i] sign1 od.im*tsin[i]; \ > + data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \ > + } > + > + if (s->negative_sin) { > + RDFT_UNMANGLE(+,-) > + } else { > + RDFT_UNMANGLE(-,+) > } > + > data[2*i+1]=s->sign_convention*data[2*i+1]; > if (s->inverse) { > data[0] *= k1; > @@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum > RDFTransformType trans) > s->nbits = nbits; > s->inverse = trans == IDFT_C2R || trans == DFT_C2R; > s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 
1 : -1; > + s->negative_sin = trans == DFT_C2R || trans == DFT_R2C; > > if (nbits < 4 || nbits > 16) > return AVERROR(EINVAL); > @@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum > RDFTransformType trans) > > ff_init_ff_cos_tabs(nbits); > s->tcos = ff_cos_tabs[nbits]; > - s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == > DFT_C2R)*(n>>2); > -#if !CONFIG_HARDCODED_TABLES > - { > - int i; > - const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) > * 2 * M_PI / n; > - for (i = 0; i < (n >> 2); i++) > - s->tsin[i] = sin(i * theta); > - } > -#endif > + s->tsin = ff_cos_tabs[nbits] + (n >> 2); > s->rdft_calc = rdft_calc_c; > > if (ARCH_ARM) ff_rdft_init_arm(s); > diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h > index 37c40e7..ffafca7 100644 > --- a/libavcodec/rdft.h > +++ b/libavcodec/rdft.h > @@ -25,29 +25,6 @@ > #include "config.h" > #include "fft.h" > > -#if CONFIG_HARDCODED_TABLES > -# define SINTABLE_CONST const > -#else > -# define SINTABLE_CONST > -#endif > - > -#define SINTABLE(size) \ > - SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2] > - > -extern SINTABLE(16); > -extern SINTABLE(32); > -extern SINTABLE(64); > -extern SINTABLE(128); > -extern SINTABLE(256); > -extern SINTABLE(512); > -extern SINTABLE(1024); > -extern SINTABLE(2048); > -extern SINTABLE(4096); > -extern SINTABLE(8192); > -extern SINTABLE(16384); > -extern SINTABLE(32768); > -extern SINTABLE(65536); > - > struct RDFTContext { > int nbits; > int inverse; > @@ -55,7 +32,8 @@ struct RDFTContext { > > /* pre/post rotation tables */ > const FFTSample *tcos; > - SINTABLE_CONST FFTSample *tsin; > + const FFTSample *tsin; > + int negative_sin; > FFTContext fft; > void (*rdft_calc)(struct RDFTContext *s, FFTSample *z); > }; > -- > 2.9.3 >
This patch breaks FATE, so it is dropped. I will post a new patch. Thanks. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel