On Fri, Jul 7, 2017 at 2:50 PM, Muhammad Faiz <mfc...@gmail.com> wrote: > It is redundant with costable. The first half of sintable is > identical with the second half of costable. The second half > of sintable is negative value of the first half of sintable. > > The computation is changed to handle sign of sin values, in > C code and ARM assembly code. > > Signed-off-by: Muhammad Faiz <mfc...@gmail.com> > --- > libavcodec/Makefile | 3 +- > libavcodec/arm/rdft_neon.S | 13 ++++++--- > libavcodec/rdft.c | 68 > ++++++++++++++++------------------------------ > libavcodec/rdft.h | 26 ++---------------- > 4 files changed, 36 insertions(+), 74 deletions(-) > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index b440a00..59029a8 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV) += qsv.o > OBJS-$(CONFIG_QSVDEC) += qsvdec.o > OBJS-$(CONFIG_QSVENC) += qsvenc.o > OBJS-$(CONFIG_RANGECODER) += rangecoder.o > -RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o > -OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) > +OBJS-$(CONFIG_RDFT) += rdft.o > OBJS-$(CONFIG_RV34DSP) += rv34dsp.o > OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o > OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o > diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S > index 781d976..eabb92b 100644 > --- a/libavcodec/arm/rdft_neon.S > +++ b/libavcodec/arm/rdft_neon.S > @@ -30,18 +30,21 @@ function ff_rdft_calc_neon, export=1 > > lsls r6, r6, #31 > bne 1f > - add r0, r4, #20 > + add r0, r4, #24 > bl X(ff_fft_permute_neon) > - add r0, r4, #20 > + add r0, r4, #24 > mov r1, r5 > bl X(ff_fft_calc_neon) > 1: > ldr r12, [r4, #0] @ nbits > mov r2, #1 > + ldr r8, [r4, #20] @ negative_sin > lsl r12, r2, r12 > add r0, r5, #8 > + lsl r8, r8, #31 > add r1, r5, r12, lsl #2 > lsr r12, r12, #2 > + vdup.32 d26, r8 > ldr r2, [r4, #12] @ tcos > sub r12, r12, #2 > ldr r3, [r4, #16] @ tsin > @@ -55,6 +58,7 @@ function ff_rdft_calc_neon, export=1 > 
vld1.32 {d5}, [r3,:64]! @ tsin[i] > vmov.f32 d18, #0.5 @ k1 > vdup.32 d19, r6 > + veor d5, d26, d5 > pld [r0, #32] > veor d19, d18, d19 @ k2 > vmov.i32 d16, #0 > @@ -90,6 +94,7 @@ function ff_rdft_calc_neon, export=1 > vld1.32 {d5}, [r3,:64]! @ tsin[i] > veor d24, d22, d17 @ ev.re,-ev.im > vrev64.32 d3, d23 @ od.re, od.im > + veor d5, d26, d5 > pld [r2, #32] > veor d2, d3, d16 @ -od.re, od.im > pld [r3, #32] > @@ -140,10 +145,10 @@ function ff_rdft_calc_neon, export=1 > > vmul.f32 d22, d22, d18 > vst1.32 {d22}, [r5,:64] > - add r0, r4, #20 > + add r0, r4, #24 > mov r1, r5 > bl X(ff_fft_permute_neon) > - add r0, r4, #20 > + add r0, r4, #24 > mov r1, r5 > pop {r4-r8,lr} > b X(ff_fft_calc_neon) > diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c > index c318aa8..194e0bc 100644 > --- a/libavcodec/rdft.c > +++ b/libavcodec/rdft.c > @@ -28,28 +28,6 @@ > * (Inverse) Real Discrete Fourier Transforms. > */ > > -/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */ > -#if !CONFIG_HARDCODED_TABLES > -SINTABLE(16); > -SINTABLE(32); > -SINTABLE(64); > -SINTABLE(128); > -SINTABLE(256); > -SINTABLE(512); > -SINTABLE(1024); > -SINTABLE(2048); > -SINTABLE(4096); > -SINTABLE(8192); > -SINTABLE(16384); > -SINTABLE(32768); > -SINTABLE(65536); > -#endif > -static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = { > - NULL, NULL, NULL, NULL, > - ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, > ff_sin_1024, > - ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, > ff_sin_65536, > -}; > - > /** Map one real FFT into two parallel real even and odd FFTs. Then > interleave > * the two real FFTs into one complex FFT. Unmangle the results. 
> * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM > @@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample *data) > ev.re = data[0]; > data[0] = ev.re+data[1]; > data[1] = ev.re-data[1]; > - for (i = 1; i < (n>>2); i++) { > - i1 = 2*i; > - i2 = n-i1; > - /* Separate even and odd FFTs */ > - ev.re = k1*(data[i1 ]+data[i2 ]); > - od.im = -k2*(data[i1 ]-data[i2 ]); > - ev.im = k1*(data[i1+1]-data[i2+1]); > - od.re = k2*(data[i1+1]+data[i2+1]); > - /* Apply twiddle factors to the odd FFT and add to the even FFT */ > - data[i1 ] = ev.re + od.re*tcos[i] - od.im*tsin[i]; > - data[i1+1] = ev.im + od.im*tcos[i] + od.re*tsin[i]; > - data[i2 ] = ev.re - od.re*tcos[i] + od.im*tsin[i]; > - data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i]; > + > +#define RDFT_UNMANGLE(sign0, sign1) \ > + for (i = 1; i < (n>>2); i++) { \ > + i1 = 2*i; \ > + i2 = n-i1; \ > + /* Separate even and odd FFTs */ \ > + ev.re = k1*(data[i1 ]+data[i2 ]); \ > + od.im = -k2*(data[i1 ]-data[i2 ]); \ > + ev.im = k1*(data[i1+1]-data[i2+1]); \ > + od.re = k2*(data[i1+1]+data[i2+1]); \ > + /* Apply twiddle factors to the odd FFT and add to the even FFT */ \ > + data[i1 ] = ev.re + od.re*tcos[i] sign0 od.im*tsin[i]; \ > + data[i1+1] = ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \ > + data[i2 ] = ev.re - od.re*tcos[i] sign1 od.im*tsin[i]; \ > + data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \ > + } > + > + if (s->negative_sin) { > + RDFT_UNMANGLE(+,-) > + } else { > + RDFT_UNMANGLE(-,+) > } > + > data[2*i+1]=s->sign_convention*data[2*i+1]; > if (s->inverse) { > data[0] *= k1; > @@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum > RDFTransformType trans) > s->nbits = nbits; > s->inverse = trans == IDFT_C2R || trans == DFT_C2R; > s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 
1 : -1; > + s->negative_sin = trans == DFT_C2R || trans == DFT_R2C; > > if (nbits < 4 || nbits > 16) > return AVERROR(EINVAL); > @@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum > RDFTransformType trans) > > ff_init_ff_cos_tabs(nbits); > s->tcos = ff_cos_tabs[nbits]; > - s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == > DFT_C2R)*(n>>2); > -#if !CONFIG_HARDCODED_TABLES > - { > - int i; > - const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) > * 2 * M_PI / n; > - for (i = 0; i < (n >> 2); i++) > - s->tsin[i] = sin(i * theta); > - } > -#endif > + s->tsin = ff_cos_tabs[nbits] + (n >> 2); > s->rdft_calc = rdft_calc_c; > > if (ARCH_ARM) ff_rdft_init_arm(s); > diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h > index 37c40e7..ffafca7 100644 > --- a/libavcodec/rdft.h > +++ b/libavcodec/rdft.h > @@ -25,29 +25,6 @@ > #include "config.h" > #include "fft.h" > > -#if CONFIG_HARDCODED_TABLES > -# define SINTABLE_CONST const > -#else > -# define SINTABLE_CONST > -#endif > - > -#define SINTABLE(size) \ > - SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2] > - > -extern SINTABLE(16); > -extern SINTABLE(32); > -extern SINTABLE(64); > -extern SINTABLE(128); > -extern SINTABLE(256); > -extern SINTABLE(512); > -extern SINTABLE(1024); > -extern SINTABLE(2048); > -extern SINTABLE(4096); > -extern SINTABLE(8192); > -extern SINTABLE(16384); > -extern SINTABLE(32768); > -extern SINTABLE(65536); > - > struct RDFTContext { > int nbits; > int inverse; > @@ -55,7 +32,8 @@ struct RDFTContext { > > /* pre/post rotation tables */ > const FFTSample *tcos; > - SINTABLE_CONST FFTSample *tsin; > + const FFTSample *tsin; > + int negative_sin; > FFTContext fft; > void (*rdft_calc)(struct RDFTContext *s, FFTSample *z); > }; > -- > 2.9.3 >
Applied. Thanks. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel