On Wed, Jul 5, 2017 at 10:22 PM, Rostislav Pehlivanov <atomnu...@gmail.com> wrote: > On 5 July 2017 at 11:33, Muhammad Faiz <mfc...@gmail.com> wrote: > >> It is redundant with costable. The first half of sintable is >> identical with the second half of costable. The second half >> of sintable is negative value of the first half of sintable. >> >> The computation is changed to handle sign of sin values, >> so disable arm optimization. >> >> Signed-off-by: Muhammad Faiz <mfc...@gmail.com> >> --- >> libavcodec/Makefile | 3 +-- >> libavcodec/rdft.c | 70 +++++++++++++++++++----------- >> ----------------------- >> libavcodec/rdft.h | 25 +------------------ >> 3 files changed, 27 insertions(+), 71 deletions(-) >> >> diff --git a/libavcodec/Makefile b/libavcodec/Makefile >> index b440a00..59029a8 100644 >> --- a/libavcodec/Makefile >> +++ b/libavcodec/Makefile >> @@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV) += qsv.o >> OBJS-$(CONFIG_QSVDEC) += qsvdec.o >> OBJS-$(CONFIG_QSVENC) += qsvenc.o >> OBJS-$(CONFIG_RANGECODER) += rangecoder.o >> -RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o >> -OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) >> +OBJS-$(CONFIG_RDFT) += rdft.o >> OBJS-$(CONFIG_RV34DSP) += rv34dsp.o >> OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o >> OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o >> diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c >> index c318aa8..9590673 100644 >> --- a/libavcodec/rdft.c >> +++ b/libavcodec/rdft.c >> @@ -28,28 +28,6 @@ >> * (Inverse) Real Discrete Fourier Transforms. 
>> */ >> >> -/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */ >> -#if !CONFIG_HARDCODED_TABLES >> -SINTABLE(16); >> -SINTABLE(32); >> -SINTABLE(64); >> -SINTABLE(128); >> -SINTABLE(256); >> -SINTABLE(512); >> -SINTABLE(1024); >> -SINTABLE(2048); >> -SINTABLE(4096); >> -SINTABLE(8192); >> -SINTABLE(16384); >> -SINTABLE(32768); >> -SINTABLE(65536); >> -#endif >> -static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = { >> - NULL, NULL, NULL, NULL, >> - ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, >> ff_sin_1024, >> - ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, >> ff_sin_65536, >> -}; >> - >> /** Map one real FFT into two parallel real even and odd FFTs. Then >> interleave >> * the two real FFTs into one complex FFT. Unmangle the results. >> * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM >> @@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample >> *data) >> ev.re = data[0]; >> data[0] = ev.re+data[1]; >> data[1] = ev.re-data[1]; >> - for (i = 1; i < (n>>2); i++) { >> - i1 = 2*i; >> - i2 = n-i1; >> - /* Separate even and odd FFTs */ >> - ev.re = k1*(data[i1 ]+data[i2 ]); >> - od.im = -k2*(data[i1 ]-data[i2 ]); >> - ev.im = k1*(data[i1+1]-data[i2+1]); >> - od.re = k2*(data[i1+1]+data[i2+1]); >> - /* Apply twiddle factors to the odd FFT and add to the even FFT */ >> - data[i1 ] = ev.re + od.re*tcos[i] - od.im*tsin[i]; >> - data[i1+1] = ev.im + od.im*tcos[i] + od.re*tsin[i]; >> - data[i2 ] = ev.re - od.re*tcos[i] + od.im*tsin[i]; >> - data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i]; >> + >> +#define RDFT_UNMANGLE(sign0, sign1) >> \ >> + for (i = 1; i < (n>>2); i++) { >> \ >> + i1 = 2*i; >> \ >> + i2 = n-i1; >> \ >> + /* Separate even and odd FFTs */ >> \ >> + ev.re = k1*(data[i1 ]+data[i2 ]); >> \ >> + od.im = -k2*(data[i1 ]-data[i2 ]); >> \ >> + ev.im = k1*(data[i1+1]-data[i2+1]); >> \ >> + od.re = k2*(data[i1+1]+data[i2+1]); >> \ >> + /* Apply twiddle factors to the odd 
FFT and add to the even FFT >> */ \ >> + data[i1 ] = ev.re + od.re*tcos[i] sign0 od.im*tsin[i]; >> \ >> + data[i1+1] = ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; >> \ >> + data[i2 ] = ev.re - od.re*tcos[i] sign1 od.im*tsin[i]; >> \ >> + data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; >> \ >> + } >> + >> + if (s->inverse != (s->sign_convention == 1)) { >> + RDFT_UNMANGLE(-,+) >> + } else { >> + RDFT_UNMANGLE(+,-) >> } >> + >> data[2*i+1]=s->sign_convention*data[2*i+1]; >> if (s->inverse) { >> data[0] *= k1; >> @@ -113,18 +100,11 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, >> enum RDFTransformType trans) >> >> ff_init_ff_cos_tabs(nbits); >> s->tcos = ff_cos_tabs[nbits]; >> - s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == >> DFT_C2R)*(n>>2); >> -#if !CONFIG_HARDCODED_TABLES >> - { >> - int i; >> - const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : >> 1) * 2 * M_PI / n; >> - for (i = 0; i < (n >> 2); i++) >> - s->tsin[i] = sin(i * theta); >> - } >> -#endif >> + s->tsin = ff_cos_tabs[nbits] + (n >> 2); >> s->rdft_calc = rdft_calc_c; >> >> - if (ARCH_ARM) ff_rdft_init_arm(s); >> + // FIXME change arm implementation >> + //if (ARCH_ARM) ff_rdft_init_arm(s); >> >> > You could just change the sintable back to what it used to be in the arm > init function. Or you could try to figure out how to change the asm to work > with it.
I tried to figure out how to change the asm. Unfortunately, I didn't test it because I failed to set up a cross-compile environment. However, I'll post the patch. Thanks. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel