The tables that are used take 256B; the code to initialize them uses 281B here (GCC 9.3, x64, -O3, but in av_cold functions). On top of that, removing this code also allows to remove the array of AVOnce used to guard the cosine tables against multiple initializations; this also removes relocations.
Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@gmail.com> --- libavcodec/Makefile | 2 +- libavcodec/fft.h | 19 +++++++++--------- libavcodec/fft_fixed.c | 42 +++++++++++++++++++++++++++++++++++++++ libavcodec/fft_template.c | 27 +++++++++---------------- 4 files changed, 62 insertions(+), 28 deletions(-) diff --git a/libavcodec/Makefile b/libavcodec/Makefile index fea37ef3c9..8e03feb7d1 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -83,7 +83,7 @@ OBJS-$(CONFIG_EXIF) += exif.o tiff_common.o OBJS-$(CONFIG_FAANDCT) += faandct.o OBJS-$(CONFIG_FAANIDCT) += faanidct.o OBJS-$(CONFIG_FDCTDSP) += fdctdsp.o jfdctfst.o jfdctint.o -FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o +FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o OBJS-$(CONFIG_FFT) += avfft.o fft_fixed.o fft_float.o \ fft_fixed_32.o fft_init_table.o \ $(FFT-OBJS-yes) diff --git a/libavcodec/fft.h b/libavcodec/fft.h index fedc0c5ef0..4b54265b7f 100644 --- a/libavcodec/fft.h +++ b/libavcodec/fft.h @@ -114,10 +114,18 @@ struct FFTContext { uint32_t *revtab32; }; -#if CONFIG_HARDCODED_TABLES +#if CONFIG_HARDCODED_TABLES || !FFT_FLOAT #define COSTABLE_CONST const +#define ff_init_ff_cos_tabs(...) #else #define COSTABLE_CONST +#define ff_init_ff_cos_tabs FFT_NAME(ff_init_ff_cos_tabs) + +/** + * Initialize the cosine table in ff_cos_tabs[index] + * @param index index in ff_cos_tabs array of the table to initialize + */ +void ff_init_ff_cos_tabs(int index); #endif #define COSTABLE(size) \ @@ -138,16 +146,9 @@ extern COSTABLE(16384); extern COSTABLE(32768); extern COSTABLE(65536); extern COSTABLE(131072); -#endif /* FFT_FLOAT */ extern COSTABLE_CONST FFTSample* const FFT_NAME(ff_cos_tabs)[]; -#define ff_init_ff_cos_tabs FFT_NAME(ff_init_ff_cos_tabs) - -/** - * Initialize the cosine table in ff_cos_tabs[index] - * @param index index in ff_cos_tabs array of the table to initialize - */ -void ff_init_ff_cos_tabs(int index); +#endif /* FFT_FLOAT */ #define ff_fft_init FFT_NAME(ff_fft_init) #define ff_fft_end FFT_NAME(ff_fft_end) diff --git a/libavcodec/fft_fixed.c b/libavcodec/fft_fixed.c index 52d225ee09..ce52dec7fd 100644 --- a/libavcodec/fft_fixed.c +++ b/libavcodec/fft_fixed.c @@ -19,4 +19,46 @@ #define FFT_FLOAT 0 #define FFT_FIXED_32 0 #define MAX_BITS 7 + +#include "fft.h" + +COSTABLE(16) = { + 32767, 30274, 23170, 12540, + 0, 12540, 23170, 30274, +}; +COSTABLE(32) = { + 32767, 32138, 30274, 27246, + 23170, 18205, 12540, 6393, + 0, 6393, 12540, 18205, + 23170, 27246, 30274, 32138, +}; +COSTABLE(64) = { + 32767, 32610, 32138, 31357, + 30274, 28899, 27246, 25330, + 23170, 20788, 18205, 15447, + 12540, 9512, 6393, 3212, + 0, 3212, 6393, 9512, + 12540, 15447, 18205, 20788, + 23170, 25330, 27246, 28899, + 30274, 31357, 32138, 32610, +}; +COSTABLE(128) = { + 32767, 32729, 32610, 32413, + 32138, 31786, 31357, 30853, + 30274, 29622, 28899, 28106, + 27246, 26320, 25330, 24279, + 23170, 22006, 20788, 19520, + 18205, 16846, 15447, 14010, + 12540, 11039, 9512, 7962, + 6393, 4808, 3212, 1608, + 0, 1608, 3212, 4808, + 6393, 7962, 9512, 11039, + 12540, 14010, 15447, 16846, + 18205, 19520, 20788, 22006, + 23170, 24279, 25330, 26320, + 27246, 28106, 28899, 29622, + 30274, 30853, 31357, 31786, + 32138, 32413, 32610, 32729, +}; + #include "fft_template.c" diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c index 7a7d51a6b4..9d125de073 100644 --- a/libavcodec/fft_template.c +++ b/libavcodec/fft_template.c @@ -42,12 +42,12 @@ #else /* FFT_FIXED_32 */ /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ +#if FFT_FLOAT #if !CONFIG_HARDCODED_TABLES COSTABLE(16); COSTABLE(32); COSTABLE(64); COSTABLE(128); -#if FFT_FLOAT COSTABLE(256); COSTABLE(512); COSTABLE(1024); @@ -58,7 +58,6 @@ COSTABLE(16384); COSTABLE(32768); COSTABLE(65536); COSTABLE(131072); -#endif /* FFT_FLOAT */ static av_cold void init_ff_cos_tabs(int index) { @@ -87,7 +86,6 @@ INIT_FF_COS_TABS_FUNC(4, 16) INIT_FF_COS_TABS_FUNC(5, 32) INIT_FF_COS_TABS_FUNC(6, 64) INIT_FF_COS_TABS_FUNC(7, 128) -#if FFT_FLOAT INIT_FF_COS_TABS_FUNC(8, 256) INIT_FF_COS_TABS_FUNC(9, 512) INIT_FF_COS_TABS_FUNC(10, 1024) @@ -98,7 +96,6 @@ INIT_FF_COS_TABS_FUNC(14, 16384) INIT_FF_COS_TABS_FUNC(15, 32768) INIT_FF_COS_TABS_FUNC(16, 65536) INIT_FF_COS_TABS_FUNC(17, 131072) -#endif /* FFT_FLOAT */ static CosTabsInitOnce cos_tabs_init_once[] = { { NULL }, @@ -109,7 +106,6 @@ static CosTabsInitOnce cos_tabs_init_once[] = { { init_ff_cos_tabs_32, AV_ONCE_INIT }, { init_ff_cos_tabs_64, AV_ONCE_INIT }, { init_ff_cos_tabs_128, AV_ONCE_INIT }, -#if FFT_FLOAT { init_ff_cos_tabs_256, AV_ONCE_INIT }, { init_ff_cos_tabs_512, AV_ONCE_INIT }, { init_ff_cos_tabs_1024, AV_ONCE_INIT }, @@ -120,17 +116,20 @@ static CosTabsInitOnce cos_tabs_init_once[] = { { init_ff_cos_tabs_32768, AV_ONCE_INIT }, { init_ff_cos_tabs_65536, AV_ONCE_INIT }, { init_ff_cos_tabs_131072, AV_ONCE_INIT }, -#endif /* FFT_FLOAT */ }; +av_cold void ff_init_ff_cos_tabs(int index) +{ + ff_thread_once(&cos_tabs_init_once[index].control, cos_tabs_init_once[index].func); +} #endif + COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = { NULL, NULL, NULL, NULL, FFT_NAME(ff_cos_16), FFT_NAME(ff_cos_32), FFT_NAME(ff_cos_64), FFT_NAME(ff_cos_128), -#if FFT_FLOAT FFT_NAME(ff_cos_256), FFT_NAME(ff_cos_512), FFT_NAME(ff_cos_1024), @@ -141,8 +140,8 @@ COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = { FFT_NAME(ff_cos_32768), FFT_NAME(ff_cos_65536), FFT_NAME(ff_cos_131072), -#endif /* FFT_FLOAT */ }; +#endif /* FFT_FLOAT */ #endif /* FFT_FIXED_32 */ @@ -160,13 +159,6 @@ static int split_radix_permutation(int i, int n, int inverse) else return split_radix_permutation(i, m, inverse)*4 - 1; } -av_cold void ff_init_ff_cos_tabs(int index) -{ -#if (!CONFIG_HARDCODED_TABLES) && (!FFT_FIXED_32) - ff_thread_once(&cos_tabs_init_once[index].control, cos_tabs_init_once[index].func); -#endif -} - static const int avx_tab[] = { 0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15 }; @@ -250,13 +242,12 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) if (ARCH_X86) ff_fft_init_x86(s); if (CONFIG_MDCT) s->mdct_calcw = s->mdct_calc; if (HAVE_MIPSFPU) ff_fft_init_mips(s); + for (j = 4; !CONFIG_HARDCODED_TABLES && j <= nbits; j++) + ff_init_ff_cos_tabs(j); #else if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c; if (ARCH_ARM) ff_fft_fixed_init_arm(s); #endif - for(j=4; j<=nbits; j++) { - ff_init_ff_cos_tabs(j); - } #endif /* FFT_FIXED_32 */ -- 2.25.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".