By default, all globals in C/C++ compiled by clang are allocated in non-large data sections. See [1] for background on code models. For PIC (position-independent code), this is fine as long as the binary is small, but as binary size increases, users may want to use the medium/large code models (-mcmodel=medium), which move data into large sections. As data in these large sections cannot be accessed using PIC code anymore (as it may be too far away), the compiler ends up using a different instruction sequence when building C/C++ code -- using the GOT to access these globals (which can be relaxed by the linker at link time if the binary ends up being smaller).
However, hardcoded assembly (external asm files as well as inline assembly) continues to access these globals defined in C/C++ files using the older (and now invalid) instruction sequence. So, we mark all such globals with an attribute that forces them to be allocated in small sections, allowing them to be validly accessed from the assembly code. This patch should not have any effect on builds that use the small code model, which is the default mode. [1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models --- libavcodec/ac3dsp.h | 4 +- libavcodec/cabac.h | 4 +- libavcodec/h263dsp.h | 3 +- libavcodec/sbrdsp.h | 3 +- libavcodec/x86/constants.c | 4 +- libavcodec/x86/constants.h | 79 +++++++++++++++++---------------- libavutil/attributes.h | 6 +++ libavutil/attributes_internal.h | 16 +++++++ libavutil/mem_internal.h | 16 ++++--- 9 files changed, 84 insertions(+), 51 deletions(-) diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h index b1b2bced8f..914824025f 100644 --- a/libavcodec/ac3dsp.h +++ b/libavcodec/ac3dsp.h @@ -25,11 +25,13 @@ #include <stddef.h> #include <stdint.h> +#include "libavutil/mem_internal.h" + /** * Number of mantissa bits written for each bap value. * bap values with fractional bits are set to 0 and are calculated separately. 
*/ -extern const uint16_t ff_ac3_bap_bits[16]; +EXTERN_ASM_VAR(const uint16_t, ff_ac3_bap_bits)[16]; typedef struct AC3DSPContext { /** diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index 38d06b2842..70730e4059 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -29,7 +29,9 @@ #include <stdint.h> -extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63]; +#include "libavutil/mem_internal.h" + +EXTERN_ASM_VAR(const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 4*64 + 63]; #define H264_NORM_SHIFT_OFFSET 0 #define H264_LPS_RANGE_OFFSET 512 #define H264_MLPS_STATE_OFFSET 1024 diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h index 2dccd23392..fd107e7546 100644 --- a/libavcodec/h263dsp.h +++ b/libavcodec/h263dsp.h @@ -20,8 +20,9 @@ #define AVCODEC_H263DSP_H #include <stdint.h> +#include "libavutil/mem_internal.h" -extern const uint8_t ff_h263_loop_filter_strength[32]; +EXTERN_ASM_VAR(const uint8_t, ff_h263_loop_filter_strength)[32]; typedef struct H263DSPContext { void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); diff --git a/libavcodec/sbrdsp.h b/libavcodec/sbrdsp.h index 09b2cbfc10..8ce467c0fb 100644 --- a/libavcodec/sbrdsp.h +++ b/libavcodec/sbrdsp.h @@ -23,6 +23,7 @@ #include <stdint.h> #include "aac_defines.h" +#include "libavutil/mem_internal.h" typedef struct SBRDSPContext { void (*sum64x5)(INTFLOAT *z); @@ -43,7 +44,7 @@ typedef struct SBRDSPContext { int kx, int m_max); } SBRDSPContext; -extern const INTFLOAT AAC_RENAME(ff_sbr_noise_table)[][2]; +EXTERN_ASM_VAR(const INTFLOAT, AAC_RENAME(ff_sbr_noise_table))[][2]; void AAC_RENAME(ff_sbrdsp_init)(SBRDSPContext *s); void ff_sbrdsp_init_arm(SBRDSPContext *s); diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c index c5f3c6428e..ed0d456d99 100644 --- a/libavcodec/x86/constants.c +++ b/libavcodec/x86/constants.c @@ -46,7 +46,7 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pw_256) = { 0x0100010001000100ULL, 0x010 0x0100010001000100ULL, 0x0100010001000100ULL 
}; DECLARE_ALIGNED(32, const ymm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL, 0x0200020002000200ULL, 0x0200020002000200ULL }; -DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL }; +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL}; DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL, @@ -70,7 +70,7 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pb_2) = { 0x0202020202020202ULL, 0x020 0x0202020202020202ULL, 0x0202020202020202ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL, 0x0303030303030303ULL, 0x0303030303030303ULL }; -DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL }; +DECLARE_ASM_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL, 0x8080808080808080ULL, 0x8080808080808080ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL, diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h index 4a55adb5b3..c2e0fd8f02 100644 --- a/libavcodec/x86/constants.h +++ b/libavcodec/x86/constants.h @@ -23,49 +23,50 @@ #include <stdint.h> +#include "libavutil/mem_internal.h" #include "libavutil/x86/asm.h" -extern const ymm_reg ff_pw_1; -extern const ymm_reg ff_pw_2; -extern const xmm_reg ff_pw_3; -extern const ymm_reg ff_pw_4; -extern const xmm_reg ff_pw_5; -extern const xmm_reg ff_pw_8; -extern const xmm_reg ff_pw_9; -extern const uint64_t ff_pw_15; -extern const xmm_reg ff_pw_16; -extern const xmm_reg ff_pw_18; -extern const xmm_reg ff_pw_20; -extern const xmm_reg ff_pw_32; -extern const uint64_t 
ff_pw_53; -extern const xmm_reg ff_pw_64; -extern const uint64_t ff_pw_128; -extern const ymm_reg ff_pw_255; -extern const ymm_reg ff_pw_256; -extern const ymm_reg ff_pw_512; -extern const ymm_reg ff_pw_1023; -extern const ymm_reg ff_pw_1024; -extern const ymm_reg ff_pw_2048; -extern const ymm_reg ff_pw_4095; -extern const ymm_reg ff_pw_4096; -extern const ymm_reg ff_pw_8192; -extern const ymm_reg ff_pw_m1; +EXTERN_ASM_VAR(const ymm_reg, ff_pw_1); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_2); +EXTERN_ASM_VAR(const xmm_reg, ff_pw_3); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_4); +EXTERN_ASM_VAR(const xmm_reg, ff_pw_5); +EXTERN_ASM_VAR(const xmm_reg, ff_pw_8); +EXTERN_ASM_VAR(const xmm_reg, ff_pw_9); +EXTERN_ASM_VAR(const uint64_t, ff_pw_15); +EXTERN_ASM_VAR(const xmm_reg, ff_pw_16); +EXTERN_ASM_VAR(const xmm_reg, ff_pw_18); +EXTERN_ASM_VAR(const xmm_reg, ff_pw_20); +EXTERN_ASM_VAR(const xmm_reg, ff_pw_32); +EXTERN_ASM_VAR(const uint64_t, ff_pw_53); +EXTERN_ASM_VAR(const xmm_reg, ff_pw_64); +EXTERN_ASM_VAR(const uint64_t, ff_pw_128); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_255); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_256); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_512); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_1023); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_1024); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_2048); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_4095); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_4096); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_8192); +EXTERN_ASM_VAR(const ymm_reg, ff_pw_m1); -extern const ymm_reg ff_pb_0; -extern const ymm_reg ff_pb_1; -extern const ymm_reg ff_pb_2; -extern const ymm_reg ff_pb_3; -extern const ymm_reg ff_pb_80; -extern const ymm_reg ff_pb_FE; -extern const uint64_t ff_pb_FC; +EXTERN_ASM_VAR(const ymm_reg, ff_pb_0); +EXTERN_ASM_VAR(const ymm_reg, ff_pb_1); +EXTERN_ASM_VAR(const ymm_reg, ff_pb_2); +EXTERN_ASM_VAR(const ymm_reg, ff_pb_3); +EXTERN_ASM_VAR(const ymm_reg, ff_pb_80); +EXTERN_ASM_VAR(const ymm_reg, ff_pb_FE); +EXTERN_ASM_VAR(const uint64_t, ff_pb_FC); -extern const 
xmm_reg ff_ps_neg; +EXTERN_ASM_VAR(const xmm_reg, ff_ps_neg); -extern const ymm_reg ff_pd_1; -extern const ymm_reg ff_pd_16; -extern const ymm_reg ff_pd_32; -extern const ymm_reg ff_pd_64; -extern const ymm_reg ff_pd_8192; -extern const ymm_reg ff_pd_65535; +EXTERN_ASM_VAR(const ymm_reg, ff_pd_1); +EXTERN_ASM_VAR(const ymm_reg, ff_pd_16); +EXTERN_ASM_VAR(const ymm_reg, ff_pd_32); +EXTERN_ASM_VAR(const ymm_reg, ff_pd_64); +EXTERN_ASM_VAR(const ymm_reg, ff_pd_8192); +EXTERN_ASM_VAR(const ymm_reg, ff_pd_65535); #endif /* AVCODEC_X86_CONSTANTS_H */ diff --git a/libavutil/attributes.h b/libavutil/attributes.h index 04c615c952..dfc35fa31e 100644 --- a/libavutil/attributes.h +++ b/libavutil/attributes.h @@ -40,6 +40,12 @@ # define AV_HAS_BUILTIN(x) 0 #endif +#ifdef __has_attribute +# define AV_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +# define AV_HAS_ATTRIBUTE(x) 0 +#endif + #ifndef av_always_inline #if AV_GCC_VERSION_AT_LEAST(3,1) # define av_always_inline __attribute__((always_inline)) inline diff --git a/libavutil/attributes_internal.h b/libavutil/attributes_internal.h index bc85ce77ff..c557fa0af0 100644 --- a/libavutil/attributes_internal.h +++ b/libavutil/attributes_internal.h @@ -19,6 +19,7 @@ #ifndef AVUTIL_ATTRIBUTES_INTERNAL_H #define AVUTIL_ATTRIBUTES_INTERNAL_H +#include "config.h" #include "attributes.h" #if (AV_GCC_VERSION_AT_LEAST(4,0) || defined(__clang__)) && (defined(__ELF__) || defined(__MACH__)) @@ -33,4 +34,19 @@ #define EXTERN extern attribute_visibility_hidden +/** + * Some globals defined in C files are used from hardcoded asm that assumes small + * code model (that is, accessing these globals without GOT). This is a problem + * when FFMpeg is built with medium code model (-mcmodel=medium) which allocates + * all globals in a data section that's unreachable with PC relative instructions + * (small code model instruction sequence). 
We mark all such globals with this + * attribute_mcmodel_small to ensure assembly accessible globals continue to be + * allocated in sections reachable from PC relative instructions. + */ +#if ARCH_X86_64 && defined(__ELF__) && AV_HAS_ATTRIBUTE(model) +# define attribute_mcmodel_small __attribute__((model("small"))) +#else +# define attribute_mcmodel_small +#endif + #endif /* AVUTIL_ATTRIBUTES_INTERNAL_H */ diff --git a/libavutil/mem_internal.h b/libavutil/mem_internal.h index c027fa51c3..c621fa861f 100644 --- a/libavutil/mem_internal.h +++ b/libavutil/mem_internal.h @@ -29,6 +29,7 @@ #endif #include "attributes.h" +#include "attributes_internal.h" #include "macros.h" /** @@ -78,16 +79,19 @@ #if defined(__DJGPP__) #define DECLARE_ALIGNED_T(n,t,v) alignas(FFMIN(n, 16)) t v - #define DECLARE_ASM_ALIGNED(n,t,v) alignas(FFMIN(n, 16)) t av_used v - #define DECLARE_ASM_CONST(n,t,v) alignas(FFMIN(n, 16)) static const t av_used v + #define DECLARE_ASM_ALIGNED(n,t,v) alignas(FFMIN(n, 16)) t av_used attribute_mcmodel_small v + #define DECLARE_ASM_CONST(n,t,v) alignas(FFMIN(n, 16)) static const t av_used attribute_mcmodel_small v + #define EXTERN_ASM_VAR(t,v) extern t attribute_mcmodel_small v #elif defined(_MSC_VER) #define DECLARE_ALIGNED_T(n,t,v) __declspec(align(n)) t v - #define DECLARE_ASM_ALIGNED(n,t,v) __declspec(align(n)) t v - #define DECLARE_ASM_CONST(n,t,v) __declspec(align(n)) static const t v + #define DECLARE_ASM_ALIGNED(n,t,v) __declspec(align(n)) t attribute_mcmodel_small v + #define DECLARE_ASM_CONST(n,t,v) __declspec(align(n)) static const t attribute_mcmodel_small v + #define EXTERN_ASM_VAR(t,v) extern t attribute_mcmodel_small v #else #define DECLARE_ALIGNED_T(n,t,v) alignas(n) t v - #define DECLARE_ASM_ALIGNED(n,t,v) alignas(n) t av_used v - #define DECLARE_ASM_CONST(n,t,v) alignas(n) static const t av_used v + #define DECLARE_ASM_ALIGNED(n,t,v) alignas(n) t av_used attribute_mcmodel_small v + #define DECLARE_ASM_CONST(n,t,v) alignas(n) static const t 
av_used attribute_mcmodel_small v + #define EXTERN_ASM_VAR(t,v) extern t attribute_mcmodel_small v #endif #if HAVE_SIMD_ALIGN_64 -- 2.49.0.987.g0cc8ee98dc-goog _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".