[FFmpeg-devel] [PATCH] Mark C globals with small code model

2025-02-26 Thread Pranav Kant via ffmpeg-devel
By default, all globals in C/C++ compiled by clang are allocated
in non-large data sections. See [1] for background on code models.
For PIC (position-independent code), this is fine as long as the binary
is small, but as binary size increases, users may want to use medium/large
code models (-mcmodel=medium), which move data into large sections.
As data in these large sections cannot be accessed using PIC code
anymore (as it may be too far away), the compiler ends up using a
different instruction sequence when building C/C++ code -- using the GOT
to access these globals (which can be relaxed by the linker at link time
if the binary ends up being smaller). However, assembly files continue to
access these globals defined in C/C++ files using the older (and now
invalid) instruction sequence. So, we mark all such globals with an
attribute that forces them to be allocated in small sections, allowing
them to be validly accessed from the assembly code.

This patch should not have any effect on builds that use the small code
model, which is the default.

[1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models

Signed-off-by: Pranav Kant 
---
 libavcodec/ac3dsp.c             |  2 ++
 libavcodec/cabac.c              |  2 ++
 libavcodec/x86/constants.c      |  8 ++++++++
 libavutil/attributes_internal.h | 15 +++++++++++++++
 4 files changed, 27 insertions(+)

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 730fa70fff..d16b6c24c3 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -25,6 +25,7 @@
 
 #include "config.h"
 #include "libavutil/attributes.h"
+#include "libavutil/attributes_internal.h"
 #include "libavutil/common.h"
 #include "libavutil/intmath.h"
 #include "libavutil/mem_internal.h"
@@ -104,6 +105,7 @@ static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], 
uint8_t *bap,
 mant_cnt[bap[len]]++;
 }
 
+attribute_mcmodel_small
 DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
 0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
 };
diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index 7d41cd2ae6..b8c6db29a2 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -24,11 +24,13 @@
  * Context Adaptive Binary Arithmetic Coder.
  */
 
+#include "libavutil/attributes_internal.h"
 #include "libavutil/error.h"
 #include "libavutil/mem_internal.h"
 
 #include "cabac.h"
 
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 
4*64 + 63] = {
 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index bc7f2b17b8..347b7dd1d3 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -18,17 +18,21 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/attributes_internal.h"
 #include "libavutil/mem_internal.h"
 #include "libavutil/x86/asm.h" // for xmm_reg
 #include "constants.h"
 
+attribute_mcmodel_small
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)= { 0x0001000100010001ULL, 
0x0001000100010001ULL,
 0x0001000100010001ULL, 
0x0001000100010001ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)= { 0x0002000200020002ULL, 
0x0002000200020002ULL,
 0x0002000200020002ULL, 
0x0002000200020002ULL };
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_3)= { 0x0003000300030003ULL, 
0x0003000300030003ULL };
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(32, const ymm_reg,  ff_pw_4)= { 0x0004000400040004ULL, 
0x0004000400040004ULL,
 0x0004000400040004ULL, 
0x0004000400040004ULL };
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_5)= { 0x0005000500050005ULL, 
0x0005000500050005ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8)= { 0x0008000800080008ULL, 
0x0008000800080008ULL };
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_9)= { 0x0009000900090009ULL, 
0x0009000900090009ULL };
@@ -49,6 +53,7 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = { 
0x0100010001000100ULL, 0x010
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_512)  = { 0x0200020002000200ULL, 
0x0200020002000200ULL,
 0x0200020002000200ULL, 
0x0200020002000200ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 
0x03FB03FB03FB03FBULL };
+attribute_mcmodel_small
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 
0x03ff03ff03ff03ffULL,
 0x03ff03ff03ff03ffULL, 
0x03ff03ff03ff03ffULL};
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1024) = { 0x0400040004000400ULL, 
0x0400040004000400ULL,
@@ -66,13 +71,16 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_m1)   = { 
0xULL, 0xFFF
 
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_0)= { 0x00

Re: [FFmpeg-devel] [PATCH] Mark C globals with small code model

2025-02-26 Thread Pranav Kant via ffmpeg-devel
I added it to attributes_internal.h. The existing attribute in
attributes_internal.h (attribute_visibility_hidden) is also being used with
DECLARE_ALIGNED macros (see libavcodec/sbrdsp_template.c). My new macro is
similar in nature.

On Wed, Feb 26, 2025 at 11:44 AM Pranav Kant  wrote:

> By default, all globals in C/C++ compiled by clang are allocated
> in non-large data sections. See [1] for background on code models.
> For PIC (Position independent code), this is fine as long as binary is
> small but as binary size increases, users maybe want to use medium/large
> code models (-mcmodel=medium) which moves data in to large sections.
> As data in these large sections cannot be accessed using PIC code
> anymore (as it may be too far away), compiler ends up using a different
> instruction sequence when building C/C++ code -- using GOT to access
> these globals (which can be relaxed by linker at link time if binary
> ends up being smaller). However, assembly files continue to access these
> globals defined in C/C++ files using older (and invalid instruction
> sequence). So, we mark all such globals with an attribute that forces
> them to be allocated in small sections allowing them to validly be
> accessed from the assembly code.
>
> This patch should not have any affect on builds that use small code
> model, which is the default mode.
>
> [1]
> https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
>
> Signed-off-by: Pranav Kant 
> ---
>  libavcodec/ac3dsp.c |  2 ++
>  libavcodec/cabac.c  |  2 ++
>  libavcodec/x86/constants.c  |  8 
>  libavutil/attributes_internal.h | 15 +++
>  4 files changed, 27 insertions(+)
>
> diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
> index 730fa70fff..d16b6c24c3 100644
> --- a/libavcodec/ac3dsp.c
> +++ b/libavcodec/ac3dsp.c
> @@ -25,6 +25,7 @@
>
>  #include "config.h"
>  #include "libavutil/attributes.h"
> +#include "libavutil/attributes_internal.h"
>  #include "libavutil/common.h"
>  #include "libavutil/intmath.h"
>  #include "libavutil/mem_internal.h"
> @@ -104,6 +105,7 @@ static void ac3_update_bap_counts_c(uint16_t
> mant_cnt[16], uint8_t *bap,
>  mant_cnt[bap[len]]++;
>  }
>
> +attribute_mcmodel_small
>  DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
>  0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
>  };
> diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
> index 7d41cd2ae6..b8c6db29a2 100644
> --- a/libavcodec/cabac.c
> +++ b/libavcodec/cabac.c
> @@ -24,11 +24,13 @@
>   * Context Adaptive Binary Arithmetic Coder.
>   */
>
> +#include "libavutil/attributes_internal.h"
>  #include "libavutil/error.h"
>  #include "libavutil/mem_internal.h"
>
>  #include "cabac.h"
>
> +attribute_mcmodel_small
>  DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64
> + 4*64 + 63] = {
>  9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
>  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
> diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
> index bc7f2b17b8..347b7dd1d3 100644
> --- a/libavcodec/x86/constants.c
> +++ b/libavcodec/x86/constants.c
> @@ -18,17 +18,21 @@
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
>   */
>
> +#include "libavutil/attributes_internal.h"
>  #include "libavutil/mem_internal.h"
>  #include "libavutil/x86/asm.h" // for xmm_reg
>  #include "constants.h"
>
> +attribute_mcmodel_small
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)= {
> 0x0001000100010001ULL, 0x0001000100010001ULL,
>
>  0x0001000100010001ULL, 0x0001000100010001ULL };
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)= {
> 0x0002000200020002ULL, 0x0002000200020002ULL,
>
>  0x0002000200020002ULL, 0x0002000200020002ULL };
>  DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_3)= {
> 0x0003000300030003ULL, 0x0003000300030003ULL };
> +attribute_mcmodel_small
>  DECLARE_ASM_ALIGNED(32, const ymm_reg,  ff_pw_4)= {
> 0x0004000400040004ULL, 0x0004000400040004ULL,
>
>  0x0004000400040004ULL, 0x0004000400040004ULL };
> +attribute_mcmodel_small
>  DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_5)= {
> 0x0005000500050005ULL, 0x0005000500050005ULL };
>  DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8)= {
> 0x0008000800080008ULL, 0x0008000800080008ULL };
>  DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_9)= {
> 0x0009000900090009ULL, 0x0009000900090009ULL };
> @@ -49,6 +53,7 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = {
> 0x0100010001000100ULL, 0x010
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_512)  = {
> 0x0200020002000200ULL, 0x0200020002000200ULL,
>
>  0x0200020002000200ULL, 0x0200020002000200ULL };
>  DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = {
> 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
> +attribute_mcmodel_small
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1023) = {
> 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL,
>
>  0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL};
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_p

[FFmpeg-devel] [PATCH] Mark C globals with small code model

2025-02-25 Thread Pranav Kant via ffmpeg-devel
By default, all globals in C/C++ compiled by clang are allocated
in non-large data sections. See [1] for background on code models.
For PIC (position-independent code), this is fine as long as the binary
is small, but as binary size increases, users may want to use medium/large
code models (-mcmodel=medium), which move data into large sections.
As data in these large sections cannot be accessed using PIC code
anymore (as it may be too far away), the compiler ends up using a
different instruction sequence when building C/C++ code -- using the GOT
to access these globals (which can be relaxed by the linker at link time
if the binary ends up being smaller). However, assembly files continue to
access these globals defined in C/C++ files using the older (and now
invalid) instruction sequence. So, we mark all such globals with an
attribute that forces them to be allocated in small sections, allowing
them to be validly accessed from the assembly code.

This patch should not have any effect on builds that use the small code
model, which is the default.

[1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
---
 libavcodec/ac3dsp.c        | 2 +-
 libavcodec/cabac.c         | 3 ++-
 libavcodec/x86/constants.c | 8 ++++++++
 libavutil/attributes.h     | 6 ++++++
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 730fa70fff..43b4fcbda9 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -104,7 +104,7 @@ static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], 
uint8_t *bap,
 mant_cnt[bap[len]]++;
 }
 
-DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
+av_mcmodel_small DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
 0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
 };
 
diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index 7d41cd2ae6..dfc3ba135a 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -24,12 +24,13 @@
  * Context Adaptive Binary Arithmetic Coder.
  */
 
+#include "libavutil/attributes.h"
 #include "libavutil/error.h"
 #include "libavutil/mem_internal.h"
 
 #include "cabac.h"
 
-DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 
4*64 + 63] = {
+av_mcmodel_small DECLARE_ASM_ALIGNED(1, const uint8_t, 
ff_h264_cabac_tables)[512 + 4*2*64 + 4*64 + 63] = {
 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index bc7f2b17b8..9a5af2871c 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -18,17 +18,21 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/attributes.h"
 #include "libavutil/mem_internal.h"
 #include "libavutil/x86/asm.h" // for xmm_reg
 #include "constants.h"
 
+av_mcmodel_small
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)= { 0x0001000100010001ULL, 
0x0001000100010001ULL,
 0x0001000100010001ULL, 
0x0001000100010001ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)= { 0x0002000200020002ULL, 
0x0002000200020002ULL,
 0x0002000200020002ULL, 
0x0002000200020002ULL };
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_3)= { 0x0003000300030003ULL, 
0x0003000300030003ULL };
+av_mcmodel_small
 DECLARE_ASM_ALIGNED(32, const ymm_reg,  ff_pw_4)= { 0x0004000400040004ULL, 
0x0004000400040004ULL,
 0x0004000400040004ULL, 
0x0004000400040004ULL };
+av_mcmodel_small
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_5)= { 0x0005000500050005ULL, 
0x0005000500050005ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8)= { 0x0008000800080008ULL, 
0x0008000800080008ULL };
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_9)= { 0x0009000900090009ULL, 
0x0009000900090009ULL };
@@ -49,6 +53,7 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = { 
0x0100010001000100ULL, 0x010
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_512)  = { 0x0200020002000200ULL, 
0x0200020002000200ULL,
 0x0200020002000200ULL, 
0x0200020002000200ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 
0x03FB03FB03FB03FBULL };
+av_mcmodel_small
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 
0x03ff03ff03ff03ffULL,
 0x03ff03ff03ff03ffULL, 
0x03ff03ff03ff03ffULL};
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1024) = { 0x0400040004000400ULL, 
0x0400040004000400ULL,
@@ -66,13 +71,16 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_m1)   = { 
0xULL, 0xFFF
 
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_0)    = { 0x0000000000000000ULL, 0x0000000000000000ULL,
                                                     0x0000000000000000ULL, 0x0000000000000000ULL };

[FFmpeg-devel] [PATCH v3] Mark C globals with small code model

2025-03-03 Thread Pranav Kant via ffmpeg-devel
By default, all globals in C/C++ compiled by clang are allocated
in non-large data sections. See [1] for background on code models.
For PIC (position-independent code), this is fine as long as the binary
is small, but as binary size increases, users may want to use medium/large
code models (-mcmodel=medium), which move data into large sections.
As data in these large sections cannot be accessed using PIC code
anymore (as it may be too far away), the compiler ends up using a
different instruction sequence when building C/C++ code -- using the GOT
to access these globals (which can be relaxed by the linker at link time
if the binary ends up being smaller). However, assembly files continue to
access these globals defined in C/C++ files using the older (and now
invalid) instruction sequence. So, we mark all such globals with an
attribute that forces them to be allocated in small sections, allowing
them to be validly accessed from the assembly code.

This patch should not have any effect on builds that use the small code
model, which is the default.

[1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models

Signed-off-by: Pranav Kant 
---
 libavcodec/ac3dsp.c             |  2 ++
 libavcodec/cabac.c              |  2 ++
 libavcodec/x86/constants.c      |  8 ++++++++
 libavutil/attributes.h          |  6 ++++++
 libavutil/attributes_internal.h | 16 ++++++++++++++++
 5 files changed, 34 insertions(+)

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 730fa70fff..d16b6c24c3 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -25,6 +25,7 @@
 
 #include "config.h"
 #include "libavutil/attributes.h"
+#include "libavutil/attributes_internal.h"
 #include "libavutil/common.h"
 #include "libavutil/intmath.h"
 #include "libavutil/mem_internal.h"
@@ -104,6 +105,7 @@ static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], 
uint8_t *bap,
 mant_cnt[bap[len]]++;
 }
 
+attribute_mcmodel_small
 DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
 0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
 };
diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index 7d41cd2ae6..b8c6db29a2 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -24,11 +24,13 @@
  * Context Adaptive Binary Arithmetic Coder.
  */
 
+#include "libavutil/attributes_internal.h"
 #include "libavutil/error.h"
 #include "libavutil/mem_internal.h"
 
 #include "cabac.h"
 
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 
4*64 + 63] = {
 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index bc7f2b17b8..347b7dd1d3 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -18,17 +18,21 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/attributes_internal.h"
 #include "libavutil/mem_internal.h"
 #include "libavutil/x86/asm.h" // for xmm_reg
 #include "constants.h"
 
+attribute_mcmodel_small
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)= { 0x0001000100010001ULL, 
0x0001000100010001ULL,
 0x0001000100010001ULL, 
0x0001000100010001ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)= { 0x0002000200020002ULL, 
0x0002000200020002ULL,
 0x0002000200020002ULL, 
0x0002000200020002ULL };
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_3)= { 0x0003000300030003ULL, 
0x0003000300030003ULL };
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(32, const ymm_reg,  ff_pw_4)= { 0x0004000400040004ULL, 
0x0004000400040004ULL,
 0x0004000400040004ULL, 
0x0004000400040004ULL };
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_5)= { 0x0005000500050005ULL, 
0x0005000500050005ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8)= { 0x0008000800080008ULL, 
0x0008000800080008ULL };
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_9)= { 0x0009000900090009ULL, 
0x0009000900090009ULL };
@@ -49,6 +53,7 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = { 
0x0100010001000100ULL, 0x010
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_512)  = { 0x0200020002000200ULL, 
0x0200020002000200ULL,
 0x0200020002000200ULL, 
0x0200020002000200ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 
0x03FB03FB03FB03FBULL };
+attribute_mcmodel_small
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 
0x03ff03ff03ff03ffULL,
 0x03ff03ff03ff03ffULL, 
0x03ff03ff03ff03ffULL};
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1024) = { 0x0400040004000400ULL, 
0x0400040004000400ULL,
@@ -66,13 +71,16 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_m1)   = { 
0xULL, 0xFFF
 
 DECLARE_ALIGNED(32, co

Re: [FFmpeg-devel] [PATCH] Mark C globals with small code model

2025-03-06 Thread Pranav Kant via ffmpeg-devel
I think you were looking at an older version of the patch; the newer
version doesn't have this. In any case, I have uploaded a new version (v3).

On Thu, Feb 27, 2025 at 6:31 PM Lynne  wrote:

> On 25/02/2025 22:37, Pranav Kant via ffmpeg-devel wrote:
> > By default, all globals in C/C++ compiled by clang are allocated
> > in non-large data sections. See [1] for background on code models.
> > For PIC (Position independent code), this is fine as long as binary is
> > small but as binary size increases, users maybe want to use medium/large
> > code models (-mcmodel=medium) which moves data in to large sections.
> > As data in these large sections cannot be accessed using PIC code
> > anymore (as it may be too far away), compiler ends up using a different
> > instruction sequence when building C/C++ code -- using GOT to access
> > these globals (which can be relaxed by linker at link time if binary
> > ends up being smaller). However, assembly files continue to access these
> > globals defined in C/C++ files using older (and invalid instruction
> > sequence). So, we mark all such globals with an attribute that forces
> > them to be allocated in small sections allowing them to validly be
> > accessed from the assembly code.
> >
> > This patch should not have any affect on builds that use small code
> > model, which is the default mode.
> >
> > [1]
> https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
> > ---
> >   libavcodec/ac3dsp.c| 2 +-
> >   libavcodec/cabac.c | 3 ++-
> >   libavcodec/x86/constants.c | 8 
> >   libavutil/attributes.h | 6 ++
> >   4 files changed, 17 insertions(+), 2 deletions(-)
> >
> > diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
> > index 730fa70fff..43b4fcbda9 100644
> > --- a/libavcodec/ac3dsp.c
> > +++ b/libavcodec/ac3dsp.c
> > @@ -104,7 +104,7 @@ static void ac3_update_bap_counts_c(uint16_t
> mant_cnt[16], uint8_t *bap,
> >   mant_cnt[bap[len]]++;
> >   }
> >
> > -DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
> > +av_mcmodel_small DECLARE_ALIGNED(16, const uint16_t,
> ff_ac3_bap_bits)[16] = {
> >   0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
> >   };
> >
> > diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
> > index 7d41cd2ae6..dfc3ba135a 100644
> > --- a/libavcodec/cabac.c
> > +++ b/libavcodec/cabac.c
> > @@ -24,12 +24,13 @@
> >* Context Adaptive Binary Arithmetic Coder.
> >*/
> >
> > +#include "libavutil/attributes.h"
> >   #include "libavutil/error.h"
> >   #include "libavutil/mem_internal.h"
> >
> >   #include "cabac.h"
> >
> > -DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 +
> 4*2*64 + 4*64 + 63] = {
> > +av_mcmodel_small DECLARE_ASM_ALIGNED(1, const uint8_t,
> ff_h264_cabac_tables)[512 + 4*2*64 + 4*64 + 63] = {
> >   9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
> >   4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
> >   3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
> > diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
> > index bc7f2b17b8..9a5af2871c 100644
> > --- a/libavcodec/x86/constants.c
> > +++ b/libavcodec/x86/constants.c
> > @@ -18,17 +18,21 @@
> >* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> >*/
> >
> > +#include "libavutil/attributes.h"
> >   #include "libavutil/mem_internal.h"
> >   #include "libavutil/x86/asm.h" // for xmm_reg
> >   #include "constants.h"
> >
> > +av_mcmodel_small
> >   DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)= {
> 0x0001000100010001ULL, 0x0001000100010001ULL,
> >
>  0x0001000100010001ULL, 0x0001000100010001ULL };
> >   DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)= {
> 0x0002000200020002ULL, 0x0002000200020002ULL,
> >
>  0x0002000200020002ULL, 0x0002000200020002ULL };
> >   DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_3)= {
> 0x0003000300030003ULL, 0x0003000300030003ULL };
> > +av_mcmodel_small
> >   DECLARE_ASM_ALIGNED(32, const ymm_reg,  ff_pw_4)= {
> 0x0004000400040004ULL, 0x0004000400040004ULL,
> >
>  0x0004000400040004ULL, 0x0004000400040004ULL };
> > +av_mcmodel_small
> >   DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_5)= {
> 0x0005000500050005ULL, 0x0005000500050005ULL };
> >   DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8)= {
> 0x0008000800080008ULL, 0x0008000800080008ULL };
> >   DECLARE_ASM_ALIGN

Re: [FFmpeg-devel] [PATCH] Mark C globals with small code model

2025-03-06 Thread Pranav Kant via ffmpeg-devel
I uploaded a new patch (v3) that addresses these concerns.

On Thu, Feb 27, 2025 at 5:14 PM Michael Niedermayer 
wrote:

> On Wed, Feb 26, 2025 at 07:44:37PM +0000, Pranav Kant via ffmpeg-devel
> wrote:
> > By default, all globals in C/C++ compiled by clang are allocated
> > in non-large data sections. See [1] for background on code models.
> > For PIC (Position independent code), this is fine as long as binary is
> > small but as binary size increases, users maybe want to use medium/large
> > code models (-mcmodel=medium) which moves data in to large sections.
> > As data in these large sections cannot be accessed using PIC code
> > anymore (as it may be too far away), compiler ends up using a different
> > instruction sequence when building C/C++ code -- using GOT to access
> > these globals (which can be relaxed by linker at link time if binary
> > ends up being smaller). However, assembly files continue to access these
> > globals defined in C/C++ files using older (and invalid instruction
> > sequence). So, we mark all such globals with an attribute that forces
> > them to be allocated in small sections allowing them to validly be
> > accessed from the assembly code.
> >
> > This patch should not have any affect on builds that use small code
> > model, which is the default mode.
> >
> > [1]
> https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
> >
> > Signed-off-by: Pranav Kant 
> > ---
> >  libavcodec/ac3dsp.c |  2 ++
> >  libavcodec/cabac.c  |  2 ++
> >  libavcodec/x86/constants.c  |  8 
> >  libavutil/attributes_internal.h | 15 +++
> >  4 files changed, 27 insertions(+)
>
> This produces many warnings:
>
> CC  libavcodec/svq1.o
> In file included from libavcodec/svq1.h:40,
>  from libavcodec/svq1.c:35:
> ./libavutil/attributes_internal.h:43:5: warning: "ARCH_X86_64" is not
> defined, evaluates to 0 [-Wundef]
>43 | #if ARCH_X86_64 && defined(__ELF__) && __has_attribute(model)
>   | ^~~
>
> [...]
>
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> If you fake or manipulate statistics in a paper in physics you will never
> get a job again.
> If you fake or manipulate statistics in a paper in medicin you will get
> a job for life at the pharma industry.
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v4] Mark C globals with small code model

2025-03-11 Thread Pranav Kant via ffmpeg-devel
By default, all globals in C/C++ compiled by clang are allocated
in non-large data sections. See [1] for background on code models.
For PIC (position-independent code), this is fine as long as the binary
is small, but as binary size increases, users may want to use medium/large
code models (-mcmodel=medium), which move data into large sections.
As data in these large sections cannot be accessed using PIC code
anymore (as it may be too far away), the compiler ends up using a
different instruction sequence when building C/C++ code -- using the GOT
to access these globals (which can be relaxed by the linker at link time
if the binary ends up being smaller). However, assembly files continue to
access these globals defined in C/C++ files using the older (and now
invalid) instruction sequence. So, we mark all such globals with an
attribute that forces them to be allocated in small sections, allowing
them to be validly accessed from the assembly code.

This patch should not have any effect on builds that use the small code
model, which is the default.

[1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models

Signed-off-by: Pranav Kant 
---
 libavcodec/ac3dsp.c             |  2 ++
 libavcodec/cabac.c              |  2 ++
 libavcodec/x86/constants.c      |  8 ++++++++
 libavutil/attributes.h          |  6 ++++++
 libavutil/attributes_internal.h | 16 ++++++++++++++++
 5 files changed, 34 insertions(+)

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 730fa70fff..d16b6c24c3 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -25,6 +25,7 @@
 
 #include "config.h"
 #include "libavutil/attributes.h"
+#include "libavutil/attributes_internal.h"
 #include "libavutil/common.h"
 #include "libavutil/intmath.h"
 #include "libavutil/mem_internal.h"
@@ -104,6 +105,7 @@ static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], 
uint8_t *bap,
 mant_cnt[bap[len]]++;
 }
 
+attribute_mcmodel_small
 DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
 0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
 };
diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index 7d41cd2ae6..b8c6db29a2 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -24,11 +24,13 @@
  * Context Adaptive Binary Arithmetic Coder.
  */
 
+#include "libavutil/attributes_internal.h"
 #include "libavutil/error.h"
 #include "libavutil/mem_internal.h"
 
 #include "cabac.h"
 
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 
4*64 + 63] = {
 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index bc7f2b17b8..347b7dd1d3 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -18,17 +18,21 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/attributes_internal.h"
 #include "libavutil/mem_internal.h"
 #include "libavutil/x86/asm.h" // for xmm_reg
 #include "constants.h"
 
+attribute_mcmodel_small
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)= { 0x0001000100010001ULL, 
0x0001000100010001ULL,
 0x0001000100010001ULL, 
0x0001000100010001ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)= { 0x0002000200020002ULL, 
0x0002000200020002ULL,
 0x0002000200020002ULL, 
0x0002000200020002ULL };
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_3)= { 0x0003000300030003ULL, 
0x0003000300030003ULL };
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(32, const ymm_reg,  ff_pw_4)= { 0x0004000400040004ULL, 
0x0004000400040004ULL,
 0x0004000400040004ULL, 
0x0004000400040004ULL };
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_5)= { 0x0005000500050005ULL, 
0x0005000500050005ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8)= { 0x0008000800080008ULL, 
0x0008000800080008ULL };
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_9)= { 0x0009000900090009ULL, 
0x0009000900090009ULL };
@@ -49,6 +53,7 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = { 
0x0100010001000100ULL, 0x010
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_512)  = { 0x0200020002000200ULL, 
0x0200020002000200ULL,
 0x0200020002000200ULL, 
0x0200020002000200ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 
0x03FB03FB03FB03FBULL };
+attribute_mcmodel_small
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 
0x03ff03ff03ff03ffULL,
 0x03ff03ff03ff03ffULL, 
0x03ff03ff03ff03ffULL};
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1024) = { 0x0400040004000400ULL, 
0x0400040004000400ULL,
@@ -66,13 +71,16 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_m1)   = { 
0xULL, 0xFFF
 
 DECLARE_ALIGNED(32, co

Re: [FFmpeg-devel] [PATCH v4] Mark C globals with small code model

2025-03-11 Thread Pranav Kant via ffmpeg-devel
Patch version v4.
- Rebased
- Fixed a missing parenthesis in __attribute__((model("small"))) that was present in the earlier patch version.

On Tue, Mar 11, 2025 at 12:17 PM Pranav Kant  wrote:

> By default, all globals in C/C++ compiled by clang are allocated
> in non-large data sections. See [1] for background on code models.
> For PIC (Position independent code), this is fine as long as binary is
> small but as binary size increases, users maybe want to use medium/large
> code models (-mcmodel=medium) which moves data in to large sections.
> As data in these large sections cannot be accessed using PIC code
> anymore (as it may be too far away), compiler ends up using a different
> instruction sequence when building C/C++ code -- using GOT to access
> these globals (which can be relaxed by linker at link time if binary
> ends up being smaller). However, assembly files continue to access these
> globals defined in C/C++ files using older (and invalid instruction
> sequence). So, we mark all such globals with an attribute that forces
> them to be allocated in small sections allowing them to validly be
> accessed from the assembly code.
>
> This patch should not have any affect on builds that use small code
> model, which is the default mode.
>
> [1]
> https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
>
> Signed-off-by: Pranav Kant 
> ---
>  libavcodec/ac3dsp.c |  2 ++
>  libavcodec/cabac.c  |  2 ++
>  libavcodec/x86/constants.c  |  8 
>  libavutil/attributes.h  |  6 ++
>  libavutil/attributes_internal.h | 16 
>  5 files changed, 34 insertions(+)
>
> diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
> index 730fa70fff..d16b6c24c3 100644
> --- a/libavcodec/ac3dsp.c
> +++ b/libavcodec/ac3dsp.c
> @@ -25,6 +25,7 @@
>
>  #include "config.h"
>  #include "libavutil/attributes.h"
> +#include "libavutil/attributes_internal.h"
>  #include "libavutil/common.h"
>  #include "libavutil/intmath.h"
>  #include "libavutil/mem_internal.h"
> @@ -104,6 +105,7 @@ static void ac3_update_bap_counts_c(uint16_t
> mant_cnt[16], uint8_t *bap,
>  mant_cnt[bap[len]]++;
>  }
>
> +attribute_mcmodel_small
>  DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
>  0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
>  };
> diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
> index 7d41cd2ae6..b8c6db29a2 100644
> --- a/libavcodec/cabac.c
> +++ b/libavcodec/cabac.c
> @@ -24,11 +24,13 @@
>   * Context Adaptive Binary Arithmetic Coder.
>   */
>
> +#include "libavutil/attributes_internal.h"
>  #include "libavutil/error.h"
>  #include "libavutil/mem_internal.h"
>
>  #include "cabac.h"
>
> +attribute_mcmodel_small
>  DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64
> + 4*64 + 63] = {
>  9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
>  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
> diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
> index bc7f2b17b8..347b7dd1d3 100644
> --- a/libavcodec/x86/constants.c
> +++ b/libavcodec/x86/constants.c
> @@ -18,17 +18,21 @@
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
>   */
>
> +#include "libavutil/attributes_internal.h"
>  #include "libavutil/mem_internal.h"
>  #include "libavutil/x86/asm.h" // for xmm_reg
>  #include "constants.h"
>
> +attribute_mcmodel_small
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)= {
> 0x0001000100010001ULL, 0x0001000100010001ULL,
>
>  0x0001000100010001ULL, 0x0001000100010001ULL };
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)= {
> 0x0002000200020002ULL, 0x0002000200020002ULL,
>
>  0x0002000200020002ULL, 0x0002000200020002ULL };
>  DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_3)= {
> 0x0003000300030003ULL, 0x0003000300030003ULL };
> +attribute_mcmodel_small
>  DECLARE_ASM_ALIGNED(32, const ymm_reg,  ff_pw_4)= {
> 0x0004000400040004ULL, 0x0004000400040004ULL,
>
>  0x0004000400040004ULL, 0x0004000400040004ULL };
> +attribute_mcmodel_small
>  DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_5)= {
> 0x0005000500050005ULL, 0x0005000500050005ULL };
>  DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8)= {
> 0x0008000800080008ULL, 0x0008000800080008ULL };
>  DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_9)= {
> 0x0009000900090009ULL, 0x0009000900090009ULL };
> @@ -49,6 +53,7 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = {
> 0x0100010001000100ULL, 0x010
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_512)  = {
> 0x0200020002000200ULL, 0x0200020002000200ULL,
>
>  0x0200020002000200ULL, 0x0200020002000200ULL };
>  DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = {
> 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
> +attribute_mcmodel_small
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1023) = {
> 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL,
>
>  0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL};
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1024) = {
> 0x0400040004000400ULL, 0x0400040004000400ULL,
> @@ -66,13 +71,16 @

Re: [FFmpeg-devel] [PATCH v4] Mark C globals with small code model

2025-03-13 Thread Pranav Kant via ffmpeg-devel
Thank you for taking a look.

On Tue, Mar 11, 2025 at 4:45 PM Andreas Rheinhardt <
andreas.rheinha...@outlook.com> wrote:

> Pranav Kant via ffmpeg-devel:
> > By default, all globals in C/C++ compiled by clang are allocated
> > in non-large data sections. See [1] for background on code models.
> > For PIC (Position independent code), this is fine as long as binary is
> > small but as binary size increases, users maybe want to use medium/large
> > code models (-mcmodel=medium) which moves data in to large sections.
> > As data in these large sections cannot be accessed using PIC code
> > anymore (as it may be too far away), compiler ends up using a different
> > instruction sequence when building C/C++ code -- using GOT to access
> > these globals (which can be relaxed by linker at link time if binary
> > ends up being smaller). However, assembly files continue to access these
> > globals defined in C/C++ files using older (and invalid instruction
> > sequence). So, we mark all such globals with an attribute that forces
> > them to be allocated in small sections allowing them to validly be
> > accessed from the assembly code.
> >
> > This patch should not have any affect on builds that use small code
> > model, which is the default mode.
> >
> > [1]
> https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
> >
> > Signed-off-by: Pranav Kant 
> > ---
> >  libavcodec/ac3dsp.c |  2 ++
> >  libavcodec/cabac.c  |  2 ++
> >  libavcodec/x86/constants.c  |  8 
> >  libavutil/attributes.h  |  6 ++
> >  libavutil/attributes_internal.h | 16 
> >  5 files changed, 34 insertions(+)
> >
> > diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
> > index 730fa70fff..d16b6c24c3 100644
> > --- a/libavcodec/ac3dsp.c
> > +++ b/libavcodec/ac3dsp.c
> > @@ -25,6 +25,7 @@
> >
> >  #include "config.h"
> >  #include "libavutil/attributes.h"
> > +#include "libavutil/attributes_internal.h"
> >  #include "libavutil/common.h"
> >  #include "libavutil/intmath.h"
> >  #include "libavutil/mem_internal.h"
> > @@ -104,6 +105,7 @@ static void ac3_update_bap_counts_c(uint16_t
> mant_cnt[16], uint8_t *bap,
> >  mant_cnt[bap[len]]++;
> >  }
> >
> > +attribute_mcmodel_small
> >  DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
>
> Shouldn't stuff like this be applied to the declaration so that C code
> can also take advantage of the knowledge that this object will be placed
> in the small code section?
>

That's right. I will have it corrected in the newer version.


>
> >  0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
> >  };
> > diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
> > index 7d41cd2ae6..b8c6db29a2 100644
> > --- a/libavcodec/cabac.c
> > +++ b/libavcodec/cabac.c
> > @@ -24,11 +24,13 @@
> >   * Context Adaptive Binary Arithmetic Coder.
> >   */
> >
> > +#include "libavutil/attributes_internal.h"
> >  #include "libavutil/error.h"
> >  #include "libavutil/mem_internal.h"
> >
> >  #include "cabac.h"
> >
> > +attribute_mcmodel_small
> >  DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 +
> 4*2*64 + 4*64 + 63] = {
>
> Your commit message ("However, assembly files continue to access")
> speaks only of assembly files, i.e. external asm. Yet this here is only
> used by inline ASM. Looking through the code the reason for this is that
> I thought that specifying the memory model is only necessary for stuff
> used by external asm, yet ff_h264_cabac_tables does not seem to be used
> by external ASM at all, only inline ASM. If I see this correctly, the
> reason for this is that LOCAL_MANGLE (and therefore MANGLE) uses rip
> addressing on x64 when configure sets the RIP define. But this means
> that the set of files needing attribute_mcmodel_small is a superset of
> the files currently using DECLARE_ASM_ALIGNED. This means that one would
> only need two macros for the variables accessed by ASM: One for only
> external ASM and one for inline ASM (and potentially external ASM)
> instead of adding attribute_mcmodel_small at various places in the
> codebase.
>


By "However, assembly files continue to access", I meant all assembly
references, and yes, as you noted, LOCAL_MANGLE uses RIP-relative
addressing. I will have the description fixed in the next version.

Making this attribute part of these macros mak

Re: [FFmpeg-devel] [PATCH v5] Mark C globals with small code model

2025-03-20 Thread Pranav Kant via ffmpeg-devel
Patch version v5:
- Uses two new macros: DECLARE_ASM_VAR (used for both external and inline
asm) and DECLARE_EXTERNAL_ASM_VAR (used only for external asm).
- I intend to remove the existing explicit uses of attribute_visibility_hidden
in a follow-up patch and use DECLARE_EXTERNAL_ASM_VAR for those variables
instead.
- Other variables will be marked with these two new macros in a follow-up
patch. I want to settle on the infrastructure first with a handful of
variables.

Let me know if you have any questions.

On Thu, Mar 20, 2025 at 5:28 PM Pranav Kant  wrote:

> By default, all globals in C/C++ compiled by clang are allocated
> in non-large data sections. See [1] for background on code models.
> For PIC (Position independent code), this is fine as long as binary is
> small but as binary size increases, users maybe want to use medium/large
> code models (-mcmodel=medium) which moves data in to large sections.
> As data in these large sections cannot be accessed using PIC code
> anymore (as it may be too far away), compiler ends up using a different
> instruction sequence when building C/C++ code -- using GOT to access
> these globals (which can be relaxed by linker at link time if binary
> ends up being smaller). However, external assembly and inline assembly
> continue to access these globals using older PC-relative addressing
> which may not work because globals may be placed too far away.
>
> Introduce new macros for such variables that mark them with small code
> model attribute. This ensures that these variables are never allocated
> in large data sections, and continue to be validly accessed from assembly
> code.
>
> This patch should not have any affect on builds that use small code
> model, which is the default mode.
>
> [1]
> https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
>
> Signed-off-by: Pranav Kant 
> ---
>  libavcodec/ac3dsp.h |  4 +++-
>  libavcodec/cabac.h  |  4 +++-
>  libavcodec/x86/constants.h  | 12 +++-
>  libavutil/attributes.h  |  6 ++
>  libavutil/attributes_internal.h | 16 
>  libavutil/mem_internal.h| 13 +
>  6 files changed, 48 insertions(+), 7 deletions(-)
>
> diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
> index b1b2bced8f..a3c55a833b 100644
> --- a/libavcodec/ac3dsp.h
> +++ b/libavcodec/ac3dsp.h
> @@ -25,11 +25,13 @@
>  #include 
>  #include 
>
> +#include "libavutil/mem_internal.h"
> +
>  /**
>   * Number of mantissa bits written for each bap value.
>   * bap values with fractional bits are set to 0 and are calculated
> separately.
>   */
> -extern const uint16_t ff_ac3_bap_bits[16];
> +extern DECLARE_EXTERNAL_ASM_VAR(16, const uint16_t, ff_ac3_bap_bits)[16];
>
>  typedef struct AC3DSPContext {
>  /**
> diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
> index 38d06b2842..df352258c6 100644
> --- a/libavcodec/cabac.h
> +++ b/libavcodec/cabac.h
> @@ -29,7 +29,9 @@
>
>  #include 
>
> -extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
> +#include "libavutil/mem_internal.h"
> +
> +extern DECLARE_ASM_VAR(1, const uint8_t, ff_h264_cabac_tables)[512 +
> 4*2*64 + 4*64 + 63];
>  #define H264_NORM_SHIFT_OFFSET 0
>  #define H264_LPS_RANGE_OFFSET 512
>  #define H264_MLPS_STATE_OFFSET 1024
> diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
> index 0c6bf41fa0..2561302604 100644
> --- a/libavcodec/x86/constants.h
> +++ b/libavcodec/x86/constants.h
> @@ -23,13 +23,14 @@
>
>  #include 
>
> +#include "libavutil/mem_internal.h"
>  #include "libavutil/x86/asm.h"
>
> -extern const ymm_reg  ff_pw_1;
> +extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pw_1);
>  extern const ymm_reg  ff_pw_2;
>  extern const xmm_reg  ff_pw_3;
> -extern const ymm_reg  ff_pw_4;
> -extern const xmm_reg  ff_pw_5;
> +extern DECLARE_ASM_VAR(32, const ymm_reg, ff_pw_4);
> +extern DECLARE_ASM_VAR(16, const xmm_reg, ff_pw_5);
>  extern const xmm_reg  ff_pw_8;
>  extern const xmm_reg  ff_pw_9;
>  extern const uint64_t ff_pw_15;
> @@ -43,7 +44,7 @@ extern const uint64_t ff_pw_128;
>  extern const ymm_reg  ff_pw_255;
>  extern const ymm_reg  ff_pw_256;
>  extern const ymm_reg  ff_pw_512;
> -extern const ymm_reg  ff_pw_1023;
> +extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pw_1023);
>  extern const ymm_reg  ff_pw_1024;
>  extern const ymm_reg  ff_pw_2048;
>  extern const ymm_reg  ff_pw_4095;
> @@ -52,9 +53,10 @@ extern const ymm_reg  ff_pw_8192;
>  extern const ymm_reg  ff_pw_m1;
>
>  extern const ymm_reg  ff_pb_0;
> -extern const ymm_reg  ff_pb_1;
> +extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pb_1);
>  extern const ymm_reg  ff_pb_2;
>  extern const ymm_reg  ff_pb_3;
> +extern DECLARE_ASM_VAR(32, const xmm_reg, ff_pb_15);
>  extern const ymm_reg  ff_pb_80;
>  extern const ymm_reg  ff_pb_FE;
>  extern const uint64_t ff_pb_FC;
> diff --git a/libavutil/attributes.h b/libavutil/attributes.h
> index 04c615c952..dfc35fa31e 100644
> --- a/libavutil/attribu

[FFmpeg-devel] [PATCH v5] Mark C globals with small code model

2025-04-05 Thread Pranav Kant via ffmpeg-devel
By default, all globals in C/C++ compiled by clang are allocated
in non-large data sections. See [1] for background on code models.
For PIC (position-independent code), this is fine as long as the binary is
small, but as binary size increases, users may want to use medium/large
code models (-mcmodel=medium), which move data into large sections.
As data in these large sections cannot be accessed using PIC code
anymore (as it may be too far away), the compiler ends up using a different
instruction sequence when building C/C++ code -- using the GOT to access
these globals (which can be relaxed by the linker at link time if the binary
ends up being smaller). However, external assembly and inline assembly
continue to access these globals using the older PC-relative addressing,
which may not work because the globals may be placed too far away.

Introduce new macros for such variables that mark them with the small code
model attribute. This ensures that these variables are never allocated
in large data sections, and can still be validly accessed from assembly
code.

This patch should not have any effect on builds that use the small code
model, which is the default mode.

[1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models

Signed-off-by: Pranav Kant 
---
 libavcodec/ac3dsp.h |  4 +++-
 libavcodec/cabac.h  |  4 +++-
 libavcodec/x86/constants.h  | 12 +++-
 libavutil/attributes.h  |  6 ++
 libavutil/attributes_internal.h | 16 
 libavutil/mem_internal.h| 13 +
 6 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index b1b2bced8f..a3c55a833b 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -25,11 +25,13 @@
 #include 
 #include 
 
+#include "libavutil/mem_internal.h"
+
 /**
  * Number of mantissa bits written for each bap value.
  * bap values with fractional bits are set to 0 and are calculated separately.
  */
-extern const uint16_t ff_ac3_bap_bits[16];
+extern DECLARE_EXTERNAL_ASM_VAR(16, const uint16_t, ff_ac3_bap_bits)[16];
 
 typedef struct AC3DSPContext {
 /**
diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
index 38d06b2842..df352258c6 100644
--- a/libavcodec/cabac.h
+++ b/libavcodec/cabac.h
@@ -29,7 +29,9 @@
 
 #include 
 
-extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
+#include "libavutil/mem_internal.h"
+
+extern DECLARE_ASM_VAR(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 
4*64 + 63];
 #define H264_NORM_SHIFT_OFFSET 0
 #define H264_LPS_RANGE_OFFSET 512
 #define H264_MLPS_STATE_OFFSET 1024
diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index 0c6bf41fa0..2561302604 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h
@@ -23,13 +23,14 @@
 
 #include 
 
+#include "libavutil/mem_internal.h"
 #include "libavutil/x86/asm.h"
 
-extern const ymm_reg  ff_pw_1;
+extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pw_1);
 extern const ymm_reg  ff_pw_2;
 extern const xmm_reg  ff_pw_3;
-extern const ymm_reg  ff_pw_4;
-extern const xmm_reg  ff_pw_5;
+extern DECLARE_ASM_VAR(32, const ymm_reg, ff_pw_4);
+extern DECLARE_ASM_VAR(16, const xmm_reg, ff_pw_5);
 extern const xmm_reg  ff_pw_8;
 extern const xmm_reg  ff_pw_9;
 extern const uint64_t ff_pw_15;
@@ -43,7 +44,7 @@ extern const uint64_t ff_pw_128;
 extern const ymm_reg  ff_pw_255;
 extern const ymm_reg  ff_pw_256;
 extern const ymm_reg  ff_pw_512;
-extern const ymm_reg  ff_pw_1023;
+extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pw_1023);
 extern const ymm_reg  ff_pw_1024;
 extern const ymm_reg  ff_pw_2048;
 extern const ymm_reg  ff_pw_4095;
@@ -52,9 +53,10 @@ extern const ymm_reg  ff_pw_8192;
 extern const ymm_reg  ff_pw_m1;
 
 extern const ymm_reg  ff_pb_0;
-extern const ymm_reg  ff_pb_1;
+extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pb_1);
 extern const ymm_reg  ff_pb_2;
 extern const ymm_reg  ff_pb_3;
+extern DECLARE_ASM_VAR(32, const xmm_reg, ff_pb_15);
 extern const ymm_reg  ff_pb_80;
 extern const ymm_reg  ff_pb_FE;
 extern const uint64_t ff_pb_FC;
diff --git a/libavutil/attributes.h b/libavutil/attributes.h
index 04c615c952..dfc35fa31e 100644
--- a/libavutil/attributes.h
+++ b/libavutil/attributes.h
@@ -40,6 +40,12 @@
 #define AV_HAS_BUILTIN(x) 0
 #endif
 
+#ifdef __has_attribute
+#define AV_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define AV_HAS_ATTRIBUTE(x) 0
+#endif
+
 #ifndef av_always_inline
 #if AV_GCC_VERSION_AT_LEAST(3,1)
 #define av_always_inline __attribute__((always_inline)) inline
diff --git a/libavutil/attributes_internal.h b/libavutil/attributes_internal.h
index bc85ce77ff..c557fa0af0 100644
--- a/libavutil/attributes_internal.h
+++ b/libavutil/attributes_internal.h
@@ -19,6 +19,7 @@
 #ifndef AVUTIL_ATTRIBUTES_INTERNAL_H
 #define AVUTIL_ATTRIBUTES_INTERNAL_H
 
+#include "config.h"
 #include "attributes.h"
 
 #if (AV_GCC_VERSION_AT_LEAST(4,0) || defined(__clang__)) && (defined(__ELF__) 
|| d

Re: [FFmpeg-devel] [PATCH v5] Mark C globals with small code model

2025-04-04 Thread Pranav Kant via ffmpeg-devel
Any thoughts on this?

On Thu, Mar 20, 2025 at 5:30 PM Pranav Kant  wrote:

> Patch version v5:
> - Uses two new macros DECLARE_ASM_VAR (used for both external and inline
> asm) and DECLARE_EXTERNAL_ASM_VAR (used only for external asm)
> - I intend to remove explicit existing use of attribute_visibility_hidden
> in follow-up patch and instead use DECLARE_EXTERNAL_ASM_VAR for those
> variables
> - Other variables will be marked with these two new macros in a follow-up
> patch. I want to settle down on the infrastructure first with a handful of
> variables.
>
> Let me know if you have any questions.
>
> On Thu, Mar 20, 2025 at 5:28 PM Pranav Kant  wrote:
>
>> By default, all globals in C/C++ compiled by clang are allocated
>> in non-large data sections. See [1] for background on code models.
>> For PIC (Position independent code), this is fine as long as binary is
>> small but as binary size increases, users maybe want to use medium/large
>> code models (-mcmodel=medium) which moves data in to large sections.
>> As data in these large sections cannot be accessed using PIC code
>> anymore (as it may be too far away), compiler ends up using a different
>> instruction sequence when building C/C++ code -- using GOT to access
>> these globals (which can be relaxed by linker at link time if binary
>> ends up being smaller). However, external assembly and inline assembly
>> continue to access these globals using older PC-relative addressing
>> which may not work because globals may be placed too far away.
>>
>> Introduce new macros for such variables that mark them with small code
>> model attribute. This ensures that these variables are never allocated
>> in large data sections, and continue to be validly accessed from assembly
>> code.
>>
>> This patch should not have any affect on builds that use small code
>> model, which is the default mode.
>>
>> [1]
>> https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
>>
>> Signed-off-by: Pranav Kant 
>> ---
>>  libavcodec/ac3dsp.h |  4 +++-
>>  libavcodec/cabac.h  |  4 +++-
>>  libavcodec/x86/constants.h  | 12 +++-
>>  libavutil/attributes.h  |  6 ++
>>  libavutil/attributes_internal.h | 16 
>>  libavutil/mem_internal.h| 13 +
>>  6 files changed, 48 insertions(+), 7 deletions(-)
>>
>> diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
>> index b1b2bced8f..a3c55a833b 100644
>> --- a/libavcodec/ac3dsp.h
>> +++ b/libavcodec/ac3dsp.h
>> @@ -25,11 +25,13 @@
>>  #include 
>>  #include 
>>
>> +#include "libavutil/mem_internal.h"
>> +
>>  /**
>>   * Number of mantissa bits written for each bap value.
>>   * bap values with fractional bits are set to 0 and are calculated
>> separately.
>>   */
>> -extern const uint16_t ff_ac3_bap_bits[16];
>> +extern DECLARE_EXTERNAL_ASM_VAR(16, const uint16_t, ff_ac3_bap_bits)[16];
>>
>>  typedef struct AC3DSPContext {
>>  /**
>> diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
>> index 38d06b2842..df352258c6 100644
>> --- a/libavcodec/cabac.h
>> +++ b/libavcodec/cabac.h
>> @@ -29,7 +29,9 @@
>>
>>  #include 
>>
>> -extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
>> +#include "libavutil/mem_internal.h"
>> +
>> +extern DECLARE_ASM_VAR(1, const uint8_t, ff_h264_cabac_tables)[512 +
>> 4*2*64 + 4*64 + 63];
>>  #define H264_NORM_SHIFT_OFFSET 0
>>  #define H264_LPS_RANGE_OFFSET 512
>>  #define H264_MLPS_STATE_OFFSET 1024
>> diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
>> index 0c6bf41fa0..2561302604 100644
>> --- a/libavcodec/x86/constants.h
>> +++ b/libavcodec/x86/constants.h
>> @@ -23,13 +23,14 @@
>>
>>  #include 
>>
>> +#include "libavutil/mem_internal.h"
>>  #include "libavutil/x86/asm.h"
>>
>> -extern const ymm_reg  ff_pw_1;
>> +extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pw_1);
>>  extern const ymm_reg  ff_pw_2;
>>  extern const xmm_reg  ff_pw_3;
>> -extern const ymm_reg  ff_pw_4;
>> -extern const xmm_reg  ff_pw_5;
>> +extern DECLARE_ASM_VAR(32, const ymm_reg, ff_pw_4);
>> +extern DECLARE_ASM_VAR(16, const xmm_reg, ff_pw_5);
>>  extern const xmm_reg  ff_pw_8;
>>  extern const xmm_reg  ff_pw_9;
>>  extern const uint64_t ff_pw_15;
>> @@ -43,7 +44,7 @@ extern const uint64_t ff_pw_128;
>>  extern const ymm_reg  ff_pw_255;
>>  extern const ymm_reg  ff_pw_256;
>>  extern const ymm_reg  ff_pw_512;
>> -extern const ymm_reg  ff_pw_1023;
>> +extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pw_1023);
>>  extern const ymm_reg  ff_pw_1024;
>>  extern const ymm_reg  ff_pw_2048;
>>  extern const ymm_reg  ff_pw_4095;
>> @@ -52,9 +53,10 @@ extern const ymm_reg  ff_pw_8192;
>>  extern const ymm_reg  ff_pw_m1;
>>
>>  extern const ymm_reg  ff_pb_0;
>> -extern const ymm_reg  ff_pb_1;
>> +extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pb_1);
>>  extern const ymm_reg  ff_pb_2;
>>  extern const ymm_reg  ff_pb_3;
>> +extern DECLARE_ASM_VAR(32, const xmm_reg, ff_pb_15);
>>  extern const ymm

Re: [FFmpeg-devel] [PATCH v5] Mark C globals with small code model

2025-04-15 Thread Pranav Kant via ffmpeg-devel
Hello again. Is there anything else I can do here?

On Fri, Apr 4, 2025 at 11:40 AM Pranav Kant  wrote:

> Any thoughts on this?
>
> On Thu, Mar 20, 2025 at 5:30 PM Pranav Kant  wrote:
>
>> Patch version v5:
>> - Uses two new macros DECLARE_ASM_VAR (used for both external and inline
>> asm) and DECLARE_EXTERNAL_ASM_VAR (used only for external asm)
>> - I intend to remove explicit existing use of attribute_visibility_hidden
>> in follow-up patch and instead use DECLARE_EXTERNAL_ASM_VAR for those
>> variables
>> - Other variables will be marked with these two new macros in a follow-up
>> patch. I want to settle down on the infrastructure first with a handful of
>> variables.
>>
>> Let me know if you have any questions.
>>
>> On Thu, Mar 20, 2025 at 5:28 PM Pranav Kant  wrote:
>>
>>> By default, all globals in C/C++ compiled by clang are allocated
>>> in non-large data sections. See [1] for background on code models.
>>> For PIC (Position independent code), this is fine as long as binary is
>>> small but as binary size increases, users maybe want to use medium/large
>>> code models (-mcmodel=medium) which moves data in to large sections.
>>> As data in these large sections cannot be accessed using PIC code
>>> anymore (as it may be too far away), compiler ends up using a different
>>> instruction sequence when building C/C++ code -- using GOT to access
>>> these globals (which can be relaxed by linker at link time if binary
>>> ends up being smaller). However, external assembly and inline assembly
>>> continue to access these globals using older PC-relative addressing
>>> which may not work because globals may be placed too far away.
>>>
>>> Introduce new macros for such variables that mark them with small code
>>> model attribute. This ensures that these variables are never allocated
>>> in large data sections, and continue to be validly accessed from assembly
>>> code.
>>>
>>> This patch should not have any affect on builds that use small code
>>> model, which is the default mode.
>>>
>>> [1]
>>> https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
>>>
>>> Signed-off-by: Pranav Kant 
>>> ---
>>>  libavcodec/ac3dsp.h |  4 +++-
>>>  libavcodec/cabac.h  |  4 +++-
>>>  libavcodec/x86/constants.h  | 12 +++-
>>>  libavutil/attributes.h  |  6 ++
>>>  libavutil/attributes_internal.h | 16 
>>>  libavutil/mem_internal.h| 13 +
>>>  6 files changed, 48 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
>>> index b1b2bced8f..a3c55a833b 100644
>>> --- a/libavcodec/ac3dsp.h
>>> +++ b/libavcodec/ac3dsp.h
>>> @@ -25,11 +25,13 @@
>>>  #include 
>>>  #include 
>>>
>>> +#include "libavutil/mem_internal.h"
>>> +
>>>  /**
>>>   * Number of mantissa bits written for each bap value.
>>>   * bap values with fractional bits are set to 0 and are calculated
>>> separately.
>>>   */
>>> -extern const uint16_t ff_ac3_bap_bits[16];
>>> +extern DECLARE_EXTERNAL_ASM_VAR(16, const uint16_t,
>>> ff_ac3_bap_bits)[16];
>>>
>>>  typedef struct AC3DSPContext {
>>>  /**
>>> diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
>>> index 38d06b2842..df352258c6 100644
>>> --- a/libavcodec/cabac.h
>>> +++ b/libavcodec/cabac.h
>>> @@ -29,7 +29,9 @@
>>>
>>>  #include 
>>>
>>> -extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
>>> +#include "libavutil/mem_internal.h"
>>> +
>>> +extern DECLARE_ASM_VAR(1, const uint8_t, ff_h264_cabac_tables)[512 +
>>> 4*2*64 + 4*64 + 63];
>>>  #define H264_NORM_SHIFT_OFFSET 0
>>>  #define H264_LPS_RANGE_OFFSET 512
>>>  #define H264_MLPS_STATE_OFFSET 1024
>>> diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
>>> index 0c6bf41fa0..2561302604 100644
>>> --- a/libavcodec/x86/constants.h
>>> +++ b/libavcodec/x86/constants.h
>>> @@ -23,13 +23,14 @@
>>>
>>>  #include 
>>>
>>> +#include "libavutil/mem_internal.h"
>>>  #include "libavutil/x86/asm.h"
>>>
>>> -extern const ymm_reg  ff_pw_1;
>>> +extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pw_1);
>>>  extern const ymm_reg  ff_pw_2;
>>>  extern const xmm_reg  ff_pw_3;
>>> -extern const ymm_reg  ff_pw_4;
>>> -extern const xmm_reg  ff_pw_5;
>>> +extern DECLARE_ASM_VAR(32, const ymm_reg, ff_pw_4);
>>> +extern DECLARE_ASM_VAR(16, const xmm_reg, ff_pw_5);
>>>  extern const xmm_reg  ff_pw_8;
>>>  extern const xmm_reg  ff_pw_9;
>>>  extern const uint64_t ff_pw_15;
>>> @@ -43,7 +44,7 @@ extern const uint64_t ff_pw_128;
>>>  extern const ymm_reg  ff_pw_255;
>>>  extern const ymm_reg  ff_pw_256;
>>>  extern const ymm_reg  ff_pw_512;
>>> -extern const ymm_reg  ff_pw_1023;
>>> +extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pw_1023);
>>>  extern const ymm_reg  ff_pw_1024;
>>>  extern const ymm_reg  ff_pw_2048;
>>>  extern const ymm_reg  ff_pw_4095;
>>> @@ -52,9 +53,10 @@ extern const ymm_reg  ff_pw_8192;
>>>  extern const ymm_reg  ff_pw_m1;
>>>
>>>  extern const ymm_reg  ff_pb_0;
>>> -extern c

Re: [FFmpeg-devel] [PATCH v5] Mark C globals with small code model

2025-04-28 Thread Pranav Kant via ffmpeg-devel
Ping.

On Tue, Apr 15, 2025 at 4:22 PM Pranav Kant  wrote:

> Hello again. Is there anything else I can do here?
>
> On Fri, Apr 4, 2025 at 11:40 AM Pranav Kant  wrote:
>
>> Any thoughts on this?
>>
>> On Thu, Mar 20, 2025 at 5:30 PM Pranav Kant  wrote:
>>
>>> Patch version v5:
>>> - Uses two new macros DECLARE_ASM_VAR (used for both external and inline
>>> asm) and DECLARE_EXTERNAL_ASM_VAR (used only for external asm)
>>> - I intend to remove explicit existing use of
>>> attribute_visibility_hidden in follow-up patch and instead use
>>> DECLARE_EXTERNAL_ASM_VAR for those variables
>>> - Other variables will be marked with these two new macros in a
>>> follow-up patch. I want to settle down on the infrastructure first with a
>>> handful of variables.
>>>
>>> Let me know if you have any questions.
>>>
>>> On Thu, Mar 20, 2025 at 5:28 PM Pranav Kant  wrote:
>>>
 By default, all globals in C/C++ compiled by clang are allocated
 in non-large data sections. See [1] for background on code models.
 For PIC (Position independent code), this is fine as long as binary is
 small but as binary size increases, users maybe want to use medium/large
 code models (-mcmodel=medium) which moves data in to large sections.
 As data in these large sections cannot be accessed using PIC code
 anymore (as it may be too far away), compiler ends up using a different
 instruction sequence when building C/C++ code -- using GOT to access
 these globals (which can be relaxed by linker at link time if binary
 ends up being smaller). However, external assembly and inline assembly
 continue to access these globals using older PC-relative addressing
 which may not work because globals may be placed too far away.

 Introduce new macros for such variables that mark them with small code
 model attribute. This ensures that these variables are never allocated
 in large data sections, and continue to be validly accessed from
 assembly
 code.

 This patch should not have any affect on builds that use small code
 model, which is the default mode.

 [1]
 https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models

 Signed-off-by: Pranav Kant 
 ---
  libavcodec/ac3dsp.h |  4 +++-
  libavcodec/cabac.h  |  4 +++-
  libavcodec/x86/constants.h  | 12 +++-
  libavutil/attributes.h  |  6 ++
  libavutil/attributes_internal.h | 16 
  libavutil/mem_internal.h| 13 +
  6 files changed, 48 insertions(+), 7 deletions(-)

 diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
 index b1b2bced8f..a3c55a833b 100644
 --- a/libavcodec/ac3dsp.h
 +++ b/libavcodec/ac3dsp.h
 @@ -25,11 +25,13 @@
  #include 
  #include 

 +#include "libavutil/mem_internal.h"
 +
  /**
   * Number of mantissa bits written for each bap value.
   * bap values with fractional bits are set to 0 and are calculated
 separately.
   */
 -extern const uint16_t ff_ac3_bap_bits[16];
 +extern DECLARE_EXTERNAL_ASM_VAR(16, const uint16_t,
 ff_ac3_bap_bits)[16];

  typedef struct AC3DSPContext {
  /**
 diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
 index 38d06b2842..df352258c6 100644
 --- a/libavcodec/cabac.h
 +++ b/libavcodec/cabac.h
 @@ -29,7 +29,9 @@

  #include 

 -extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
 +#include "libavutil/mem_internal.h"
 +
 +extern DECLARE_ASM_VAR(1, const uint8_t, ff_h264_cabac_tables)[512 +
 4*2*64 + 4*64 + 63];
  #define H264_NORM_SHIFT_OFFSET 0
  #define H264_LPS_RANGE_OFFSET 512
  #define H264_MLPS_STATE_OFFSET 1024
 diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
 index 0c6bf41fa0..2561302604 100644
 --- a/libavcodec/x86/constants.h
 +++ b/libavcodec/x86/constants.h
 @@ -23,13 +23,14 @@

  #include 

 +#include "libavutil/mem_internal.h"
  #include "libavutil/x86/asm.h"

 -extern const ymm_reg  ff_pw_1;
 +extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pw_1);
  extern const ymm_reg  ff_pw_2;
  extern const xmm_reg  ff_pw_3;
 -extern const ymm_reg  ff_pw_4;
 -extern const xmm_reg  ff_pw_5;
 +extern DECLARE_ASM_VAR(32, const ymm_reg, ff_pw_4);
 +extern DECLARE_ASM_VAR(16, const xmm_reg, ff_pw_5);
  extern const xmm_reg  ff_pw_8;
  extern const xmm_reg  ff_pw_9;
  extern const uint64_t ff_pw_15;
 @@ -43,7 +44,7 @@ extern const uint64_t ff_pw_128;
  extern const ymm_reg  ff_pw_255;
  extern const ymm_reg  ff_pw_256;
  extern const ymm_reg  ff_pw_512;
 -extern const ymm_reg  ff_pw_1023;
 +extern DECLARE_EXTERNAL_ASM_VAR(32, const ymm_reg, ff_pw_1023);
  extern const ymm_reg  ff_pw_1024;
  extern const ymm_reg  ff_pw_20

Re: [FFmpeg-devel] [PATCH v6] Mark C globals with small code model

2025-05-06 Thread Pranav Kant via ffmpeg-devel
Patch version 6:
- Added support for all `cextern` symbols.
- Added attribute_mcmodel_small attribute to other DECLARE_ASM* macros.
- Used DECLARE_ASM* macros for symbols that are being accessed from
assembly but still marked with non-ASM DECLARE macros.


On Tue, May 6, 2025 at 4:25 PM Pranav Kant  wrote:

> By default, all globals in C/C++ compiled by clang are allocated
> in non-large data sections. See [1] for background on code models.
> For PIC (Position independent code), this is fine as long as binary is
> small but as binary size increases, users maybe want to use medium/large
> code models (-mcmodel=medium) which moves data in to large sections.
> As data in these large sections cannot be accessed using PIC code
> anymore (as it may be too far away), compiler ends up using a different
> instruction sequence when building C/C++ code -- using GOT to access
> these globals (which can be relaxed by linker at link time if binary
> ends up being smaller).
>
> However, hardcoded assembly (external asm files, as well as inline
> assembly) continue to access these globals defined in C/C++ files using
> older (and invalid instruction sequence). So, we mark all such globals
> with an attribute that forces them to be allocated in small sections
> allowing them to validly be accessed from the assembly code.
>
> This patch should not have any affect on builds that use small code
> model, which is the default mode.
>
> [1]
> https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
> ---
>  libavcodec/ac3dsp.h |  4 +-
>  libavcodec/cabac.h  |  4 +-
>  libavcodec/h263dsp.h|  3 +-
>  libavcodec/sbrdsp.h |  3 +-
>  libavcodec/x86/constants.c  |  4 +-
>  libavcodec/x86/constants.h  | 79 +
>  libavutil/attributes.h  |  6 +++
>  libavutil/attributes_internal.h | 16 +++
>  libavutil/mem_internal.h| 16 ---
>  9 files changed, 84 insertions(+), 51 deletions(-)
>
> diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
> index b1b2bced8f..914824025f 100644
> --- a/libavcodec/ac3dsp.h
> +++ b/libavcodec/ac3dsp.h
> @@ -25,11 +25,13 @@
>  #include 
>  #include 
>
> +#include "libavutil/mem_internal.h"
> +
>  /**
>   * Number of mantissa bits written for each bap value.
>   * bap values with fractional bits are set to 0 and are calculated
> separately.
>   */
> -extern const uint16_t ff_ac3_bap_bits[16];
> +EXTERN_ASM_VAR(const uint16_t, ff_ac3_bap_bits)[16];
>
>  typedef struct AC3DSPContext {
>  /**
> diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
> index 38d06b2842..70730e4059 100644
> --- a/libavcodec/cabac.h
> +++ b/libavcodec/cabac.h
> @@ -29,7 +29,9 @@
>
>  #include 
>
> -extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
> +#include "libavutil/mem_internal.h"
> +
> +EXTERN_ASM_VAR(const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 4*64 +
> 63];
>  #define H264_NORM_SHIFT_OFFSET 0
>  #define H264_LPS_RANGE_OFFSET 512
>  #define H264_MLPS_STATE_OFFSET 1024
> diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
> index 2dccd23392..fd107e7546 100644
> --- a/libavcodec/h263dsp.h
> +++ b/libavcodec/h263dsp.h
> @@ -20,8 +20,9 @@
>  #define AVCODEC_H263DSP_H
>
>  #include 
> +#include "libavutil/mem_internal.h"
>
> -extern const uint8_t ff_h263_loop_filter_strength[32];
> +EXTERN_ASM_VAR(const uint8_t, ff_h263_loop_filter_strength)[32];
>
>  typedef struct H263DSPContext {
>  void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
> diff --git a/libavcodec/sbrdsp.h b/libavcodec/sbrdsp.h
> index 09b2cbfc10..8ce467c0fb 100644
> --- a/libavcodec/sbrdsp.h
> +++ b/libavcodec/sbrdsp.h
> @@ -23,6 +23,7 @@
>
>  #include 
>  #include "aac_defines.h"
> +#include "libavutil/mem_internal.h"
>
>  typedef struct SBRDSPContext {
>  void (*sum64x5)(INTFLOAT *z);
> @@ -43,7 +44,7 @@ typedef struct SBRDSPContext {
>int kx, int m_max);
>  } SBRDSPContext;
>
> -extern const INTFLOAT AAC_RENAME(ff_sbr_noise_table)[][2];
> +EXTERN_ASM_VAR(const INTFLOAT, AAC_RENAME(ff_sbr_noise_table))[][2];
>
>  void AAC_RENAME(ff_sbrdsp_init)(SBRDSPContext *s);
>  void ff_sbrdsp_init_arm(SBRDSPContext *s);
> diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
> index c5f3c6428e..ed0d456d99 100644
> --- a/libavcodec/x86/constants.c
> +++ b/libavcodec/x86/constants.c
> @@ -46,7 +46,7 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = {
> 0x0100010001000100ULL, 0x010
>
>  0x0100010001000100ULL, 0x0100010001000100ULL };
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_512)  = {
> 0x0200020002000200ULL, 0x0200020002000200ULL,
>
>  0x0200020002000200ULL, 0x0200020002000200ULL };
> -DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = {
> 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
> +DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_1019) = {
> 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1023) = {
>

[FFmpeg-devel] [PATCH v6] Mark C globals with small code model

2025-05-06 Thread Pranav Kant via ffmpeg-devel
By default, all globals in C/C++ compiled by clang are allocated
in non-large data sections. See [1] for background on code models.
For PIC (Position independent code), this is fine as long as binary is
small but as binary size increases, users may want to use medium/large
code models (-mcmodel=medium), which move data into large sections.
As data in these large sections cannot be accessed using PIC code
anymore (as it may be too far away), compiler ends up using a different
instruction sequence when building C/C++ code -- using GOT to access
these globals (which can be relaxed by linker at link time if binary
ends up being smaller).

However, hardcoded assembly (external asm files, as well as inline
assembly) continues to access these globals defined in C/C++ files using
an older (and invalid) instruction sequence. So, we mark all such globals
with an attribute that forces them to be allocated in small sections,
allowing them to be validly accessed from the assembly code.

This patch should not have any effect on builds that use the small code
model, which is the default mode.

[1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models
---
 libavcodec/ac3dsp.h |  4 +-
 libavcodec/cabac.h  |  4 +-
 libavcodec/h263dsp.h|  3 +-
 libavcodec/sbrdsp.h |  3 +-
 libavcodec/x86/constants.c  |  4 +-
 libavcodec/x86/constants.h  | 79 +
 libavutil/attributes.h  |  6 +++
 libavutil/attributes_internal.h | 16 +++
 libavutil/mem_internal.h| 16 ---
 9 files changed, 84 insertions(+), 51 deletions(-)

diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index b1b2bced8f..914824025f 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -25,11 +25,13 @@
 #include 
 #include 
 
+#include "libavutil/mem_internal.h"
+
 /**
  * Number of mantissa bits written for each bap value.
  * bap values with fractional bits are set to 0 and are calculated separately.
  */
-extern const uint16_t ff_ac3_bap_bits[16];
+EXTERN_ASM_VAR(const uint16_t, ff_ac3_bap_bits)[16];
 
 typedef struct AC3DSPContext {
 /**
diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
index 38d06b2842..70730e4059 100644
--- a/libavcodec/cabac.h
+++ b/libavcodec/cabac.h
@@ -29,7 +29,9 @@
 
 #include 
 
-extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
+#include "libavutil/mem_internal.h"
+
+EXTERN_ASM_VAR(const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 4*64 + 63];
 #define H264_NORM_SHIFT_OFFSET 0
 #define H264_LPS_RANGE_OFFSET 512
 #define H264_MLPS_STATE_OFFSET 1024
diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
index 2dccd23392..fd107e7546 100644
--- a/libavcodec/h263dsp.h
+++ b/libavcodec/h263dsp.h
@@ -20,8 +20,9 @@
 #define AVCODEC_H263DSP_H
 
 #include 
+#include "libavutil/mem_internal.h"
 
-extern const uint8_t ff_h263_loop_filter_strength[32];
+EXTERN_ASM_VAR(const uint8_t, ff_h263_loop_filter_strength)[32];
 
 typedef struct H263DSPContext {
 void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
diff --git a/libavcodec/sbrdsp.h b/libavcodec/sbrdsp.h
index 09b2cbfc10..8ce467c0fb 100644
--- a/libavcodec/sbrdsp.h
+++ b/libavcodec/sbrdsp.h
@@ -23,6 +23,7 @@
 
 #include 
 #include "aac_defines.h"
+#include "libavutil/mem_internal.h"
 
 typedef struct SBRDSPContext {
 void (*sum64x5)(INTFLOAT *z);
@@ -43,7 +44,7 @@ typedef struct SBRDSPContext {
   int kx, int m_max);
 } SBRDSPContext;
 
-extern const INTFLOAT AAC_RENAME(ff_sbr_noise_table)[][2];
+EXTERN_ASM_VAR(const INTFLOAT, AAC_RENAME(ff_sbr_noise_table))[][2];
 
 void AAC_RENAME(ff_sbrdsp_init)(SBRDSPContext *s);
 void ff_sbrdsp_init_arm(SBRDSPContext *s);
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index c5f3c6428e..ed0d456d99 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -46,7 +46,7 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = { 
0x0100010001000100ULL, 0x010
 0x0100010001000100ULL, 
0x0100010001000100ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_512)  = { 0x0200020002000200ULL, 
0x0200020002000200ULL,
 0x0200020002000200ULL, 
0x0200020002000200ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 
0x03FB03FB03FB03FBULL };
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 
0x03FB03FB03FB03FBULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 
0x03ff03ff03ff03ffULL,
 0x03ff03ff03ff03ffULL, 
0x03ff03ff03ff03ffULL};
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1024) = { 0x0400040004000400ULL, 
0x0400040004000400ULL,
@@ -70,7 +70,7 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_2)= { 
0x0202020202020202ULL, 0x020
 0x0202020202020202U