commit: 80713478a329eba635f17209a3e1ed0549062060 Author: Sam James <sam <AT> gentoo <DOT> org> AuthorDate: Sun Aug 17 15:10:50 2025 +0000 Commit: Sam James <sam <AT> gentoo <DOT> org> CommitDate: Sun Aug 17 15:10:50 2025 +0000 URL: https://gitweb.gentoo.org/proj/gcc-patches.git/commit/?id=80713478
16.0.0: update TLS patch Signed-off-by: Sam James <sam <AT> gentoo.org> ...he-TLS-call-before-all-FLAGS_REG-setting-.patch | 287 ++++++++++++++++----- 1 file changed, 227 insertions(+), 60 deletions(-) diff --git a/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch b/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch index 62ccbe9..a01b5bc 100644 --- a/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch +++ b/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch @@ -1,9 +1,9 @@ -https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121572#c4 +https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121572#c6 -From caceadf47fe1b278311c73d5cbd062dca62298ac Mon Sep 17 00:00:00 2001 +From 27d463bd32623918d641a4f5168811ae86939e9b Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <[email protected]> Date: Sat, 16 Aug 2025 14:04:33 -0700 -Subject: [PATCH] x86: Place the TLS call before all FLAGS_REG setting BBs +Subject: [PATCH] x86: Place the TLS call before all register setting BBs We can't place a TLS call before a conditional jump in a basic block like @@ -18,9 +18,16 @@ We can't place a TLS call before a conditional jump in a basic block like (int_list:REG_BR_PROB 628353713 (nil))) -> 27) -since the TLS call will clobber flags register. Instead, we should place -such call before all register setting basic blocks which dominate the -current basic block. +since the TLS call will clobber flags register nor place a TLS call in a +basic block if any live caller-saved registers aren't dead at the end of +the basic block: + +;; live in 6 [bp] 7 [sp] 16 [argp] 17 [flags] 19 [frame] 104 +;; live gen 0 [ax] 102 106 108 116 117 118 120 +;; live kill 5 [di] + +Instead, we should place such call before all register setting basic +blocks which dominate the current basic block. NB: GNU2 TLS: @@ -43,30 +50,74 @@ only clobbers flags register. gcc/ PR target/121572 - * config/i386/i386-features.cc (ix86_place_single_tls_call): Also - search for REG_DEAD notes if flag register is alive. Place the - TLS call before all FLAGS_REG setting BBs for conditional jump. + * config/i386/i386-features.cc (ix86_get_dominator_for_reg): New. + (ix86_place_single_tls_call): Add the live flag register to the + bitmap. Clear the live register bitmap only for hard register. + If there is a conditional jump in the basic block or any live + caller-saved registers aren't dead at the end of the basic block, + get the basic block which dominates all basic blocks which set + the live registers. gcc/testsuite/ PR target/121572 * gcc.target/i386/pr121572-1a.c: New test. * gcc.target/i386/pr121572-1b.c: Likewise. + * gcc.target/i386/pr121572-2a.c: Likewise. + * gcc.target/i386/pr121572-2b.c: Likewise. Signed-off-by: H.J. Lu <[email protected]> --- - gcc/config/i386/i386-features.cc | 41 ++++++++++++++++++++- - gcc/testsuite/gcc.target/i386/pr121572-1a.c | 40 ++++++++++++++++++++ - gcc/testsuite/gcc.target/i386/pr121572-1b.c | 18 +++++++++ - 3 files changed, 98 insertions(+), 1 deletion(-) + gcc/config/i386/i386-features.cc | 77 ++++++++++++++++----- + gcc/testsuite/gcc.target/i386/pr121572-1a.c | 41 +++++++++++ + gcc/testsuite/gcc.target/i386/pr121572-1b.c | 18 +++++ + gcc/testsuite/gcc.target/i386/pr121572-2a.c | 55 +++++++++++++++ + gcc/testsuite/gcc.target/i386/pr121572-2b.c | 17 +++++ + 5 files changed, 192 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1b.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2a.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2b.c diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc -index f0bdc5c1880..7cdf98c5778 100644 +index f0bdc5c1880..235c255232a 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc -@@ -3748,6 +3748,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, +@@ -3732,6 +3732,33 @@ replace_tls_call (rtx src, auto_bitmap &tls_call_insns) + } + } + ++/* Return the basic block which dominates all basic blocks which set ++ hard register REGNO used in basic block BB. */ ++ ++static basic_block ++ix86_get_dominator_for_reg (unsigned int regno, basic_block bb) ++{ ++ basic_block set_bb; ++ auto_bitmap set_bbs; ++ ++ /* Get all BBs which set REGNO and dominate the current BB from all ++ DEFs of REGNO. */ ++ for (df_ref def = DF_REG_DEF_CHAIN (regno); ++ def; ++ def = DF_REF_NEXT_REG (def)) ++ if (!DF_REF_IS_ARTIFICIAL (def) ++ && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER) ++ && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER)) ++ { ++ set_bb = DF_REF_BB (def); ++ if (dominated_by_p (CDI_DOMINATORS, bb, set_bb)) ++ bitmap_set_bit (set_bbs, set_bb->index); ++ } ++ ++ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); ++ return bb; ++} ++ + /* Generate a TLS call of KIND with VAL and copy the call result to DEST, + at entry of the nearest dominator for basic block map BBS, which is in + the fake loop that contains the whole function, so that there is only +@@ -3748,6 +3775,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, bb = get_immediate_dominator (CDI_DOMINATORS, bb->loop_father->header); @@ -74,16 +125,17 @@ index f0bdc5c1880..7cdf98c5778 100644 rtx_insn *insn = BB_HEAD (bb); while (insn && !NONDEBUG_INSN_P (insn)) { -@@ -3837,7 +3838,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, - && bitmap_bit_p (in, i)) - bitmap_set_bit (live_caller_saved_regs, i); +@@ -3824,7 +3852,8 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, + auto_bitmap live_caller_saved_regs; + bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb); -- if (!bitmap_empty_p (live_caller_saved_regs)) -+ if (flags_live_p || !bitmap_empty_p (live_caller_saved_regs)) - { - /* Search for REG_DEAD notes in this basic block. */ - FOR_BB_INSNS (bb, insn) -@@ -3845,6 +3846,44 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, +- bool flags_live_p = bitmap_bit_p (in, FLAGS_REG); ++ if (bitmap_bit_p (in, FLAGS_REG)) ++ bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); + + unsigned int i; + +@@ -3845,13 +3874,28 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, if (!NONDEBUG_INSN_P (insn)) continue; @@ -96,44 +148,74 @@ index f0bdc5c1880..7cdf98c5778 100644 + || !(LABEL_P (label) || SYMBOL_REF_P (label))) + gcc_unreachable (); + -+ rtx_insn *set_insn; -+ basic_block set_bb; -+ auto_bitmap set_bbs; -+ -+ /* Get all BBs which define FLAGS_REG and dominate the -+ current BB from all DEFs of FLAGS_REG. */ -+ for (df_ref def = DF_REG_DEF_CHAIN (FLAGS_REG); -+ def; -+ def = DF_REF_NEXT_REG (def)) -+ if (!DF_REF_IS_ARTIFICIAL (def) -+ && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER) -+ && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER)) -+ { -+ set_insn = DF_REF_INSN (def); -+ set = single_set (set_insn); -+ gcc_assert (set); -+ set_bb = DF_REF_BB (def); -+ if (dominated_by_p (CDI_DOMINATORS, bb, set_bb)) -+ bitmap_set_bit (set_bbs, set_bb->index); -+ } -+ + /* Place the call before all FLAGS_REG setting BBs since + we can't place a call before nor after a conditional + jump. */ -+ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, -+ set_bbs); -+ goto place_tls_call; ++ break; + } + /* Check if FLAGS register is live. */ set = single_set (insn); if (set) + { + rtx dest = SET_DEST (set); + if (REG_P (dest) && REGNO (dest) == FLAGS_REG) +- flags_live_p = true; ++ bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); + } + + rtx link; +@@ -3863,29 +3907,30 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, + for (i = REGNO (XEXP (link, 0)); + i < END_REGNO (XEXP (link, 0)); + i++) +- bitmap_clear_bit (live_caller_saved_regs, i); +- +- /* Check if FLAGS register is dead. */ +- if (REGNO (XEXP (link, 0)) == FLAGS_REG) +- flags_live_p = false; ++ if (i < FIRST_PSEUDO_REGISTER) ++ bitmap_clear_bit (live_caller_saved_regs, i); + + if (bitmap_empty_p (live_caller_saved_regs)) + { +- /* All live caller-saved registers are dead after +- this instruction. Since TLS instructions +- clobber FLAGS register, it must be dead where +- the TLS will be inserted after. */ +- if (flags_live_p) +- gcc_unreachable (); + after = insn; + goto insert_after; + } + } + } + +- /* All live caller-saved registers should be dead at the end +- of this basic block. */ +- gcc_unreachable (); ++ /* If any live caller-saved registers aren't dead at the end ++ of this basic block, get the basic block which dominates all ++ basic blocks which set the remaining live registers. */ ++ auto_bitmap set_bbs; ++ bitmap_iterator bi; ++ unsigned int id; ++ EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi) ++ { ++ basic_block set_bb = ix86_get_dominator_for_reg (id, bb); ++ bitmap_set_bit (set_bbs, set_bb->index); ++ } ++ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); ++ goto place_tls_call; + } + + /* Emit the TLS CALL insn. */ diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1a.c b/gcc/testsuite/gcc.target/i386/pr121572-1a.c new file mode 100644 -index 00000000000..179a1a9e66c +index 00000000000..270d8ff5cb6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121572-1a.c -@@ -0,0 +1,40 @@ +@@ -0,0 +1,41 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ @@ -151,28 +233,29 @@ index 00000000000..179a1a9e66c +**... +*/ + -+extern __thread int tv_cache __attribute__ ((visibility("hidden"))); ++extern __thread int tv_cache __attribute__ ((visibility ("hidden"))); +extern void use_cache (int); +extern int val (int v); + -+__attribute__((optimize(2))) ++__attribute__ ((optimize (2))) +void +bug (void) +{ -+ int compared = val(-1); ++ int compared = val (-1); + -+ if (compared == 0 || (compared > 0 && val(2) == 0)) ++ if (compared == 0 || (compared > 0 && val (2) == 0)) + { -+ __builtin_trap(); ++ __builtin_trap (); + } + -+ if (compared < 0) { -+ use_cache(tv_cache); ++ if (compared < 0) ++ { ++ use_cache (tv_cache); + return; -+ } ++ } + -+ use_cache(tv_cache); -+ __builtin_trap(); ++ use_cache (tv_cache); ++ __builtin_trap (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1b.c b/gcc/testsuite/gcc.target/i386/pr121572-1b.c new file mode 100644 @@ -198,5 +281,89 @@ index 00000000000..8a6089109f5 +*/ + +#include "pr121572-1a.c" +diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2a.c b/gcc/testsuite/gcc.target/i386/pr121572-2a.c +new file mode 100644 +index 00000000000..3f2230f8885 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr121572-2a.c +@@ -0,0 +1,55 @@ ++/* { dg-do compile { target *-*-linux* } } */ ++/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ ++/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ ++/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ ++ ++/* ++**mpfr_agm: ++**.LFB[0-9]+: ++**... ++** movl %edi, %ebp ++** data16 leaq __gmpfr_emax@tlsgd\(%rip\), %rdi ++** .value 0x6666 ++** rex64 ++** call __tls_get_addr@PLT ++** mov[l|q] mpfr_agm_compare@GOTPCREL\(%rip\), %[e|r]dx ++** movl \(%[e|r]dx\), %edx ++** testl %edx, %edx ++** je .L2 ++**... ++*/ ++ ++typedef enum ++{ ++ MPFR_RNDN ++} mpfr_rnd_t; ++typedef int mpfr_t[1]; ++long __gmpfr_emin, mpfr_agm_expo_0; ++_Thread_local long __gmpfr_emax; ++int mpfr_agm_compare, mpfr_agm___trans_tmp_1; ++mpfr_t mpfr_agm_u; ++void mpfr_mul (int *, int, int, mpfr_rnd_t); ++int ++mpfr_agm (int op1) ++{ ++ int op2 = 0; ++ if (__builtin_expect (mpfr_agm_compare == 0, 0)) ++ return 0; ++ if (mpfr_agm_compare > 0) ++ { ++ int t = op1; ++ op2 = t; ++ } ++ mpfr_agm_expo_0 = __gmpfr_emax; ++ for (;;) ++ { ++ retry: ++ mpfr_mul (mpfr_agm_u, op1, op2, MPFR_RNDN); ++ if (0) ++ goto retry; ++ if (__builtin_expect (mpfr_agm___trans_tmp_1, 1)) ++ break; ++ } ++ __gmpfr_emin = __gmpfr_emax; ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2b.c b/gcc/testsuite/gcc.target/i386/pr121572-2b.c +new file mode 100644 +index 00000000000..d81e2edc6f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr121572-2b.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile { target *-*-linux* } } */ ++/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */ ++/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ ++/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ ++ ++/* ++**mpfr_agm: ++**.LFB[0-9]+: ++** .cfi_startproc ++** sub[l|q] \$[0-9]+, %[e|r]sp ++** .cfi_def_cfa_offset [0-9]+ ++** lea[l|q] __gmpfr_emax@TLSDESC\(%rip\), %[e|r]ax ++** call \*__gmpfr_emax@TLSCALL\(%[e|r]ax\) ++**... ++*/ ++ ++#include "pr121572-2a.c" -- 2.50.1
