commit: fc24904e190e6c785f7c4640ab00d214e397d49e Author: Sam James <sam <AT> gentoo <DOT> org> AuthorDate: Mon Aug 18 23:08:29 2025 +0000 Commit: Sam James <sam <AT> gentoo <DOT> org> CommitDate: Mon Aug 18 23:08:29 2025 +0000 URL: https://gitweb.gentoo.org/proj/gcc-patches.git/commit/?id=fc24904e
16.0.0: update TLS patch Signed-off-by: Sam James <sam <AT> gentoo.org> ...he-TLS-call-before-all-FLAGS_REG-setting-.patch | 417 ++++++++++++++------- 1 file changed, 291 insertions(+), 126 deletions(-) diff --git a/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch b/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch index 497ee1c..d998713 100644 --- a/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch +++ b/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch @@ -1,8 +1,7 @@ -From 81d358d3e24e18744e1c000672c5317d606c91ee Mon Sep 17 00:00:00 2001 -Message-ID: <81d358d3e24e18744e1c000672c5317d606c91ee.1755464442.git....@gentoo.org> +From 93b90830524746278635ddac3a5841caa7139baf Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <[email protected]> -Date: Sun, 17 Aug 2025 13:50:05 -0700 -Subject: [PATCH] x86: Place the TLS call before all register setting BBs +Date: Sat, 16 Aug 2025 14:04:33 -0700 +Subject: [PATCH v3] x86: Place the TLS call before all register setting BBs We can't place a TLS call before a conditional jump in a basic block like @@ -37,15 +36,10 @@ gcc/ * config/i386/i386-features.cc (replace_tls_call): Add a bitmap argument and put the updated TLS instruction in the bitmap. (ix86_get_dominator_for_reg): New. + (ix86_emit_tls_call): Likewise. (ix86_place_single_tls_call): Add 2 bitmap arguments for updated - GNU and GNU2 TLS instructions. Add the live flag register to the - bitmap. Insert the __tls_get_addr call before INSN if it replaces - a __tls_get_addr call. Mark FLAGS register as dead if INSN - replaced the GNU2 TLS instruction. Clear the live register bitmap - only for hard register. If there is a conditional jump in the - basic block or any live caller-saved registers aren't dead at the - end of the basic block, get the basic block which dominates all - basic blocks which set the live registers. + GNU and GNU2 TLS instructions. Call ix86_emit_tls_call to emit + TLS instruction. Correct debug dump for before instruction. gcc/testsuite/ @@ -57,19 +51,19 @@ gcc/testsuite/ Signed-off-by: H.J. Lu <[email protected]> --- - gcc/config/i386/i386-features.cc | 136 ++++++++++++++++---- - gcc/testsuite/gcc.target/i386/pr121572-1a.c | 41 ++++++ - gcc/testsuite/gcc.target/i386/pr121572-1b.c | 18 +++ - gcc/testsuite/gcc.target/i386/pr121572-2a.c | 39 ++++++ + gcc/config/i386/i386-features.cc | 329 +++++++++++++------- + gcc/testsuite/gcc.target/i386/pr121572-1a.c | 41 +++ + gcc/testsuite/gcc.target/i386/pr121572-1b.c | 18 ++ + gcc/testsuite/gcc.target/i386/pr121572-2a.c | 39 +++ gcc/testsuite/gcc.target/i386/pr121572-2b.c | 6 + - 5 files changed, 215 insertions(+), 25 deletions(-) + 5 files changed, 327 insertions(+), 106 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2b.c diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc -index f0bdc5c1880b..903f2b0b4789 100644 +index f0bdc5c1880..b1211ca916a 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3684,10 +3684,12 @@ ix86_broadcast_inner (rtx op, machine_mode mode, @@ -97,7 +91,7 @@ index f0bdc5c1880b..903f2b0b4789 100644 if (dump_file) { fprintf (dump_file, "\nReplace:\n\n"); -@@ -3732,15 +3737,48 @@ replace_tls_call (rtx src, auto_bitmap &tls_call_insns) +@@ -3732,15 +3737,211 @@ replace_tls_call (rtx src, auto_bitmap &tls_call_insns) } } @@ -128,49 +122,82 @@ index f0bdc5c1880b..903f2b0b4789 100644 + return bb; +} + - /* Generate a TLS call of KIND with VAL and copy the call result to DEST, - at entry of the nearest dominator for basic block map BBS, which is in - the fake loop that contains the whole function, so that there is only -- a single TLS CALL of KIND with VAL in the whole function. If -- TLSDESC_SET isn't nullptr, insert it before the TLS call. */ -+ a single TLS CALL of KIND with VAL in the whole function. -+ UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS -+ instructions. UPDATED_GNU2_TLS_INSNS contains instructions which -+ replace the GNU2 TLS instructions. If TLSDESC_SET isn't nullptr, -+ insert it before the TLS call. */ - - static void - ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, -- bitmap bbs, rtx tlsdesc_set = nullptr) -+ auto_bitmap &bbs, -+ auto_bitmap &updated_gnu_tls_insns, -+ auto_bitmap &updated_gnu2_tls_insns, -+ rtx tlsdesc_set = nullptr) - { - basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs); - while (bb->loop_father->latch -@@ -3748,6 +3786,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, - bb = get_immediate_dominator (CDI_DOMINATORS, - bb->loop_father->header); - -+place_tls_call: - rtx_insn *insn = BB_HEAD (bb); - while (insn && !NONDEBUG_INSN_P (insn)) - { -@@ -3824,7 +3863,8 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, - auto_bitmap live_caller_saved_regs; - bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb); - -- bool flags_live_p = bitmap_bit_p (in, FLAGS_REG); -+ if (bitmap_bit_p (in, FLAGS_REG)) -+ bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); - - unsigned int i; - -@@ -3845,13 +3885,46 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, - if (!NONDEBUG_INSN_P (insn)) - continue; - ++/* Emit a TLS_SET instruction of KIND in basic block BB. Store the ++ insertion point in *BEFORE_P for emit_insn_before or in *AFTER_P ++ for emit_insn_after. UPDATED_GNU_TLS_INSNS contains instructions ++ which replace the GNU TLS instructions. UPDATED_GNU2_TLS_INSNS ++ contains instructions which replace the GNU2 TLS instructions. */ ++ ++static rtx_insn * ++ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb, ++ rtx_insn **before_p, rtx_insn **after_p, ++ auto_bitmap &updated_gnu_tls_insns, ++ auto_bitmap &updated_gnu2_tls_insns) ++{ ++ rtx_insn *tls_insn; ++ ++ do ++ { ++ rtx_insn *insn = BB_HEAD (bb); ++ while (insn && !NONDEBUG_INSN_P (insn)) ++ { ++ if (insn == BB_END (bb)) ++ { ++ insn = NULL; ++ break; ++ } ++ insn = NEXT_INSN (insn); ++ } ++ ++ /* TLS_GD and TLS_LD_BASE instructions are normal functions which ++ clobber caller-saved registers. TLSDESC instructions only ++ clobber FLAGS. If any registers clobbered by TLS instructions ++ are live in this basic block, we must insert TLS instructions ++ after all live registers clobbered are dead. */ ++ ++ auto_bitmap live_caller_saved_regs; ++ bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb); ++ ++ if (bitmap_bit_p (in, FLAGS_REG)) ++ bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); ++ ++ unsigned int i; ++ ++ /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE ++ instructions. */ ++ if (kind != X86_CSE_TLSDESC) ++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ if (call_used_regs[i] ++ && !fixed_regs[i] ++ && bitmap_bit_p (in, i)) ++ bitmap_set_bit (live_caller_saved_regs, i); ++ ++ if (bitmap_empty_p (live_caller_saved_regs)) ++ { ++ if (insn == BB_HEAD (bb)) ++ { ++ *before_p = insn; ++ tls_insn = emit_insn_before (tls_set, insn); ++ } ++ else ++ { ++ insn = insn ? PREV_INSN (insn) : BB_END (bb); ++ *after_p = insn; ++ tls_insn = emit_insn_after (tls_set, insn); ++ } ++ return tls_insn; ++ } ++ ++ bool repeat = false; ++ ++ /* Search for REG_DEAD notes in this basic block. */ ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!NONDEBUG_INSN_P (insn)) ++ continue; ++ ++ /* NB: Conditional jump is the only instruction which reads ++ flags register and changes control flow. */ + if (JUMP_P (insn)) + { + /* This must be a conditional jump. */ @@ -184,25 +211,27 @@ index f0bdc5c1880b..903f2b0b4789 100644 + we can't place a call before nor after a conditional + jump. */ + bb = ix86_get_dominator_for_reg (FLAGS_REG, bb); -+ goto place_tls_call; ++ ++ /* Start over again. */ ++ repeat = true; ++ break; + } + - /* Check if FLAGS register is live. */ - set = single_set (insn); - if (set) - { - rtx dest = SET_DEST (set); -- if (REG_P (dest) && REGNO (dest) == FLAGS_REG) -- flags_live_p = true; ++ /* Check if FLAGS register is live. */ ++ rtx set = single_set (insn); ++ if (set) ++ { ++ rtx dest = SET_DEST (set); + if (REG_P (dest)) + { + if (bitmap_bit_p (updated_gnu_tls_insns, + INSN_UID (insn))) + { -+ /* Insert the __tls_get_addr call before INSN which -+ replaces a __tls_get_addr call. */ -+ before = insn; -+ goto insert_before; ++ /* Insert the __tls_get_addr call before INSN ++ which replaces a __tls_get_addr call. */ ++ *before_p = insn; ++ tls_insn = emit_insn_before (tls_set, insn); ++ return tls_insn; + } + if (bitmap_bit_p (updated_gnu2_tls_insns, + INSN_UID (insn))) @@ -213,67 +242,206 @@ index f0bdc5c1880b..903f2b0b4789 100644 + else if (REGNO (dest) == FLAGS_REG) + bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); + } - } ++ } ++ ++ rtx link; ++ for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) ++ if (REG_NOTE_KIND (link) == REG_DEAD ++ && REG_P (XEXP (link, 0))) ++ { ++ /* Mark the live caller-saved register as dead. */ ++ for (i = REGNO (XEXP (link, 0)); ++ i < END_REGNO (XEXP (link, 0)); ++ i++) ++ if (i < FIRST_PSEUDO_REGISTER) ++ bitmap_clear_bit (live_caller_saved_regs, i); ++ ++ if (bitmap_empty_p (live_caller_saved_regs)) ++ { ++ *after_p = insn; ++ tls_insn = emit_insn_after (tls_set, insn); ++ return tls_insn; ++ } ++ } ++ } ++ ++ /* NB: Start over again for conditional jump. */ ++ if (repeat) ++ continue; ++ ++ /* If any live caller-saved registers aren't dead at the end of ++ this basic block, get the basic block which dominates all ++ basic blocks which set the remaining live registers. */ ++ auto_bitmap set_bbs; ++ bitmap_iterator bi; ++ unsigned int id; ++ EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi) ++ { ++ basic_block set_bb = ix86_get_dominator_for_reg (id, bb); ++ bitmap_set_bit (set_bbs, set_bb->index); ++ } ++ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); ++ } ++ while (true); ++} ++ + /* Generate a TLS call of KIND with VAL and copy the call result to DEST, + at entry of the nearest dominator for basic block map BBS, which is in + the fake loop that contains the whole function, so that there is only +- a single TLS CALL of KIND with VAL in the whole function. If +- TLSDESC_SET isn't nullptr, insert it before the TLS call. */ ++ a single TLS CALL of KIND with VAL in the whole function. ++ UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS ++ instructions. UPDATED_GNU2_TLS_INSNS contains instructions which ++ replace the GNU2 TLS instructions. If TLSDESC_SET isn't nullptr, ++ insert it before the TLS call. */ - rtx link; -@@ -3863,29 +3936,30 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, - for (i = REGNO (XEXP (link, 0)); - i < END_REGNO (XEXP (link, 0)); - i++) + static void + ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, +- bitmap bbs, rtx tlsdesc_set = nullptr) ++ auto_bitmap &bbs, ++ auto_bitmap &updated_gnu_tls_insns, ++ auto_bitmap &updated_gnu2_tls_insns, ++ rtx tlsdesc_set = nullptr) + { + basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs); + while (bb->loop_father->latch +@@ -3748,17 +3949,6 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, + bb = get_immediate_dominator (CDI_DOMINATORS, + bb->loop_father->header); + +- rtx_insn *insn = BB_HEAD (bb); +- while (insn && !NONDEBUG_INSN_P (insn)) +- { +- if (insn == BB_END (bb)) +- { +- insn = NULL; +- break; +- } +- insn = NEXT_INSN (insn); +- } +- + rtx rax = nullptr, rdi; + rtx eqv = nullptr; + rtx caddr; +@@ -3766,7 +3956,6 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, + rtx clob; + rtx symbol; + rtx tls; +- rtx_insn *tls_insn; + + switch (kind) + { +@@ -3808,94 +3997,13 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, + gcc_unreachable (); + } + ++ /* Emit the TLS CALL insn. */ + rtx_insn *before = nullptr; + rtx_insn *after = nullptr; +- if (insn == BB_HEAD (bb)) +- before = insn; +- else +- after = insn ? PREV_INSN (insn) : BB_END (bb); +- +- /* TLS_GD and TLS_LD_BASE instructions are normal functions which +- clobber caller-saved registers. TLSDESC instructions only clobber +- FLAGS. If any registers clobbered by TLS instructions are live +- in this basic block, we must insert TLS instructions after all live +- registers clobbered are dead. */ +- +- auto_bitmap live_caller_saved_regs; +- bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb); +- +- bool flags_live_p = bitmap_bit_p (in, FLAGS_REG); +- +- unsigned int i; +- +- /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE +- instructions. */ +- if (kind != X86_CSE_TLSDESC) +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (call_used_regs[i] +- && !fixed_regs[i] +- && bitmap_bit_p (in, i)) +- bitmap_set_bit (live_caller_saved_regs, i); +- +- if (!bitmap_empty_p (live_caller_saved_regs)) +- { +- /* Search for REG_DEAD notes in this basic block. */ +- FOR_BB_INSNS (bb, insn) +- { +- if (!NONDEBUG_INSN_P (insn)) +- continue; +- +- /* Check if FLAGS register is live. */ +- set = single_set (insn); +- if (set) +- { +- rtx dest = SET_DEST (set); +- if (REG_P (dest) && REGNO (dest) == FLAGS_REG) +- flags_live_p = true; +- } +- +- rtx link; +- for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) +- if (REG_NOTE_KIND (link) == REG_DEAD +- && REG_P (XEXP (link, 0))) +- { +- /* Mark the live caller-saved register as dead. */ +- for (i = REGNO (XEXP (link, 0)); +- i < END_REGNO (XEXP (link, 0)); +- i++) - bitmap_clear_bit (live_caller_saved_regs, i); - - /* Check if FLAGS register is dead. */ - if (REGNO (XEXP (link, 0)) == FLAGS_REG) - flags_live_p = false; -+ if (i < FIRST_PSEUDO_REGISTER) -+ bitmap_clear_bit (live_caller_saved_regs, i); - - if (bitmap_empty_p (live_caller_saved_regs)) - { +- +- if (bitmap_empty_p (live_caller_saved_regs)) +- { - /* All live caller-saved registers are dead after - this instruction. Since TLS instructions - clobber FLAGS register, it must be dead where - the TLS will be inserted after. */ - if (flags_live_p) - gcc_unreachable (); - after = insn; - goto insert_after; - } - } - } - +- after = insn; +- goto insert_after; +- } +- } +- } +- - /* All live caller-saved registers should be dead at the end - of this basic block. */ - gcc_unreachable (); -+ /* If any live caller-saved registers aren't dead at the end -+ of this basic block, get the basic block which dominates all -+ basic blocks which set the remaining live registers. */ -+ auto_bitmap set_bbs; -+ bitmap_iterator bi; -+ unsigned int id; -+ EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi) -+ { -+ basic_block set_bb = ix86_get_dominator_for_reg (id, bb); -+ bitmap_set_bit (set_bbs, set_bb->index); -+ } -+ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); -+ goto place_tls_call; - } - - /* Emit the TLS CALL insn. */ -@@ -3895,7 +3969,10 @@ insert_after: - tls_insn = emit_insn_after (tls, after); - } - else +- } +- +- /* Emit the TLS CALL insn. */ +- if (after) +- { +-insert_after: +- tls_insn = emit_insn_after (tls, after); +- } +- else - tls_insn = emit_insn_before (tls, before); -+ { -+insert_before: -+ tls_insn = emit_insn_before (tls, before); -+ } ++ rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before, ++ &after, ++ updated_gnu_tls_insns, ++ updated_gnu2_tls_insns); rtx_insn *tlsdesc_insn = nullptr; if (tlsdesc_set) -@@ -4213,6 +4290,8 @@ pass_x86_cse::x86_cse (void) +@@ -3936,7 +4044,7 @@ insert_after: + print_rtl_single (dump_file, tlsdesc_insn); + print_rtl_single (dump_file, tls_insn); + fprintf (dump_file, "\nbefore:\n\n"); +- print_rtl_single (dump_file, insn); ++ print_rtl_single (dump_file, before); + fprintf (dump_file, "\n"); + } + } +@@ -4213,6 +4321,8 @@ pass_x86_cse::x86_cse (void) basic_block bb; rtx_insn *insn; unsigned int i; @@ -282,7 +450,7 @@ index f0bdc5c1880b..903f2b0b4789 100644 df_set_flags (DF_DEFER_INSN_RESCAN); -@@ -4333,7 +4412,10 @@ pass_x86_cse::x86_cse (void) +@@ -4333,7 +4443,10 @@ pass_x86_cse::x86_cse (void) case X86_CSE_TLS_LD_BASE: case X86_CSE_TLSDESC: broadcast_reg = gen_reg_rtx (load->mode); @@ -294,7 +462,7 @@ index f0bdc5c1880b..903f2b0b4789 100644 load->broadcast_reg = broadcast_reg; break; -@@ -4399,6 +4481,8 @@ pass_x86_cse::x86_cse (void) +@@ -4399,6 +4512,8 @@ pass_x86_cse::x86_cse (void) load->val, load->kind, load->bbs, @@ -303,7 +471,7 @@ index f0bdc5c1880b..903f2b0b4789 100644 PATTERN (load->def_insn)); break; case X86_CSE_VEC_DUP: -@@ -4442,7 +4526,9 @@ pass_x86_cse::x86_cse (void) +@@ -4442,7 +4557,9 @@ pass_x86_cse::x86_cse (void) ix86_place_single_tls_call (load->broadcast_reg, load->val, load->kind, @@ -316,7 +484,7 @@ index f0bdc5c1880b..903f2b0b4789 100644 case X86_CSE_CONSTM1_VECTOR: diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1a.c b/gcc/testsuite/gcc.target/i386/pr121572-1a.c new file mode 100644 -index 000000000000..270d8ff5cb6d +index 00000000000..270d8ff5cb6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121572-1a.c @@ -0,0 +1,41 @@ @@ -363,7 +531,7 @@ index 000000000000..270d8ff5cb6d +} diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1b.c b/gcc/testsuite/gcc.target/i386/pr121572-1b.c new file mode 100644 -index 000000000000..8a6089109f50 +index 00000000000..8a6089109f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121572-1b.c @@ -0,0 +1,18 @@ @@ -387,7 +555,7 @@ index 000000000000..8a6089109f50 +#include "pr121572-1a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2a.c b/gcc/testsuite/gcc.target/i386/pr121572-2a.c new file mode 100644 -index 000000000000..38b254657d35 +index 00000000000..38b254657d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121572-2a.c @@ -0,0 +1,39 @@ @@ -432,7 +600,7 @@ index 000000000000..38b254657d35 +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2b.c b/gcc/testsuite/gcc.target/i386/pr121572-2b.c new file mode 100644 -index 000000000000..33d700243249 +index 00000000000..33d70024324 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121572-2b.c @@ -0,0 +1,6 @@ @@ -442,8 +610,5 @@ index 000000000000..33d700243249 +#include "pr121572-2a.c" + +/* { dg-final { scan-assembler-times "call\[ \t\]\\*__gmpfr_emax@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */ - -base-commit: 6f63044a7ae63a276a4f6d3108849e093c690bc6 -- 2.50.1 -
