For TLS calls:
1. UNSPEC_TLS_GD:
(parallel [
(set (reg:DI 0 ax)
(call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
(const_int 0 [0])))
(unspec:DI [(symbol_ref:DI ("e") [flags 0x50])
(reg/f:DI 7 sp)] UNSPEC_TLS_GD)
(clobber (reg:DI 5 di))])
2. UNSPEC_TLS_LD_BASE:
(parallel [
(set (reg:DI 0 ax)
(call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
(const_int 0 [0])))
(unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
3. UNSPEC_TLSDESC:
(parallel [
(set (reg/f:DI 104)
(plus:DI (unspec:DI [
(symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
(reg:DI 114)
(reg/f:DI 7 sp)] UNSPEC_TLSDESC)
(const:DI (unspec:DI [
(symbol_ref:DI ("e") [flags 0x1a])
] UNSPEC_DTPOFF))))
(clobber (reg:CC 17 flags))])
(parallel [
(set (reg:DI 101)
(unspec:DI [(symbol_ref:DI ("e") [flags 0x50])
(reg:DI 112)
(reg/f:DI 7 sp)] UNSPEC_TLSDESC))
(clobber (reg:CC 17 flags))])
they return the same value for the same input value. But multiple calls
with the same input value may be generated for simple programs like:
void a(long *);
int b(void);
void c(void);
static __thread long e;
long
d(void)
{
a(&e);
if (b())
c();
return e;
}
When compiled with -O2 -fPIC -mtls-dialect=gnu2, the following code is
generated:
.type d, @function
d:
.LFB0:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
leaq e@TLSDESC(%rip), %rbx
movq %rbx, %rax
call *e@TLSCALL(%rax)
addq %fs:0, %rax
movq %rax, %rdi
call a@PLT
call b@PLT
testl %eax, %eax
jne .L8
movq %rbx, %rax
call *e@TLSCALL(%rax)
popq %rbx
.cfi_remember_state
.cfi_def_cfa_offset 8
movq %fs:(%rax), %rax
ret
.p2align 4,,10
.p2align 3
.L8:
.cfi_restore_state
call c@PLT
movq %rbx, %rax
call *e@TLSCALL(%rax)
popq %rbx
.cfi_def_cfa_offset 8
movq %fs:(%rax), %rax
ret
.cfi_endproc
There are 3 "call *e@TLSCALL(%rax)". They all return the same value.
Rename the remove_redundant_vector_load pass to the x86_cse pass and, for
64-bit, extend it to also remove redundant TLS calls, generating:
d:
.LFB0:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
leaq e@TLSDESC(%rip), %rax
movq %fs:0, %rdi
call *e@TLSCALL(%rax)
addq %rax, %rdi
movq %rax, %rbx
call a@PLT
call b@PLT
testl %eax, %eax
jne .L8
movq %fs:(%rbx), %rax
popq %rbx
.cfi_remember_state
.cfi_def_cfa_offset 8
ret
.p2align 4,,10
.p2align 3
.L8:
.cfi_restore_state
call c@PLT
movq %fs:(%rbx), %rax
popq %rbx
.cfi_def_cfa_offset 8
ret
.cfi_endproc
with only one "call *e@TLSCALL(%rax)". This reduces the number of
__tls_get_addr calls in libgcc.a by 72%:
__tls_get_addr calls before after
libgcc.a 868 243
gcc/
PR target/81501
* config/i386/i386-features.cc (x86_cse_kind): Add X86_CSE_TLS_GD,
X86_CSE_TLS_LD_BASE and X86_CSE_TLSDESC.
(redundant_load): Renamed to ...
(redundant_pattern): This.
(replace_tls_call): New.
(ix86_place_single_tls_call): Likewise.
(pass_remove_redundant_vector_load): Renamed to ...
(pass_x86_cse): This. Add val, def_insn, mode, scalar_mode,
kind, candidate_kind, x86_cse, candidate_gnu_tls_p,
candidate_gnu2_tls_p and candidate_vector_p.
(pass_x86_cse::candidate_gnu_tls_p): New.
(pass_x86_cse::candidate_gnu2_tls_p): Likewise.
(pass_x86_cse::candidate_vector_p): Likewise.
(remove_redundant_vector_load): Renamed to ...
(pass_x86_cse::x86_cse): This. Extend to remove redundant TLS
calls.
(make_pass_remove_redundant_vector_load): Renamed to ...
(make_pass_x86_cse): This.
* config/i386/i386-passes.def: Replace
pass_remove_redundant_vector_load with pass_x86_cse.
* config/i386/i386-protos.h (ix86_tls_get_addr): New.
(make_pass_remove_redundant_vector_load): Renamed to ...
(make_pass_x86_cse): This.
* config/i386/i386.cc (ix86_tls_get_addr): Remove static.
* config/i386/i386.h (machine_function): Add
tls_descriptor_call_multiple_p.
* config/i386/i386.md (tls64): New attribute.
(@tls_global_dynamic_64_<mode>): Set tls_descriptor_call_multiple_p.
(@tls_local_dynamic_base_64_<mode>): Likewise.
(@tls_dynamic_gnu2_64_<mode>): Likewise.
(*tls_global_dynamic_64_<mode>): Set tls64 attribute to gd.
(*tls_local_dynamic_base_64_<mode>): Set tls64 attribute to ld_base.
(*tls_dynamic_gnu2_lea_64_<mode>): Set tls64 attribute to lea.
(*tls_dynamic_gnu2_call_64_<mode>): Set tls64 attribute to call.
(*tls_dynamic_gnu2_combine_64_<mode>): Set tls64 attribute to
combine.
gcc/testsuite/
PR target/81501
* g++.target/i386/pr81501-1.C: New test.
* gcc.target/i386/pr81501-1a.c: Likewise.
* gcc.target/i386/pr81501-1b.c: Likewise.
* gcc.target/i386/pr81501-2a.c: Likewise.
* gcc.target/i386/pr81501-2b.c: Likewise.
* gcc.target/i386/pr81501-3.c: Likewise.
* gcc.target/i386/pr81501-4a.c: Likewise.
* gcc.target/i386/pr81501-4b.c: Likewise.
* gcc.target/i386/pr81501-5.c: Likewise.
* gcc.target/i386/pr81501-6a.c: Likewise.
* gcc.target/i386/pr81501-6b.c: Likewise.
* gcc.target/i386/pr81501-7.c: Likewise.
* gcc.target/i386/pr81501-8a.c: Likewise.
* gcc.target/i386/pr81501-8b.c: Likewise.
* gcc.target/i386/pr81501-9a.c: Likewise.
* gcc.target/i386/pr81501-9b.c: Likewise.
Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
gcc/config/i386/i386-features.cc | 766 ++++++++++++++++++---
gcc/config/i386/i386-passes.def | 2 +-
gcc/config/i386/i386-protos.h | 4 +-
gcc/config/i386/i386.cc | 2 +-
gcc/config/i386/i386.h | 3 +
gcc/config/i386/i386.md | 25 +-
gcc/testsuite/g++.target/i386/pr81501-1.C | 16 +
gcc/testsuite/gcc.target/i386/pr81501-1a.c | 17 +
gcc/testsuite/gcc.target/i386/pr81501-1b.c | 6 +
gcc/testsuite/gcc.target/i386/pr81501-2a.c | 17 +
gcc/testsuite/gcc.target/i386/pr81501-2b.c | 6 +
gcc/testsuite/gcc.target/i386/pr81501-3.c | 9 +
gcc/testsuite/gcc.target/i386/pr81501-4a.c | 51 ++
gcc/testsuite/gcc.target/i386/pr81501-4b.c | 6 +
gcc/testsuite/gcc.target/i386/pr81501-5.c | 13 +
gcc/testsuite/gcc.target/i386/pr81501-6a.c | 67 ++
gcc/testsuite/gcc.target/i386/pr81501-6b.c | 28 +
gcc/testsuite/gcc.target/i386/pr81501-7.c | 20 +
gcc/testsuite/gcc.target/i386/pr81501-8a.c | 82 +++
gcc/testsuite/gcc.target/i386/pr81501-8b.c | 31 +
gcc/testsuite/gcc.target/i386/pr81501-9a.c | 39 ++
gcc/testsuite/gcc.target/i386/pr81501-9b.c | 22 +
22 files changed, 1119 insertions(+), 113 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/pr81501-1.C
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-1a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-1b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-2a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-2b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-4a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-4b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-6a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-6b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-8a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-8b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-9a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-9b.c
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c131577805f..d38b297a89a 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3493,10 +3493,13 @@ enum x86_cse_kind
{
X86_CSE_CONST0_VECTOR,
X86_CSE_CONSTM1_VECTOR,
- X86_CSE_VEC_DUP
+ X86_CSE_VEC_DUP,
+ X86_CSE_TLS_GD,
+ X86_CSE_TLS_LD_BASE,
+ X86_CSE_TLSDESC
};
-struct redundant_load
+struct redundant_pattern
{
/* Bitmap of basic blocks with broadcast instructions. */
auto_bitmap bbs;
@@ -3669,22 +3672,570 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
return op;
}
-/* At entry of the nearest common dominator for basic blocks with vector
- CONST0_RTX and integer CONSTM1_RTX uses, generate a single widest
- vector set instruction for all CONST0_RTX and integer CONSTM1_RTX
- uses.
+/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC. */
- NB: We want to generate only a single widest vector set to cover the
- whole function. The LCM algorithm isn't appropriate here since it
- may place a vector set inside the loop. */
+static void
+replace_tls_call (rtx src, auto_bitmap &tls_call_insns)
+{
+ bitmap_iterator bi;
+ unsigned int id;
-static unsigned int
-remove_redundant_vector_load (void)
+ EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi)
+ {
+ rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
+
+ /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns are
+ allowed. */
+ if (!CALL_P (insn))
+ {
+ attr_tls64 tls64 = get_attr_tls64 (insn);
+ if (tls64 != TLS64_CALL && tls64 != TLS64_COMBINE)
+ gcc_unreachable ();
+ }
+
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ gcc_unreachable ();
+
+ int j;
+ rtx op, dest = nullptr;
+ for (j = XVECLEN (pat, 0) - 1; j >= 0; j--)
+ {
+ op = XVECEXP (pat, 0, j);
+ if (GET_CODE (op) == SET)
+ {
+ dest = SET_DEST (op);
+ break;
+ }
+ }
+
+ rtx set = gen_rtx_SET (dest, src);
+ rtx_insn *set_insn = emit_insn_after (set, insn);
+ if (recog_memoized (set_insn) < 0)
+ gcc_unreachable ();
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nReplace:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nwith:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Delete the CALL insn. */
+ delete_insn (insn);
+
+ df_insn_rescan (set_insn);
+ }
+}
+
+/* Generate a TLS call of KIND with VAL and copy the call result to DEST,
+ at entry of the nearest dominator for basic block map BBS, which is in
+ the fake loop that contains the whole function, so that there is only
+ a single TLS CALL of KIND with VAL in the whole function. If
+ TLSDESC_SET isn't nullptr, insert it before the TLS call. */
+
+static void
+ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
+ bitmap bbs, rtx tlsdesc_set = nullptr)
+{
+ basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
+ while (bb->loop_father->latch
+ != EXIT_BLOCK_PTR_FOR_FN (cfun))
+ bb = get_immediate_dominator (CDI_DOMINATORS,
+ bb->loop_father->header);
+
+ rtx_insn *insn = BB_HEAD (bb);
+ while (insn && !NONDEBUG_INSN_P (insn))
+ {
+ if (insn == BB_END (bb))
+ {
+ insn = NULL;
+ break;
+ }
+ insn = NEXT_INSN (insn);
+ }
+
+ rtx rax = nullptr, rdi;
+ rtx eqv = nullptr;
+ rtx caddr;
+ rtx set;
+ rtx clob;
+ rtx symbol;
+ rtx tls;
+ rtx_insn *tls_insn;
+
+ switch (kind)
+ {
+ case X86_CSE_TLS_GD:
+ rax = gen_rtx_REG (Pmode, AX_REG);
+ rdi = gen_rtx_REG (Pmode, DI_REG);
+ caddr = ix86_tls_get_addr ();
+
+ symbol = XVECEXP (val, 0, 0);
+ tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi);
+
+ if (GET_MODE (symbol) != Pmode)
+ symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol);
+ eqv = symbol;
+ break;
+
+ case X86_CSE_TLS_LD_BASE:
+ rax = gen_rtx_REG (Pmode, AX_REG);
+ rdi = gen_rtx_REG (Pmode, DI_REG);
+ caddr = ix86_tls_get_addr ();
+
+ tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi);
+
+ /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers
+ to share the LD_BASE result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLS_LD_BASE);
+
+ break;
+
+ case X86_CSE_TLSDESC:
+ set = gen_rtx_SET (dest, val);
+ clob = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (CCmode, FLAGS_REG));
+ tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ rtx_insn *before = nullptr;
+ rtx_insn *after = nullptr;
+ if (insn == BB_HEAD (bb))
+ before = insn;
+ else
+ after = insn ? PREV_INSN (insn) : BB_END (bb);
+
+ /* TLS_GD and TLS_LD_BASE instructions are normal functions which
+ clobber caller-saved registers. TLSDESC instructions are special
+ functions which only clobber RAX. If any registers clobbered by
+ the TLS instruction are live in this basic block, we must insert
+ the TLS instruction after all live registers clobbered by the TLS
+ instruction are dead. */
+
+ auto_bitmap live_caller_saved_regs;
+ bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
+
+ bool flags_live_p = bitmap_bit_p (in, FLAGS_REG);
+
+ unsigned int i;
+
+ /* Get all live caller-saved registers. */
+ if (kind == X86_CSE_TLSDESC)
+ {
+ if (bitmap_bit_p (in, AX_REG))
+ bitmap_set_bit (live_caller_saved_regs, AX_REG);
+ }
+ else
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (call_used_regs[i]
+ && !fixed_regs[i]
+ && bitmap_bit_p (in, i))
+ bitmap_set_bit (live_caller_saved_regs, i);
+
+ if (!bitmap_empty_p (live_caller_saved_regs))
+ {
+ /* Search for REG_DEAD notes in this basic block. */
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ /* Check if FLAGS register is live. */
+ set = single_set (insn);
+ if (set)
+ {
+ rtx dest = SET_DEST (set);
+ if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
+ flags_live_p = true;
+ }
+
+ rtx link;
+ for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+ if (REG_NOTE_KIND (link) == REG_DEAD
+ && REG_P (XEXP (link, 0)))
+ {
+ /* Mark the live caller-saved register as dead. */
+ for (i = REGNO (XEXP (link, 0));
+ i < END_REGNO (XEXP (link, 0));
+ i++)
+ bitmap_clear_bit (live_caller_saved_regs, i);
+
+ /* Check if FLAGS register is dead. */
+ if (REGNO (XEXP (link, 0)) == FLAGS_REG)
+ flags_live_p = false;
+
+ if (bitmap_empty_p (live_caller_saved_regs))
+ {
+ /* All live caller-saved registers are dead after
+ this instruction. Since TLS instructions
+ clobber the FLAGS register, it must be dead
+ where the TLS instruction is inserted. */
+ if (flags_live_p)
+ gcc_unreachable ();
+ after = insn;
+ goto insert_after;
+ }
+ }
+ }
+
+ /* All live caller-saved registers should be dead at the end
+ of this basic block. */
+ gcc_unreachable ();
+ }
+
+ /* Emit the TLS CALL insn. */
+ if (after)
+ {
+insert_after:
+ tls_insn = emit_insn_after (tls, after);
+ }
+ else
+ tls_insn = emit_insn_before (tls, before);
+
+ rtx_insn *tlsdesc_insn = nullptr;
+ if (tlsdesc_set)
+ {
+ rtx dest = copy_rtx (SET_DEST (tlsdesc_set));
+ rtx src = copy_rtx (SET_SRC (tlsdesc_set));
+ tlsdesc_set = gen_rtx_SET (dest, src);
+ tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn);
+ }
+
+ if (kind != X86_CSE_TLSDESC)
+ {
+ RTL_CONST_CALL_P (tls_insn) = 1;
+
+ /* Indicate that this function can't jump to non-local gotos. */
+ make_reg_eh_region_note_nothrow_nononlocal (tls_insn);
+ }
+
+ if (recog_memoized (tls_insn) < 0)
+ gcc_unreachable ();
+
+ if (dump_file)
+ {
+ if (after)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ if (tlsdesc_insn)
+ print_rtl_single (dump_file, tlsdesc_insn);
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, after);
+ fprintf (dump_file, "\n");
+ }
+ else
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ if (tlsdesc_insn)
+ print_rtl_single (dump_file, tlsdesc_insn);
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\nbefore:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+
+ if (kind != X86_CSE_TLSDESC)
+ {
+ /* Copy RAX to DEST. */
+ set = gen_rtx_SET (dest, rax);
+ rtx_insn *set_insn = emit_insn_after (set, tls_insn);
+ set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+}
+
+namespace {
+
+const pass_data pass_data_x86_cse =
+{
+ RTL_PASS, /* type */
+ "x86_cse", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_MACH_DEP, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_x86_cse : public rtl_opt_pass
+{
+public:
+ pass_x86_cse (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_x86_cse, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate (function *fun) final override
+ {
+ return (TARGET_SSE2
+ && optimize
+ && optimize_function_for_speed_p (fun));
+ }
+
+ unsigned int execute (function *) final override
+ {
+ return x86_cse ();
+ }
+
+private:
+ /* The redundant source value. */
+ rtx val;
+ /* The instruction which defines the redundant value. */
+ rtx_insn *def_insn;
+ /* Mode of the destination of the candidate redundant instruction. */
+ machine_mode mode;
+ /* Mode of the source of the candidate redundant instruction. */
+ machine_mode scalar_mode;
+ /* The classification of the candidate redundant instruction. */
+ x86_cse_kind kind;
+
+ enum candidate_kind
+ {
+ candidate_no, /* Instruction isn't a candidate. */
+ candidate_ignore, /* Instruction should be ignored. */
+ candidate_yes /* Instruction is a candidate. */
+ };
+
+ unsigned int x86_cse (void);
+ candidate_kind candidate_gnu_tls_p (rtx_insn *);
+ candidate_kind candidate_gnu2_tls_p (rtx_insn *, rtx);
+ bool candidate_vector_p (rtx, rtx);
+}; // class pass_x86_cse
+
+/* Return candidate_yes and output def_insn, val, mode, scalar_mode and
+ kind if INSN is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE. */
+
+pass_x86_cse::candidate_kind
+pass_x86_cse::candidate_gnu_tls_p (rtx_insn *insn)
+{
+ if (!TARGET_64BIT
+ || !cfun->machine->tls_descriptor_call_multiple_p
+ || !CALL_P (insn))
+ return candidate_no;
+
+ /* Record the redundant TLS CALLs for 64-bit:
+
+ (parallel [
+ (set (reg:DI 0 ax)
+ (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
+ (const_int 0 [0])))
+ (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
+ (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
+ (clobber (reg:DI 5 di))])
+
+
+ and
+
+ (parallel [
+ (set (reg:DI 0 ax)
+ (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
+ (const_int 0 [0])))
+ (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
+
+ */
+
+ rtx pat, set, dest;
+ attr_tls64 tls64 = get_attr_tls64 (insn);
+ switch (tls64)
+ {
+ default:
+ return candidate_ignore;
+
+ case TLS64_GD:
+ case TLS64_LD_BASE:
+ pat = PATTERN (insn);
+ set = XVECEXP (pat, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ dest = SET_DEST (set);
+ scalar_mode = mode = GET_MODE (dest);
+ val = XVECEXP (pat, 0, 1);
+ gcc_assert (GET_CODE (val) == UNSPEC);
+ break;
+ }
+
+ if (tls64 == TLS64_GD)
+ kind = X86_CSE_TLS_GD;
+ else
+ kind = X86_CSE_TLS_LD_BASE;
+
+ def_insn = nullptr;
+ return candidate_yes;
+}
+
+/* Return candidate_yes and output def_insn, val, mode, scalar_mode and
+ kind if INSN is UNSPEC_TLSDESC. */
+
+pass_x86_cse::candidate_kind
+pass_x86_cse::candidate_gnu2_tls_p (rtx_insn *insn, rtx src)
+{
+ if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
+ return candidate_no;
+
+ /* Record GNU2 TLS CALLs for 64-bit:
+
+ (parallel [
+ (set (reg/f:DI 104)
+ (plus:DI (unspec:DI [
+ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+ (reg:DI 114)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
+ (const:DI (unspec:DI [
+ (symbol_ref:DI ("e") [flags 0x1a])
+ ] UNSPEC_DTPOFF))))
+ (clobber (reg:CC 17 flags))])
+
+ and
+
+ (parallel [
+ (set (reg:DI 101)
+ (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
+ (reg:DI 112)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
+ (clobber (reg:CC 17 flags))])
+
+ */
+
+ attr_tls64 tls64 = get_attr_tls64 (insn);
+ if (tls64 == TLS64_CALL)
+ val = src;
+ else if (tls64 == TLS64_COMBINE)
+ {
+ val = src;
+ src = XEXP (src, 0);
+ }
+ else
+ return candidate_no;
+
+ kind = X86_CSE_TLSDESC;
+ gcc_assert (GET_CODE (src) == UNSPEC);
+ src = XVECEXP (src, 0, 1);
+ scalar_mode = mode = GET_MODE (src);
+ if (REG_P (src))
+ {
+ /* All definitions of reg:DI 129 in
+
+ (set (reg:DI 110)
+ (unspec:DI [(symbol_ref:DI ("foo"))
+ (reg:DI 129)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
+
+ should have the same source as in
+
+ (set (reg:DI 129)
+ (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC))
+
+ */
+
+ df_ref ref;
+ rtx_insn *set_insn = nullptr;
+ rtx tls_src = nullptr;
+ for (ref = DF_REG_DEF_CHAIN (REGNO (src));
+ ref;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ break;
+
+ set_insn = DF_REF_INSN (ref);
+ tls64 = get_attr_tls64 (set_insn);
+ if (tls64 != TLS64_LEA)
+ {
+ set_insn = nullptr;
+ break;
+ }
+
+ rtx tls_set = PATTERN (set_insn);
+ if (!tls_src)
+ tls_src = SET_SRC (tls_set);
+ else if (!rtx_equal_p (tls_src, SET_SRC (tls_set)))
+ {
+ set_insn = nullptr;
+ break;
+ }
+ }
+
+ if (!set_insn)
+ return candidate_ignore;
+
+ rtx set = single_set (insn);
+ if (!set)
+ return candidate_ignore;
+
+ def_insn = set_insn;
+ }
+ else if (GET_CODE (src) == UNSPEC
+ && XINT (src, 1) == UNSPEC_TLSDESC
+ && SYMBOL_REF_P (XVECEXP (src, 0, 0)))
+ def_insn = nullptr;
+ else
+ gcc_unreachable ();
+
+ return candidate_yes;
+}
+
+/* Return true and output def_insn, val, mode, scalar_mode and kind if
+ SET is a vector broadcast instruction. */
+
+bool
+pass_x86_cse::candidate_vector_p (rtx set, rtx src)
+{
+ rtx dest = SET_DEST (set);
+ mode = GET_MODE (dest);
+ /* Skip non-vector instruction. */
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ /* Skip non-vector load instruction. */
+ if (!REG_P (dest) && !SUBREG_P (dest))
+ return false;
+
+ val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
+ &def_insn);
+ return val ? true : false;
+}
+
+/* At entry of the nearest common dominator for basic blocks with
+
+ 1. Vector CONST0_RTX patterns.
+ 2. Vector CONSTM1_RTX patterns.
+ 3. Vector broadcast patterns.
+ 4. UNSPEC_TLS_GD patterns.
+ 5. UNSPEC_TLS_LD_BASE patterns.
+ 6. UNSPEC_TLSDESC patterns.
+
+ generate a single pattern whose destination is used to replace the
+ source in all identical patterns.
+
+ NB: We want to generate a pattern, which is executed only once, to
+ cover the whole function. The LCM algorithm isn't appropriate here
+ since it may place a pattern inside the loop. */
+
+unsigned int
+pass_x86_cse::x86_cse (void)
{
timevar_push (TV_MACH_DEP);
- auto_vec<redundant_load *> loads;
- redundant_load *load;
+ auto_vec<redundant_pattern *> loads;
+ redundant_pattern *load;
basic_block bb;
rtx_insn *insn;
unsigned int i;
@@ -3700,61 +4251,74 @@ remove_redundant_vector_load (void)
if (!NONDEBUG_INSN_P (insn))
continue;
- rtx set = single_set (insn);
- if (!set)
- continue;
+ bool matched = false;
+ rtx set, src;
+ /* Remove redundant patterns if there are more than 2 of
+ them. */
+ unsigned int threshold = 2;
- /* Record single set vector instruction with CONST0_RTX and
- CONSTM1_RTX source. Record basic blocks with CONST0_RTX and
- CONSTM1_RTX. Count CONST0_RTX and CONSTM1_RTX. Record the
- maximum size of CONST0_RTX and CONSTM1_RTX. */
+ /* First check UNSPEC_TLS_GD and UNSPEC_TLS_LD_BASE. */
+ switch (candidate_gnu_tls_p (insn))
+ {
+ case candidate_no:
+ /* This isn't UNSPEC_TLS_GD nor UNSPEC_TLS_LD_BASE. */
+ set = single_set (insn);
+ if (!set)
+ continue;
- rtx dest = SET_DEST (set);
- machine_mode mode = GET_MODE (dest);
- /* Skip non-vector instruction. */
- if (!VECTOR_MODE_P (mode))
- continue;
+ src = SET_SRC (set);
- rtx src = SET_SRC (set);
- /* Skip non-vector load instruction. */
- if (!REG_P (dest) && !SUBREG_P (dest))
- continue;
+ /* Check UNSPEC_TLSDESC. */
+ switch (candidate_gnu2_tls_p (insn, src))
+ {
+ case candidate_no:
+ /* Check vector instruction. */
+ if (candidate_vector_p (set, src))
+ break;
+ continue;
+ case candidate_ignore:
+ /* Not a candidate. Skip. */
+ continue;
+ case candidate_yes:
+ break;
+ }
+ break;
- rtx_insn *def_insn;
- machine_mode scalar_mode;
- x86_cse_kind kind;
- rtx val = ix86_broadcast_inner (src, mode, &scalar_mode,
- &kind, &def_insn);
- if (!val)
- continue;
+ case candidate_ignore:
+ /* Not a candidate. Skip. */
+ continue;
- /* Remove redundant register loads if there are more than 2
- loads will be used. */
- unsigned int threshold = 2;
+ case candidate_yes:
+ /* This is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE. */
+ break;
+ }
- /* Check if there is a matching redundant vector load. */
- bool matched = false;
+ /* Check if there is a matching redundant load. */
FOR_EACH_VEC_ELT (loads, i, load)
if (load->val
&& load->kind == kind
&& load->mode == scalar_mode
&& (load->bb == bb
- || kind < X86_CSE_VEC_DUP
+ || kind != X86_CSE_VEC_DUP
/* Non all 0s/1s vector load must be in the same
basic block if it is in a recursive call. */
|| !recursive_call_p)
&& rtx_equal_p (load->val, val))
{
- /* Record vector instruction. */
+ /* Record instruction. */
bitmap_set_bit (load->insns, INSN_UID (insn));
/* Record the maximum vector size. */
- if (load->size < GET_MODE_SIZE (mode))
+ if (kind <= X86_CSE_VEC_DUP
+ && load->size < GET_MODE_SIZE (mode))
load->size = GET_MODE_SIZE (mode);
/* Record the basic block. */
bitmap_set_bit (load->bbs, bb->index);
+
+ /* Increment the count. */
load->count++;
+
matched = true;
break;
}
@@ -3762,8 +4326,11 @@ remove_redundant_vector_load (void)
if (matched)
continue;
- /* We see this vector broadcast the first time. */
- load = new redundant_load;
+ /* We see this instruction the first time. Record the
+ redundant source value, its mode, the destination size,
+ instruction which defines the redundant source value,
+ instruction basic block and the instruction kind. */
+ load = new redundant_pattern;
load->val = copy_rtx (val);
load->mode = scalar_mode;
@@ -3786,6 +4353,15 @@ remove_redundant_vector_load (void)
FOR_EACH_VEC_ELT (loads, i, load)
if (load->count >= load->threshold)
{
+ if (load->kind > X86_CSE_VEC_DUP)
+ {
+ broadcast_reg = gen_reg_rtx (load->mode);
+ replace_tls_call (broadcast_reg, load->insns);
+ load->broadcast_reg = broadcast_reg;
+ replaced = true;
+ continue;
+ }
+
machine_mode mode = ix86_get_vector_cse_mode (load->size,
load->mode);
broadcast_reg = gen_reg_rtx (mode);
@@ -3841,34 +4417,48 @@ remove_redundant_vector_load (void)
{
if (load->def_insn)
{
- /* Insert a broadcast after the original scalar
- definition. */
- rtx set = gen_rtx_SET (load->broadcast_reg,
- load->broadcast_source);
- insn = emit_insn_after (set, load->def_insn);
-
- if (cfun->can_throw_non_call_exceptions)
+ if (load->kind == X86_CSE_TLSDESC)
+ ix86_place_single_tls_call (load->broadcast_reg,
+ load->val,
+ load->kind,
+ load->bbs,
+ PATTERN (load->def_insn));
+ else
{
- /* Handle REG_EH_REGION note in DEF_INSN. */
- rtx note = find_reg_note (load->def_insn,
- REG_EH_REGION, nullptr);
- if (note)
+ /* Insert a broadcast after the original scalar
+ definition. */
+ rtx set = gen_rtx_SET (load->broadcast_reg,
+ load->broadcast_source);
+ insn = emit_insn_after (set, load->def_insn);
+
+ if (cfun->can_throw_non_call_exceptions)
{
- control_flow_insns.safe_push (load->def_insn);
- add_reg_note (insn, REG_EH_REGION,
- XEXP (note, 0));
+ /* Handle REG_EH_REGION note in DEF_INSN. */
+ rtx note = find_reg_note (load->def_insn,
+ REG_EH_REGION, nullptr);
+ if (note)
+ {
+ control_flow_insns.safe_push (load->def_insn);
+ add_reg_note (insn, REG_EH_REGION,
+ XEXP (note, 0));
+ }
}
- }
- if (dump_file)
- {
- fprintf (dump_file, "\nAdd:\n\n");
- print_rtl_single (dump_file, insn);
- fprintf (dump_file, "\nafter:\n\n");
- print_rtl_single (dump_file, load->def_insn);
- fprintf (dump_file, "\n");
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nAdd:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, load->def_insn);
+ fprintf (dump_file, "\n");
+ }
}
}
+ else if (load->kind > X86_CSE_VEC_DUP)
+ ix86_place_single_tls_call (load->broadcast_reg,
+ load->val,
+ load->kind,
+ load->bbs);
else
ix86_place_single_vector_set (load->broadcast_reg,
load->broadcast_source,
@@ -3905,48 +4495,12 @@ remove_redundant_vector_load (void)
return 0;
}
-namespace {
-
-const pass_data pass_data_remove_redundant_vector_load =
-{
- RTL_PASS, /* type */
- "rrvl", /* name */
- OPTGROUP_NONE, /* optinfo_flags */
- TV_MACH_DEP, /* tv_id */
- 0, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- 0, /* todo_flags_finish */
-};
-
-class pass_remove_redundant_vector_load : public rtl_opt_pass
-{
-public:
- pass_remove_redundant_vector_load (gcc::context *ctxt)
- : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt)
- {}
-
- /* opt_pass methods: */
- bool gate (function *fun) final override
- {
- return (TARGET_SSE2
- && optimize
- && optimize_function_for_speed_p (fun));
- }
-
- unsigned int execute (function *) final override
- {
- return remove_redundant_vector_load ();
- }
-}; // class pass_remove_redundant_vector_load
-
} // anon namespace
rtl_opt_pass *
-make_pass_remove_redundant_vector_load (gcc::context *ctxt)
+make_pass_x86_cse (gcc::context *ctxt)
{
- return new pass_remove_redundant_vector_load (ctxt);
+ return new pass_x86_cse (ctxt);
}
/* Convert legacy instructions that clobbers EFLAGS to APX_NF
diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def
index 06f0288b067..553b46d1fdc 100644
--- a/gcc/config/i386/i386-passes.def
+++ b/gcc/config/i386/i386-passes.def
@@ -35,6 +35,6 @@ along with GCC; see the file COPYING3. If not see
PR116174. */
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops);
- INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load);
+ INSERT_PASS_AFTER (pass_late_combine, 1, pass_x86_cse);
INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency);
INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert);
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 69bc0ee570d..ee6b78b2c77 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -290,6 +290,7 @@ extern rtx ix86_tls_module_base (void);
extern bool ix86_gpr_tls_address_pattern_p (rtx);
extern bool ix86_tls_address_pattern_p (rtx);
extern rtx ix86_rewrite_tls_address (rtx);
+extern rtx ix86_tls_get_addr (void);
extern void ix86_expand_vector_init (bool, rtx, rtx);
extern void ix86_expand_vector_set (bool, rtx, rtx, int);
@@ -430,8 +431,7 @@ extern rtl_opt_pass
*make_pass_insert_endbr_and_patchable_area
(gcc::context *);
extern rtl_opt_pass *make_pass_remove_partial_avx_dependency
(gcc::context *);
-extern rtl_opt_pass *make_pass_remove_redundant_vector_load
- (gcc::context *);
+extern rtl_opt_pass *make_pass_x86_cse (gcc::context *);
extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *);
extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4682db85ce4..8e66362862a 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -12439,7 +12439,7 @@ ix86_tls_index (void)
static GTY(()) rtx ix86_tls_symbol;
-static rtx
+rtx
ix86_tls_get_addr (void)
{
if (!ix86_tls_symbol)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 791f3b9e133..912b942aa1e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2865,6 +2865,9 @@ struct GTY(()) machine_function {
approximation. */
BOOL_BITFIELD tls_descriptor_call_expanded_p : 1;
+ /* True if TLS descriptor is called more than once. */
+ BOOL_BITFIELD tls_descriptor_call_multiple_p : 1;
+
/* If true, the current function has a STATIC_CHAIN is placed on the
stack below the return address. */
BOOL_BITFIELD static_chain_on_stack : 1;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index eb526997584..6f15d850c82 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -901,6 +901,10 @@ (define_attr "i387_cw"
"roundeven,floor,ceil,trunc,uninitialized,any"
(define_attr "avx_partial_xmm_update" "false,true"
(const_string "false"))
+;; Define attribute to indicate 64-bit TLS insns.
+(define_attr "tls64" "gd,ld_base,call,combine,lea,none"
+ (const_string "none"))
+
;; Define attribute to classify add/sub insns that consumes carry flag (CF)
(define_attr "use_carry" "0,1" (const_string "0"))
@@ -23243,6 +23247,7 @@ (define_insn "*tls_global_dynamic_64_<mode>"
return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
}
[(set_attr "type" "multi")
+ (set_attr "tls64" "gd")
(set (attr "length")
(symbol_ref "TARGET_X32 ? 15 : 16"))])
@@ -23281,7 +23286,11 @@ (define_expand "@tls_global_dynamic_64_<mode>"
UNSPEC_TLS_GD)
(clobber (match_operand:P 3 "register_operand"))])]
"TARGET_64BIT"
- "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
+{
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
(define_insn "*tls_local_dynamic_base_32_gnu"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -23343,6 +23352,7 @@ (define_insn "*tls_local_dynamic_base_64_<mode>"
return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
}
[(set_attr "type" "multi")
+ (set_attr "tls64" "ld_base")
(set_attr "length" "12")])
(define_insn "*tls_local_dynamic_base_64_largepic"
@@ -23376,7 +23386,11 @@ (define_expand "@tls_local_dynamic_base_64_<mode>"
(unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)
(clobber (match_operand:P 2 "register_operand"))])]
"TARGET_64BIT"
- "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
+{
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
;; Local dynamic of a single variable is a lose. Show combine how
;; to convert that back to global dynamic.
@@ -23570,6 +23584,8 @@ (define_expand "@tls_dynamic_gnu2_64_<mode>"
"TARGET_64BIT && TARGET_GNU2_TLS"
{
operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
ix86_tls_descriptor_calls_expanded_in_cfun = true;
})
@@ -23581,6 +23597,7 @@ (define_insn "*tls_dynamic_gnu2_lea_64_<mode>"
"lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
[(set_attr "type" "lea")
(set_attr "mode" "<MODE>")
+ (set_attr "tls64" "lea")
(set_attr "length" "7")
(set_attr "length_address" "4")])
@@ -23594,6 +23611,7 @@ (define_insn "*tls_dynamic_gnu2_call_64_<mode>"
"TARGET_64BIT && TARGET_GNU2_TLS"
"call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
[(set_attr "type" "call")
+ (set_attr "tls64" "call")
(set_attr "length" "2")
(set_attr "length_address" "0")])
@@ -23615,7 +23633,8 @@ (define_insn_and_split
"*tls_dynamic_gnu2_combine_64_<mode>"
{
operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
-})
+}
+ [(set_attr "tls64" "combine")])
(define_split
[(match_operand 0 "tls_address_pattern")]
diff --git a/gcc/testsuite/g++.target/i386/pr81501-1.C
b/gcc/testsuite/g++.target/i386/pr81501-1.C
new file mode 100644
index 00000000000..b2e89f4a5f0
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr81501-1.C
@@ -0,0 +1,16 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-std=c++14 -mtls-dialect=gnu -O2 -fpic -fplt" } */
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
+
+struct foo
+{
+ foo();
+ ~foo();
+};
+
+foo *
+test ()
+{
+ static thread_local foo foo_tls;
+ return &foo_tls;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-1a.c
b/gcc/testsuite/gcc.target/i386/pr81501-1a.c
new file mode 100644
index 00000000000..30b4642a9ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-1a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+
+void a(long *);
+int b(void);
+void c(void);
+static __thread long e;
+long
+d(void)
+{
+ a(&e);
+ if (b())
+ c();
+ return e;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-1b.c
b/gcc/testsuite/gcc.target/i386/pr81501-1b.c
new file mode 100644
index 00000000000..de25f226990
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-1b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+
+#include "pr81501-1a.c"
+
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*e@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-2a.c
b/gcc/testsuite/gcc.target/i386/pr81501-2a.c
new file mode 100644
index 00000000000..a06302a468f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-2a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+
+void a(long *);
+int b(void);
+void c(void);
+extern __thread long e;
+long
+d(void)
+{
+ a(&e);
+ if (b())
+ c();
+ return e;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-2b.c
b/gcc/testsuite/gcc.target/i386/pr81501-2b.c
new file mode 100644
index 00000000000..4afb7426c81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-2b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+
+#include "pr81501-2a.c"
+
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*e@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-3.c
b/gcc/testsuite/gcc.target/i386/pr81501-3.c
new file mode 100644
index 00000000000..d4220630900
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+
+static __thread int local1;
+int *
+get_local1 (void)
+{
+ return &local1;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-4a.c
b/gcc/testsuite/gcc.target/i386/pr81501-4a.c
new file mode 100644
index 00000000000..0c655e259ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-4a.c
@@ -0,0 +1,51 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -march=x86-64 -fpic -fplt -mtls-dialect=gnu" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**in_dso:
+**.LFB[0-9]+:
+**...
+** movl %edi, %.*
+**...
+** mov(l|q) %(e|r)si, %.*
+**...
+** call __tls_get_addr@PLT
+**...
+*/
+
+__thread int foo;
+
+extern void bar1 (int *, int *);
+extern void bar2 (int);
+extern void bar3 (const char *);
+
+int
+in_dso (int n, int *caller_foop)
+{
+ int *foop;
+ int result = 0;
+
+ bar3 ("foo"); /* Make sure PLT is used before macros. */
+ asm ("" ::: "memory");
+
+ foop = &foo;
+
+ if (caller_foop != (void *) 0 && foop != caller_foop)
+ {
+ bar1 (caller_foop, foop);
+ result = 1;
+ }
+ else if (*foop != n)
+ {
+ bar2 (n);
+ result = 1;
+ }
+
+ *foop = 16;
+
+ return result;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-4b.c
b/gcc/testsuite/gcc.target/i386/pr81501-4b.c
new file mode 100644
index 00000000000..5d35712b70d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-4b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+
+#include "pr81501-4a.c"
+
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*foo@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-5.c
b/gcc/testsuite/gcc.target/i386/pr81501-5.c
new file mode 100644
index 00000000000..7f666e1c006
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-5.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+
+extern __thread int __bid_IDEC_glbflags;
+extern long __bid64qq_div_bid_y_0_1;
+extern void get_BID64(int *);
+void
+__bid64qq_div(void)
+{
+ if (__bid64qq_div_bid_y_0_1)
+ __bid_IDEC_glbflags |= 1;
+ get_BID64(&__bid_IDEC_glbflags);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-6a.c
b/gcc/testsuite/gcc.target/i386/pr81501-6a.c
new file mode 100644
index 00000000000..db8acf82883
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-6a.c
@@ -0,0 +1,67 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -march=x86-64 -fpic -fplt -mtls-dialect=gnu" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**in_dso:
+**.LFB[0-9]+:
+**...
+** mov(l|q) %(e|r)dx, %.*
+**...
+** movl %edi, %.*
+**...
+** mov(l|q) %(e|r)si, %.*
+**...
+** call __tls_get_addr@PLT
+**...
+*/
+
+__thread int foo;
+__thread int bar;
+
+extern void fun1 (int *, int *);
+extern void fun2 (int);
+extern void fun3 (const char *);
+
+int
+in_dso (int n, int *caller_foop, int *caller_barp)
+{
+ int *foop;
+ int *barp;
+ int result = 0;
+
+ fun3 ("foo"); /* Make sure PLT is used before macros. */
+ asm ("" ::: "memory");
+
+ foop = &foo;
+ barp = &bar;
+
+ if (caller_foop != (void *) 0 && foop != caller_foop)
+ {
+ fun1 (caller_foop, foop);
+ result = 1;
+ if (caller_barp != (void *) 0 && barp != caller_barp)
+ {
+ fun1 (caller_barp, barp);
+ result = 2;
+ }
+ else if (*barp != n)
+ {
+ fun2 (n);
+ result = 3;
+ }
+ }
+ else if (*foop != n)
+ {
+ fun2 (n);
+ result = 4;
+ }
+
+ *barp = 16;
+ *foop = 16;
+
+ return result;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 2 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-6b.c
b/gcc/testsuite/gcc.target/i386/pr81501-6b.c
new file mode 100644
index 00000000000..0b71f0a9039
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-6b.c
@@ -0,0 +1,28 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -march=x86-64 -fpic -fplt -mtls-dialect=gnu2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**in_dso:
+**.LFB[0-9]+:
+**...
+** lea(l|q) bar@TLSDESC\(%rip\), %(e|r)ax
+** mov(l|q) %(e|r)si, %.*
+**...
+** mov(l|q) %(e|r)dx, %.*
+**...
+** movl %edi, %.*
+**...
+** call \*bar@TLSCALL\(%(e|r)ax\)
+**...
+** lea(l|q) foo@TLSDESC\(%rip\), %(e|r)ax
+**...
+** call \*foo@TLSCALL\(%(e|r)ax\)
+**...
+*/
+
+#include "pr81501-6a.c"
+
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*foo@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*bar@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-7.c
b/gcc/testsuite/gcc.target/i386/pr81501-7.c
new file mode 100644
index 00000000000..b2fe5d5eb85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-7.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+
+extern int __bid_IDEC_glbround, __bid64qqq_fma_save_fpsf;
+extern __thread int __bid_IDEC_glbflags;
+typedef struct {
+ long w[2];
+} UINT128;
+extern long __bid64qqq_fma_res_0_1;
+extern void bid128_ext_fma(UINT128, UINT128);
+void
+__bid64qqq_fma(UINT128 y, UINT128 z)
+{
+ __bid_IDEC_glbflags = 0;
+ bid128_ext_fma(y, z);
+ if (__bid_IDEC_glbround || __bid64qqq_fma_res_0_1)
+ __bid_IDEC_glbflags |= __bid64qqq_fma_save_fpsf;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-8a.c
b/gcc/testsuite/gcc.target/i386/pr81501-8a.c
new file mode 100644
index 00000000000..7e14ef5cd4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-8a.c
@@ -0,0 +1,82 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -march=x86-64 -fpic -fplt -mtls-dialect=gnu" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**in_dso:
+**.LFB[0-9]+:
+**...
+** mov(l|q) %(e|r)dx, %.*
+**...
+** movl %edi, %.*
+**...
+** mov(l|q) %(e|r)si, %.*
+**...
+** testb %al, %al
+**...
+** call __tls_get_addr@PLT
+**...
+*/
+
+#include <stdarg.h>
+
+__thread int foo;
+__thread int bar;
+
+extern void fun1 (int *, int *);
+extern void fun2 (int);
+extern void fun3 (const char *);
+
+int
+in_dso (int n, int *caller_foop, int *caller_barp, ...)
+{
+ int *foop;
+ int *barp;
+ int result;
+ va_list ap;
+ double d;
+
+ va_start (ap, caller_barp);
+
+ result = 0;
+
+ fun3 ("foo"); /* Make sure PLT is used before macros. */
+ asm ("" ::: "memory");
+
+ foop = &foo;
+ barp = &bar;
+
+ if (caller_foop != (void *) 0 && foop != caller_foop)
+ {
+ fun1 (caller_foop, foop);
+ result = 1;
+ if (caller_barp != (void *) 0 && barp != caller_barp)
+ {
+ fun1 (caller_barp, barp);
+ result = 2;
+ }
+ else if (*barp != n)
+ {
+ fun2 (n);
+ result = 3;
+ }
+ }
+ else if (*foop != n)
+ {
+ fun2 (n);
+ result = 4;
+ }
+
+ *barp = 16;
+ *foop = 16;
+
+ d = va_arg (ap, double);
+ if (d != 1234.0)
+ result = 10;
+ va_end (ap);
+
+ return result;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 2 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-8b.c
b/gcc/testsuite/gcc.target/i386/pr81501-8b.c
new file mode 100644
index 00000000000..778b2fb3507
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-8b.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -march=x86-64 -fpic -fplt -mtls-dialect=gnu2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**in_dso:
+**.LFB[0-9]+:
+**...
+** mov(l|q) %(e|r)si, %.*
+**...
+** mov(l|q) %(e|r)dx, %.*
+**...
+** movl %edi, %.*
+**...
+** testb %al, %al
+**...
+** lea(l|q) bar@TLSDESC\(%rip\), %(e|r)ax
+**...
+** call \*bar@TLSCALL\(%(e|r)ax\)
+**...
+** lea(l|q) foo@TLSDESC\(%rip\), %(e|r)ax
+**...
+** call \*foo@TLSCALL\(%(e|r)ax\)
+**...
+*/
+
+#include "pr81501-8a.c"
+
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*foo@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*bar@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-9a.c
b/gcc/testsuite/gcc.target/i386/pr81501-9a.c
new file mode 100644
index 00000000000..c5de37009c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-9a.c
@@ -0,0 +1,39 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -march=x86-64-v4 -fpic -fplt -mtls-dialect=gnu" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**...
+** vpbroadcastb %edi, %zmm0
+**...
+** call __tls_get_addr@PLT
+**...
+*/
+
+#include <immintrin.h>
+
+extern __m512i sinkz;
+extern __m256i sinky;
+extern __m128i sinkx;
+extern void func1 (long *);
+extern int func2 (void);
+extern void func3 (void);
+static __thread long var;
+
+long
+foo (char c)
+{
+ func1 (&var);
+ if (func2 ())
+ func3 ();
+ sinkx = _mm_set1_epi8 (c);
+ sinkz = _mm512_set1_epi8 (c);
+ sinky = _mm256_set1_epi8 (c);
+ return var;
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb" 1 } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-9b.c
b/gcc/testsuite/gcc.target/i386/pr81501-9b.c
new file mode 100644
index 00000000000..711b177bc1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-9b.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -march=x86-64-v4 -fpic -fplt -mtls-dialect=gnu2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**...
+** vpbroadcastb %edi, %zmm0
+**...
+** lea(l|q) var@TLSDESC\(%rip\), %(e|r)ax
+**...
+** call \*var@TLSCALL\(%(e|r)ax\)
+**...
+*/
+
+#include "pr81501-9a.c"
+
+/* { dg-final { scan-assembler-times "vpbroadcastb" 1 } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*var@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
--
2.50.1