On Sun, Mar 18, 2012 at 1:55 PM, Uros Bizjak <ubiz...@gmail.com> wrote: > On Sun, Mar 18, 2012 at 5:01 PM, Uros Bizjak <ubiz...@gmail.com> wrote: > >>> I am testing this patch. OK for trunk if it passes all tests? >> >> No, force_reg will generate a pseudo, so this conversion is valid only >> for !can_create_pseudo (). >> >> At least for *tls_initial_exec_x32_store, you will need a temporary to >> split the pattern after reload.
Here is the updated patch, which adds the can_create_pseudo_p () check. I also changed tls_initial_exec_x32 to take an input register operand as the thread pointer. > Please try attached patch. It simply throws away all recent > complications w.r.t. to thread pointer and always handles TP in > DImode. > > The testcase: > > --cut here-- > __thread int foo __attribute__ ((tls_model ("initial-exec"))); > > void bar (int x) > { > foo = x; > } > > int baz (void) > { > return foo; > } > --cut here-- > > Now compiles to: > > bar: > movq foo@gottpoff(%rip), %rax > movl %edi, %fs:(%rax) > ret > > baz: > movq foo@gottpoff(%rip), %rax > movl %fs:(%rax), %eax > ret > > In effect, this always generates %fs(%rDI) and emits REX prefix before > mov/add to satisfy brain-dead linkers. > > The patch is bootstrapping now on x86_64-pc-linux-gnu. > For -- extern __thread char c; extern char y; void ie (void) { y = c; } -- your patch generates: movl %fs:0, %eax movq c@gottpoff(%rip), %rdx movzbl (%rax,%rdx), %edx movb %dl, y(%rip) ret It can be optimized to: movq c@gottpoff(%rip), %rax movzbl %fs:(%rax), %eax movb %al, y(%rip) ret H.J.
2012-03-19 H.J. Lu <hongjiu...@intel.com> * config/i386/i386-protos.h (ix86_split_tls_initial_exec_x32): New. * config/i386/i386.c (legitimize_tls_address): Also pass thread pointer to gen_tls_initial_exec_x32. (ix86_split_tls_initial_exec_x32): New. * config/i386/i386.md (*load_tp_x32): Renamed to ... (*load_tp_x32_<mode>): This. Replace SI with SWI48x. (tls_initial_exec_x32): Add an input register operand as thread pointer. Generate a REX prefix if needed. (*tls_initial_exec_x32_load): New. (*tls_initial_exec_x32_store): Likewise. diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 630112f..528eeaa 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -142,6 +142,7 @@ extern void ix86_split_lshr (rtx *, rtx, enum machine_mode); extern rtx ix86_find_base_term (rtx); extern bool ix86_check_movabs (rtx, int); extern void ix86_split_idivmod (enum machine_mode, rtx[], bool); +extern void ix86_split_tls_initial_exec_x32 (rtx [], enum machine_mode, bool); extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot); extern int ix86_attr_length_immediate_default (rtx, bool); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 78a366e..fb802ee 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12671,13 +12671,14 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) } else if (Pmode == SImode) { - /* Always generate - movl %fs:0, %reg32 + /* Always generate a REX prefix for addl xgottpoff(%rip), %reg32 - to support linker IE->LE optimization and avoid - fs:(%reg32) as memory operand. */ + to support linker IE->LE optimization. 
*/ dest = gen_reg_rtx (Pmode); - emit_insn (gen_tls_initial_exec_x32 (dest, x)); + base = get_thread_pointer (for_mov + || !(TARGET_TLS_DIRECT_SEG_REFS + && TARGET_TLS_INDIRECT_SEG_REFS)); + emit_insn (gen_tls_initial_exec_x32 (dest, base, x)); return dest; } @@ -12754,6 +12755,28 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) return dest; } +/* Split x32 TLS IE access in MODE. Split load if LOAD is TRUE, + otherwise split store. */ + +void +ix86_split_tls_initial_exec_x32 (rtx operands[], + enum machine_mode mode, bool load) +{ + rtx base, mem; + rtx off = load ? operands[1] : operands[0]; + off = gen_rtx_UNSPEC (DImode, gen_rtvec (1, off), UNSPEC_GOTNTPOFF); + off = gen_rtx_CONST (DImode, off); + off = gen_const_mem (DImode, off); + set_mem_alias_set (off, ix86_GOT_alias_set ()); + base = gen_rtx_UNSPEC (DImode, gen_rtvec (1, const0_rtx), UNSPEC_TP); + off = gen_rtx_PLUS (DImode, base, force_reg (DImode, off)); + mem = gen_rtx_MEM (mode, off); + if (load) + emit_move_insn (operands[0], mem); + else + emit_move_insn (mem, operands[1]); +} + /* Create or return the unique __imp_DECL dllimport symbol corresponding to symbol DECL. */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index eae26ae..1643792 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -12747,11 +12747,11 @@ (define_mode_attr tp_seg [(SI "gs") (DI "fs")]) ;; Load and add the thread base pointer from %<tp_seg>:0. 
-(define_insn "*load_tp_x32" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(const_int 0)] UNSPEC_TP))] +(define_insn "*load_tp_x32_<mode>" + [(set (match_operand:SWI48x 0 "register_operand" "=r") + (unspec:SWI48x [(const_int 0)] UNSPEC_TP))] "TARGET_X32" - "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}" + "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}" [(set_attr "type" "imov") (set_attr "modrm" "0") (set_attr "length" "7") @@ -12836,27 +12836,54 @@ } [(set_attr "type" "multi")]) -;; When Pmode == SImode, there may be no REX prefix for ADD. Avoid -;; any instructions between MOV and ADD, which may interfere linker -;; IE->LE optimization, since the last byte of the previous instruction -;; before ADD may look like a REX prefix. This also avoids -;; movl x@gottpoff(%rip), %reg32 -;; movl $fs:(%reg32), %reg32 -;; Since address override works only on the (reg32) part in fs:(reg32), -;; we can't use it as memory operand. +;; When Pmode == SImode, there may be no REX prefix for ADD. Make sure +;; there is a REX prefix. 
(define_insn "tls_initial_exec_x32" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI - [(match_operand 1 "tls_symbolic_operand" "")] + [(match_operand:SI 1 "register_operand" "0") + (match_operand 2 "tls_symbolic_operand" "")] UNSPEC_TLS_IE_X32)) (clobber (reg:CC FLAGS_REG))] "TARGET_X32" { - output_asm_insn - ("mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}", operands); - return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}"; + if (!REX_INT_REG_P (operands[0])) + fputs ("\trex ", asm_out_file); + return "add{l}\t{%a2@gottpoff(%%rip), %0|%0, %a2@gottpoff[rip]}"; } - [(set_attr "type" "multi")]) + [(set_attr "type" "alu") + (set_attr "length" "7") + (set_attr "memory" "load")]) + +(define_insn_and_split "*tls_initial_exec_x32_load" + [(set (match_operand:SWI1248x 0 "register_operand" "=r") + (mem:SWI1248x + (unspec:SI + [(unspec:SI [(const_int 0)] UNSPEC_TP) + (match_operand 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_IE_X32))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_X32 + && can_create_pseudo_p ()" + "#" + "" + [(const_int 0)] + "ix86_split_tls_initial_exec_x32 (operands, <MODE>mode, TRUE); DONE;") + +(define_insn_and_split "*tls_initial_exec_x32_store" + [(set (mem:SWI1248x + (unspec:SI + [(unspec:SI [(const_int 0)] UNSPEC_TP) + (match_operand 0 "tls_symbolic_operand" "")] + UNSPEC_TLS_IE_X32)) + (match_operand:SWI1248x 1 "register_operand" "r")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_X32 + && can_create_pseudo_p ()" + "#" + "" + [(const_int 0)] + "ix86_split_tls_initial_exec_x32 (operands, <MODE>mode, FALSE); DONE;") ;; GNU2 TLS patterns can be split.