On Mon, Mar 19, 2012 at 9:19 AM, H.J. Lu <hjl.to...@gmail.com> wrote: > On Mon, Mar 19, 2012 at 8:54 AM, H.J. Lu <hjl.to...@gmail.com> wrote: >> On Mon, Mar 19, 2012 at 8:51 AM, H.J. Lu <hjl.to...@gmail.com> wrote: >>> On Sun, Mar 18, 2012 at 1:55 PM, Uros Bizjak <ubiz...@gmail.com> wrote: >>>> On Sun, Mar 18, 2012 at 5:01 PM, Uros Bizjak <ubiz...@gmail.com> wrote: >>>> >>>>>> I am testing this patch. OK for trunk if it passes all tests? >>>>> >>>>> No, force_reg will generate a pseudo, so this conversion is valid only >>>>> for !can_create_pseudo (). >>>>> >>>>> At least for *tls_initial_exec_x32_store, you will need a temporary to >>>>> split the pattern after reload. >>> >>> Here is the updated patch to add can_create_pseudo. I also changed >>> tls_initial_exec_x32 to take an input register operand as thread pointer. >>> >>>> Please try attached patch. It simply throws away all recent >>>> complications w.r.t. to thread pointer and always handles TP in >>>> DImode. >>>> >>>> The testcase: >>>> >>>> --cut here-- >>>> __thread int foo __attribute__ ((tls_model ("initial-exec"))); >>>> >>>> void bar (int x) >>>> { >>>> foo = x; >>>> } >>>> >>>> int baz (void) >>>> { >>>> return foo; >>>> } >>>> --cut here-- >>>> >>>> Now compiles to: >>>> >>>> bar: >>>> movq foo@gottpoff(%rip), %rax >>>> movl %edi, %fs:(%rax) >>>> ret >>>> >>>> baz: >>>> movq foo@gottpoff(%rip), %rax >>>> movl %fs:(%rax), %eax >>>> ret >>>> >>>> In effect, this always generates %fs(%rDI) and emits REX prefix before >>>> mov/add to satisfy brain-dead linkers. >>>> >>>> The patch is bootstrapping now on x86_64-pc-linux-gnu. 
>>>> >>> >>> For >>> >>> -- >>> extern __thread char c; >>> extern char y; >>> void >>> ie (void) >>> { >>> y = c; >>> } >>> -- >>> >>> Your patch generates: >>> >>> movl %fs:0, %eax >>> movq c@gottpoff(%rip), %rdx >>> movzbl (%rax,%rdx), %edx >>> movb %dl, y(%rip) >>> ret >>> >>> It can be optimized to: >>> >>> movq c@gottpoff(%rip), %rax >>> movzbl %fs:(%rax), %eax >>> movb %al, y(%rip) >>> ret >>> >> >> Combine failed: >> >> (set (reg:QI 63 [ c ]) >> (mem/c:QI (plus:DI (zero_extend:DI (unspec:SI [ >> (const_int 0 [0]) >> ] UNSPEC_TP)) >> (mem/u/c:DI (const:DI (unspec:DI [ >> (symbol_ref:SI ("c") [flags 0x60] >> <var_decl 0x7ffff19b8140 c>) >> ] UNSPEC_GOTNTPOFF)) [2 S8 A8])) [0 c+0 S1 A8])) >> >> > > Wrong testcase. IT should be > > -- > extern __thread char c; > extern __thread short w; > extern char y; > extern short i; > void > ie (void) > { > y = c; > i = w; > } > --- > > I got > > movl %fs:0, %eax > movq c@gottpoff(%rip), %rdx > movzbl (%rax,%rdx), %edx > movb %dl, y(%rip) > movq w@gottpoff(%rip), %rdx > movzwl (%rax,%rdx), %eax > movw %ax, i(%rip) > ret > > It can be > > movq c@gottpoff(%rip), %rax > movzbl %fs:(%rax), %eax > movb %al, y(%rip) > movq w@gottpoff(%rip), %rax > movzwl %fs:(%rax), %eax > movw %ax, i(%rip) > ret > >
How about this patch? I changed the x32 thread pointer (TP) load pattern to (define_insn "*load_tp_x32_<mode>" [(set (match_operand:SWI48x 0 "register_operand" "=r") (unspec:SWI48x [(const_int 0)] UNSPEC_TP))] "TARGET_X32" "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}" [(set_attr "type" "imov") (set_attr "modrm" "0") (set_attr "length" "7") (set_attr "memory" "load") (set_attr "imm_disp" "false")]) and removed the *load_tp_x32_zext pattern. -- H.J.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9aa5ee7..66221e4 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12483,15 +12483,12 @@ legitimize_pic_address (rtx orig, rtx reg) /* Load the thread pointer. If TO_REG is true, force it into a register. */ static rtx -get_thread_pointer (bool to_reg) +get_thread_pointer (enum machine_mode tp_mode, bool to_reg) { - rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); - - if (GET_MODE (tp) != Pmode) - tp = convert_to_mode (Pmode, tp, 1); + rtx tp = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); if (to_reg) - tp = copy_addr_to_reg (tp); + tp = copy_to_mode_reg (tp_mode, tp); return tp; } @@ -12543,6 +12540,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) { rtx dest, base, off; rtx pic = NULL_RTX, tp = NULL_RTX; + enum machine_mode tp_mode = Pmode; int type; switch (model) @@ -12568,7 +12566,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) else emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic)); - tp = get_thread_pointer (true); + tp = get_thread_pointer (Pmode, true); dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); set_unique_reg_note (get_last_insn (), REG_EQUAL, x); @@ -12618,7 +12616,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) else emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic)); - tp = get_thread_pointer (true); + tp = get_thread_pointer (Pmode, true); set_unique_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_MINUS (Pmode, tmp, tp)); } @@ -12664,27 +12662,18 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) case TLS_MODEL_INITIAL_EXEC: if (TARGET_64BIT) { + tp_mode = DImode; + if (TARGET_SUN_TLS) { /* The Sun linker took the AMD64 TLS spec literally and can only handle %rax as destination of the initial executable code sequence. 
*/ - dest = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (tp_mode); emit_insn (gen_tls_initial_exec_64_sun (dest, x)); return dest; } - else if (Pmode == SImode) - { - /* Always generate - movl %fs:0, %reg32 - addl xgottpoff(%rip), %reg32 - to support linker IE->LE optimization and avoid - fs:(%reg32) as memory operand. */ - dest = gen_reg_rtx (Pmode); - emit_insn (gen_tls_initial_exec_x32 (dest, x)); - return dest; - } pic = NULL; type = UNSPEC_GOTNTPOFF; @@ -12708,24 +12697,23 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) type = UNSPEC_INDNTPOFF; } - off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); - off = gen_rtx_CONST (Pmode, off); + off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type); + off = gen_rtx_CONST (tp_mode, off); if (pic) - off = gen_rtx_PLUS (Pmode, pic, off); - off = gen_const_mem (Pmode, off); + off = gen_rtx_PLUS (tp_mode, pic, off); + off = gen_const_mem (tp_mode, off); set_mem_alias_set (off, ix86_GOT_alias_set ()); if (TARGET_64BIT || TARGET_ANY_GNU_TLS) { - base = get_thread_pointer (for_mov - || !(TARGET_TLS_DIRECT_SEG_REFS - && TARGET_TLS_INDIRECT_SEG_REFS)); - off = force_reg (Pmode, off); - return gen_rtx_PLUS (Pmode, base, off); + base = get_thread_pointer (tp_mode, + for_mov || !TARGET_TLS_DIRECT_SEG_REFS); + off = force_reg (tp_mode, off); + return gen_rtx_PLUS (tp_mode, base, off); } else { - base = get_thread_pointer (true); + base = get_thread_pointer (Pmode, true); dest = gen_reg_rtx (Pmode); emit_insn (ix86_gen_sub3 (dest, base, off)); } @@ -12739,14 +12727,13 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) if (TARGET_64BIT || TARGET_ANY_GNU_TLS) { - base = get_thread_pointer (for_mov - || !(TARGET_TLS_DIRECT_SEG_REFS - && TARGET_TLS_INDIRECT_SEG_REFS)); + base = get_thread_pointer (Pmode, + for_mov || !TARGET_TLS_DIRECT_SEG_REFS); return gen_rtx_PLUS (Pmode, base, off); } else { - base = get_thread_pointer (true); + base = get_thread_pointer (Pmode, true); dest = gen_reg_rtx 
(Pmode); emit_insn (ix86_gen_sub3 (dest, base, off)); } @@ -13274,8 +13261,7 @@ ix86_delegitimize_tls_address (rtx orig_x) rtx x = orig_x, unspec; struct ix86_address addr; - if (!(TARGET_TLS_DIRECT_SEG_REFS - && TARGET_TLS_INDIRECT_SEG_REFS)) + if (!TARGET_TLS_DIRECT_SEG_REFS) return orig_x; if (MEM_P (x)) x = XEXP (x, 0); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 9e5ac00..3fcd209 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -467,9 +467,6 @@ extern int x86_prefetch_sse; #define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0 #endif -/* Address override works only on the (%reg) part of %fs:(%reg). */ -#define TARGET_TLS_INDIRECT_SEG_REFS (Pmode == word_mode) - /* Fence to use after loop using storent. */ extern tree x86_mfence; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d23c67b..e167ceb 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -12747,20 +12747,9 @@ (define_mode_attr tp_seg [(SI "gs") (DI "fs")]) ;; Load and add the thread base pointer from %<tp_seg>:0. -(define_insn "*load_tp_x32" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(const_int 0)] UNSPEC_TP))] - "TARGET_X32" - "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}" - [(set_attr "type" "imov") - (set_attr "modrm" "0") - (set_attr "length" "7") - (set_attr "memory" "load") - (set_attr "imm_disp" "false")]) - -(define_insn "*load_tp_x32_zext" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (unspec:SI [(const_int 0)] UNSPEC_TP)))] +(define_insn "*load_tp_x32_<mode>" + [(set (match_operand:SWI48x 0 "register_operand" "=r") + (unspec:SWI48x [(const_int 0)] UNSPEC_TP))] "TARGET_X32" "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}" [(set_attr "type" "imov") @@ -12836,28 +12825,6 @@ } [(set_attr "type" "multi")]) -;; When Pmode == SImode, there may be no REX prefix for ADD. 
Avoid -;; any instructions between MOV and ADD, which may interfere linker -;; IE->LE optimization, since the last byte of the previous instruction -;; before ADD may look like a REX prefix. This also avoids -;; movl x@gottpoff(%rip), %reg32 -;; movl $fs:(%reg32), %reg32 -;; Since address override works only on the (reg32) part in fs:(reg32), -;; we can't use it as memory operand. -(define_insn "tls_initial_exec_x32" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI - [(match_operand 1 "tls_symbolic_operand")] - UNSPEC_TLS_IE_X32)) - (clobber (reg:CC FLAGS_REG))] - "TARGET_X32" -{ - output_asm_insn - ("mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}", operands); - return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}"; -} - [(set_attr "type" "multi")]) - ;; GNU2 TLS patterns can be split. (define_expand "tls_dynamic_gnu2_32"