On Sun, Mar 18, 2012 at 1:55 PM, Uros Bizjak <ubiz...@gmail.com> wrote:
> On Sun, Mar 18, 2012 at 5:01 PM, Uros Bizjak <ubiz...@gmail.com> wrote:
>
>>> I am testing this patch.  OK for trunk if it passes all tests?
>>
>> No, force_reg will generate a pseudo, so this conversion is valid only
>> when can_create_pseudo_p () is true.
>>
>> At least for *tls_initial_exec_x32_store, you will need a temporary to
>> split the pattern after reload.

Here is the updated patch, which adds a can_create_pseudo_p () check.  I also
changed tls_initial_exec_x32 to take an input register operand as the thread
pointer.

> Please try the attached patch. It simply throws away all recent
> complications w.r.t. the thread pointer and always handles TP in
> DImode.
>
> The testcase:
>
> --cut here--
> __thread int foo __attribute__ ((tls_model ("initial-exec")));
>
> void bar (int x)
> {
>  foo = x;
> }
>
> int baz (void)
> {
>  return foo;
> }
> --cut here--
>
> Now compiles to:
>
> bar:
>        movq    foo@gottpoff(%rip), %rax
>        movl    %edi, %fs:(%rax)
>        ret
>
> baz:
>        movq    foo@gottpoff(%rip), %rax
>        movl    %fs:(%rax), %eax
>        ret
>
> In effect, this always generates %fs:(%rDI) and emits a REX prefix before
> mov/add to satisfy brain-dead linkers.
>
> The patch is bootstrapping now on x86_64-pc-linux-gnu.
>

For

--
extern __thread char c;
extern char y;
void
ie (void)
{
  y = c;
}
--

Your patch generates:

        movl    %fs:0, %eax     
        movq    c@gottpoff(%rip), %rdx  
        movzbl  (%rax,%rdx), %edx       
        movb    %dl, y(%rip)    
        ret     

It can be optimized to:

        movq    c@gottpoff(%rip), %rax  
        movzbl  %fs:(%rax), %eax        
        movb    %al, y(%rip)    
        ret     

H.J.
2012-03-19  H.J. Lu  <hongjiu...@intel.com>

	* config/i386/i386-protos.h (ix86_split_tls_initial_exec_x32): New.

	* config/i386/i386.c (legitimize_tls_address): Also pass thread
	pointer to gen_tls_initial_exec_x32.
	(ix86_split_tls_initial_exec_x32): New.

	* config/i386/i386.md (*load_tp_x32): Renamed to ...
	(*load_tp_x32_<mode>): This. Replace SI with SWI48x.
	(tls_initial_exec_x32): Add an input register operand as thread
	pointer.  Generate a REX prefix if needed.
	(*tls_initial_exec_x32_load): New.
	(*tls_initial_exec_x32_store): Likewise.

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 630112f..528eeaa 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -142,6 +142,7 @@ extern void ix86_split_lshr (rtx *, rtx, enum machine_mode);
 extern rtx ix86_find_base_term (rtx);
 extern bool ix86_check_movabs (rtx, int);
 extern void ix86_split_idivmod (enum machine_mode, rtx[], bool);
+extern void ix86_split_tls_initial_exec_x32 (rtx [], enum machine_mode, bool);
 
 extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot);
 extern int ix86_attr_length_immediate_default (rtx, bool);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 78a366e..fb802ee 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12671,13 +12671,14 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
 	    }
 	  else if (Pmode == SImode)
 	    {
-	      /* Always generate
-			movl %fs:0, %reg32
+	      /* Always generate a REX prefix for
 			addl xgottpoff(%rip), %reg32
-		 to support linker IE->LE optimization and avoid
-		 fs:(%reg32) as memory operand.  */
+		 to support linker IE->LE optimization.  */
 	      dest = gen_reg_rtx (Pmode);
-	      emit_insn (gen_tls_initial_exec_x32 (dest, x));
+	      base = get_thread_pointer (for_mov
+					 || !(TARGET_TLS_DIRECT_SEG_REFS
+					      && TARGET_TLS_INDIRECT_SEG_REFS));
+	      emit_insn (gen_tls_initial_exec_x32 (dest, base, x));
 	      return dest;
 	    }
 
@@ -12754,6 +12755,28 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
   return dest;
 }
 
+/* Split x32 TLS IE access in MODE.  Split load if LOAD is TRUE,
+   otherwise split store.  */
+
+void
+ix86_split_tls_initial_exec_x32 (rtx operands[],
+				 enum machine_mode mode, bool load)
+{
+  rtx base, mem;
+  rtx off = load ? operands[1] : operands[0];
+  off = gen_rtx_UNSPEC (DImode, gen_rtvec (1, off), UNSPEC_GOTNTPOFF);
+  off = gen_rtx_CONST (DImode, off);
+  off = gen_const_mem (DImode, off);
+  set_mem_alias_set (off, ix86_GOT_alias_set ());
+  base = gen_rtx_UNSPEC (DImode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+  off = gen_rtx_PLUS (DImode, base, force_reg (DImode, off));
+  mem = gen_rtx_MEM (mode, off);
+  if (load)
+    emit_move_insn (operands[0], mem);
+  else
+    emit_move_insn (mem, operands[1]);
+}
+
 /* Create or return the unique __imp_DECL dllimport symbol corresponding
    to symbol DECL.  */
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index eae26ae..1643792 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12747,11 +12747,11 @@
 (define_mode_attr tp_seg [(SI "gs") (DI "fs")])
 
 ;; Load and add the thread base pointer from %<tp_seg>:0.
-(define_insn "*load_tp_x32"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI [(const_int 0)] UNSPEC_TP))]
+(define_insn "*load_tp_x32_<mode>"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r")
+	(unspec:SWI48x [(const_int 0)] UNSPEC_TP))]
   "TARGET_X32"
-  "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
+  "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
   [(set_attr "type" "imov")
    (set_attr "modrm" "0")
    (set_attr "length" "7")
@@ -12836,27 +12836,54 @@
 }
   [(set_attr "type" "multi")])
 
-;; When Pmode == SImode, there may be no REX prefix for ADD.  Avoid
-;; any instructions between MOV and ADD, which may interfere linker
-;; IE->LE optimization, since the last byte of the previous instruction
-;; before ADD may look like a REX prefix.  This also avoids
-;;	movl x@gottpoff(%rip), %reg32
-;;	movl $fs:(%reg32), %reg32
-;; Since address override works only on the (reg32) part in fs:(reg32),
-;; we can't use it as memory operand.
+;; When Pmode == SImode, there may be no REX prefix for ADD.  Make sure
+;; there is a REX prefix.
 (define_insn "tls_initial_exec_x32"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
-	 [(match_operand 1 "tls_symbolic_operand" "")]
+	 [(match_operand:SI 1 "register_operand" "0")
+	  (match_operand 2 "tls_symbolic_operand" "")]
 	 UNSPEC_TLS_IE_X32))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_X32"
 {
-  output_asm_insn
-    ("mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}", operands);
-  return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
+  if (!REX_INT_REG_P (operands[0]))
+    fputs ("\trex ", asm_out_file);
+  return "add{l}\t{%a2@gottpoff(%%rip), %0|%0, %a2@gottpoff[rip]}";
 }
-  [(set_attr "type" "multi")])
+  [(set_attr "type" "alu")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")])
+
+(define_insn_and_split "*tls_initial_exec_x32_load"
+  [(set (match_operand:SWI1248x 0 "register_operand" "=r")
+        (mem:SWI1248x
+	  (unspec:SI
+	   [(unspec:SI [(const_int 0)] UNSPEC_TP)
+	    (match_operand 1 "tls_symbolic_operand" "")]
+	   UNSPEC_TLS_IE_X32)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_X32
+   && can_create_pseudo_p ()"
+  "#"
+  ""
+  [(const_int 0)]
+  "ix86_split_tls_initial_exec_x32 (operands, <MODE>mode, TRUE); DONE;")
+
+(define_insn_and_split "*tls_initial_exec_x32_store"
+  [(set (mem:SWI1248x
+	  (unspec:SI
+	   [(unspec:SI [(const_int 0)] UNSPEC_TP)
+	    (match_operand 0 "tls_symbolic_operand" "")]
+	   UNSPEC_TLS_IE_X32))
+  	(match_operand:SWI1248x 1 "register_operand" "r"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_X32
+   && can_create_pseudo_p ()"
+  "#"
+  ""
+  [(const_int 0)]
+  "ix86_split_tls_initial_exec_x32 (operands, <MODE>mode, FALSE); DONE;")
 
 ;; GNU2 TLS patterns can be split.
 

Reply via email to