On Mon, Mar 5, 2012 at 9:25 AM, Uros Bizjak <ubiz...@gmail.com> wrote: > On Mon, Mar 5, 2012 at 6:03 PM, H.J. Lu <hjl.to...@gmail.com> wrote: > >>>> X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC >>>> by checking >>>> >>>> movq foo@gottpoff(%rip), %reg >>>> >>>> and >>>> >>>> addq foo@gottpoff(%rip), %reg >>>> >>>> It uses the REX prefix to avoid the last byte of the previous >>>> instruction. With 32bit Pmode, we may not have the REX prefix and >>>> the last byte of the previous instruction may be an offset, which >>>> may look like a REX prefix. IE->LE optimization will generate corrupted >>>> binary. This patch makes sure we always output a REX prefix for >>>> UNSPEC_GOTNTPOFF. OK for trunk? >>> >>> Actually, linker has: >>> >>> case R_X86_64_GOTTPOFF: >>> /* Check transition from IE access model: >>> mov foo@gottpoff(%rip), %reg >>> add foo@gottpoff(%rip), %reg >>> */ >>> >>> /* Check REX prefix first. */ >>> if (offset >= 3 && (offset + 4) <= sec->size) >>> { >>> val = bfd_get_8 (abfd, contents + offset - 3); >>> if (val != 0x48 && val != 0x4c) >>> { >>> /* X32 may have 0x44 REX prefix or no REX prefix. */ >>> if (ABI_64_P (abfd)) >>> return FALSE; >>> } >>> } >>> else >>> { >>> /* X32 may not have any REX prefix. */ >>> if (ABI_64_P (abfd)) >>> return FALSE; >>> if (offset < 2 || (offset + 3) > sec->size) >>> return FALSE; >>> } >>> >>> So, it should handle the case without REX just OK. If it doesn't, then >>> this is a bug in binutils. >>> >> >> The last byte of the displacement in the previous instruction >> may happen to look like a REX byte. In that case, linker >> will overwrite the last byte of the previous instruction and >> generate the wrong instruction sequence. >> >> I need to update linker to enforce the REX byte check. > > One important observation: if we want to follow the x86_64 TLS spec > strictly, we have to use existing DImode patterns only. 
This also > means that we should NOT convert other TLS patterns to Pmode, since > they explicitly state movq and addq. If this is not the case, then we > need new TLS specification for X32.
Here is a patch to properly generate the X32 IE sequence. This is a summary of the differences between x86-64 TLS and x32 TLS: x86-64 x32 GD .byte 0x66; leaq foo@tlsgd(%rip),%rdi; leaq foo@tlsgd(%rip),%rdi; .word 0x6666; rex64; call __tls_get_addr@plt .word 0x6666; rex64; call __tls_get_addr@plt GD->IE optimization movq %fs:0,%rax; addq x@gottpoff(%rip),%rax movl %fs:0,%eax; addq x@gottpoff(%rip),%rax GD->LE optimization movq %fs:0,%rax; leaq x@tpoff(%rax),%rax movl %fs:0,%eax; leaq x@tpoff(%rax),%rax LD leaq foo@tlsld(%rip),%rdi; leaq foo@tlsld(%rip),%rdi; call __tls_get_addr@plt call __tls_get_addr@plt LD->LE optimization .word 0x6666; .byte 0x66; movq %fs:0, %rax nopl 0x0(%rax); movl %fs:0, %eax IE movq %fs:0,%reg64; movl %fs:0,%reg32; addq x@gottpoff(%rip),%reg64 addl x@gottpoff(%rip),%reg32 or Not supported if Pmode == SImode movq x@gottpoff(%rip),%reg64; movq x@gottpoff(%rip),%reg64; movq %fs:(%reg64),%reg32 movl %fs:(%reg64), %reg32 IE->LE optimization movq %fs:0,%reg64; movl %fs:0,%reg32; addq x@gottpoff(%rip),%reg64 addl x@gottpoff(%rip),%reg32 to movq %fs:0,%reg64; movl %fs:0,%reg32; addq foo@tpoff, %reg64 addl foo@tpoff, %reg32 movq %fs:0,%reg64; movl %fs:0,%reg32; leaq foo@tpoff(%reg64), %reg64 leal foo@tpoff(%reg32), %reg32 or movq x@gottpoff(%rip),%reg64 movq x@gottpoff(%rip),%reg64; movl %fs:(%reg64),%reg32 movl %fs:(%reg64), %reg32 to movq foo@tpoff, %reg64 movq foo@tpoff, %reg64 movl %fs:(%reg64),%reg32 movl %fs:(%reg64), %reg32 LE movq %fs:0,%reg64; movl %fs:0,%reg32; leaq x@tpoff(%reg64),%reg32 leal x@tpoff(%reg32),%reg32 or movq %fs:0,%reg64; movl %fs:0,%reg32; addq $x@tpoff,%reg64 addl $x@tpoff,%reg32 or movq %fs:0,%reg64; movl %fs:0,%reg32; movl x@tpoff(%reg64),%reg32 movl x@tpoff(%reg32),%reg32 or movl %fs:x@tpoff,%reg32 movl %fs:x@tpoff,%reg32 X32 TLS implementation is straightforward, except for IE: 1. Since address override works only on the (reg32) part in fs:(reg32), we can't use it as a memory operand. 
This patch changes ix86_decompose_address to disallow fs:(reg) if Pmode != word_mode. 2. When Pmode == SImode, there may be no REX prefix for ADD. Avoid any instructions between MOV and ADD, which may interfere with the linker's IE->LE optimization, since the last byte of the previous instruction before ADD may look like a REX prefix. This patch adds tls_initial_exec_x32 to make sure that we always have movl %fs:0, %reg32 addl x@gottpoff(%rip), %reg32 so that the last byte of the previous instruction before ADD will never be a REX byte. Tested on Linux/x32. -- H.J. -- 2012-03-09 H.J. Lu <hongjiu...@intel.com> * config/i386/i386.c (ix86_decompose_address): Disallow fs:(reg) if Pmode != word_mode. (legitimize_tls_address): Call gen_tls_initial_exec_x32 if Pmode == SImode for x32. * config/i386/i386.md (UNSPEC_TLS_IE_X32): New. (tls_initial_exec_x32): Likewise.
2012-03-09 H.J. Lu <hongjiu...@intel.com> * config/i386/i386.c (ix86_decompose_address): Disallow fs:(reg) if Pmode != word_mode. (legitimize_tls_address): Call gen_tls_initial_exec_x32 if Pmode == SImode for x32. * config/i386/i386.md (UNSPEC_TLS_IE_X32): New. (tls_initial_exec_x32): Likewise. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 15465c2..312b50c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -11524,6 +11534,11 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) else disp = addr; /* displacement */ + /* Since address override works only on the (reg32) part in fs:(reg32), + we can't use it as memory operand. */ + if (Pmode != word_mode && seg == SEG_FS && (base || index)) + return 0; + if (index) { if (REG_P (index)) @@ -12618,6 +12643,17 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) emit_insn (gen_tls_initial_exec_64_sun (dest, x)); return dest; } + else if (Pmode == SImode) + { + /* Always generate + movl %fs:0, %reg32 + addl xgottpoff(%rip), %reg32 + to support linker IE->LE optimization and avoid + fs:(%reg32) as memory operand. */ + dest = gen_reg_rtx (Pmode); + emit_insn (gen_tls_initial_exec_x32 (dest, x)); + return dest; + } pic = NULL; type = UNSPEC_GOTNTPOFF; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 188c982..d1fa997 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -95,6 +95,7 @@ UNSPEC_TLS_LD_BASE UNSPEC_TLSDESC UNSPEC_TLS_IE_SUN + UNSPEC_TLS_IE_X32 ;; Other random patterns UNSPEC_SCAS @@ -12775,6 +12776,28 @@ } [(set_attr "type" "multi")]) +;; When Pmode == SImode, there may be no REX prefix for ADD. Avoid +;; any instructions between MOV and ADD, which may interfere linker +;; IE->LE optimization, since the last byte of the previous instruction +;; before ADD may look like a REX prefix. 
This also avoids +;; movl x@gottpoff(%rip), %reg32 +;; movl $fs:(%reg32), %reg32 +;; Since address override works only on the (reg32) part in fs:(reg32), +;; we can't use it as memory operand. +(define_insn "tls_initial_exec_x32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_IE_X32)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_X32" +{ + output_asm_insn + ("mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}", operands); + return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}"; +} + [(set_attr "type" "multi")]) + ;; GNU2 TLS patterns can be split. (define_expand "tls_dynamic_gnu2_32"