The x86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC by checking for
movq foo@gottpoff(%rip), %reg and addq foo@gottpoff(%rip), %reg. It uses the REX prefix to avoid the last byte of the previous instruction. With 32-bit Pmode, we may not have the REX prefix and the last byte of the previous instruction may be an offset, which may look like a REX prefix. IE->LE optimization will then generate a corrupted binary. This patch makes sure we always output a REX prefix for UNSPEC_GOTNTPOFF. OK for trunk? Thanks. H.J. 2012-03-02 H.J. Lu <hongjiu...@intel.com> * config/i386/i386-protos.h (ix86_output_rex_prefix_p): New. * config/i386/i386.c (ix86_output_rex_prefix_p): Likewise. * config/i386/i386.md (*movsi_internal): Output REX prefix if needed. (*add<mode>_1): Likewise. diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 630112f..a9b9d3f 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -277,6 +277,8 @@ extern void x86_output_aligned_bss (FILE *, tree, const char *, extern void x86_elf_aligned_common (FILE *, const char *, unsigned HOST_WIDE_INT, int); +extern bool ix86_output_rex_prefix_p (rtx, rtx); + #ifdef RTX_CODE extern void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *, enum rtx_code *, enum rtx_code *); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ac9c714..2cbfb64 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -14670,6 +14670,29 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x) return true; } + +/* Since x86-64 linker IE->LE transition requires a REX prefix, we + output a REX prefix if there isn't one. 
*/ + +bool +ix86_output_rex_prefix_p (rtx dest, rtx op) +{ + if (!TARGET_X32 + || GET_MODE (dest) != SImode + || REX_INT_REG_P (dest) + || !MEM_P (op)) + return false; + + op = XEXP (op, 0); + if (GET_CODE (op) != CONST) + return false; + + op = XEXP (op, 0); + if (GET_CODE (op) != UNSPEC) + return false; + + return XINT (op, 1) == UNSPEC_GOTNTPOFF; +} /* Split one or more double-mode RTL references into pairs of half-mode references. The RTL can be REG, offsettable MEM, integer constant, or diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8fc7918..35b2673 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2211,7 +2211,13 @@ if (ix86_use_lea_for_mov (insn, operands)) return "lea{l}\t{%a1, %0|%0, %a1}"; else - return "mov{l}\t{%1, %0|%0, %1}"; + { + /* Output REX prefix if needed. */ + if (ix86_output_rex_prefix_p (operands[0], operands[1])) + return "rex mov{l}\t{%1, %0|%0, %1}"; + else + return "mov{l}\t{%1, %0|%0, %1}"; + } } } [(set (attr "type") @@ -5540,7 +5546,11 @@ if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) return "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; - return "add{<imodesuffix>}\t{%2, %0|%0, %2}"; + /* Output REX prefix if needed. */ + if (ix86_output_rex_prefix_p (operands[0], operands[2])) + return "rex add{<imodesuffix>}\t{%2, %0|%0, %2}"; + else + return "add{<imodesuffix>}\t{%2, %0|%0, %2}"; } } [(set (attr "type") -- 1.7.6.5