On Fri, Nov 9, 2012 at 2:28 PM, Uros Bizjak <ubiz...@gmail.com> wrote:
> Finally, having a post-reload mode-switching pass, we can double-check
> that there are no live SSE registers at the vzeroupper insertion point.
> As vzeroupper is only an optimization, we want to play it safe and
> cancel vzeroupper insertion in this case.
>
> There is no degradation for x86_64 gABI targets, since all SSE
> registers are call-clobbered. Vzeroupper is conditionally inserted
> just before the call insn, where all registers are saved to the stack
> and already dead. The vzeroupper at function exit is not problematic.

Patch was committed to mainline SVN with the following ChangeLog:

2012-11-16  Uros Bizjak  <ubiz...@gmail.com>

	* config/i386/i386-protos.h (ix86_emit_mode_set): Add third argument.
	* config/i386/i386.h (EMIT_MODE_SET): Update.
	* config/i386/i386.c (ix86_avx_emit_vzeroupper): New function.
	(ix86_emit_mode_set) <AVX_U128>: Call ix86_avx_emit_vzeroupper.

Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32},
configured with --with-arch=corei7-avx --with-tune=corei7-avx.

Uros.
Index: i386-protos.h =================================================================== --- i386-protos.h (revision 193549) +++ i386-protos.h (working copy) @@ -172,8 +172,11 @@ extern int ix86_mode_after (int, int, rtx); extern int ix86_mode_entry (int); extern int ix86_mode_exit (int); -extern void ix86_emit_mode_set (int, int); +#ifdef HARD_CONST +extern void ix86_emit_mode_set (int, int, HARD_REG_SET); +#endif + extern void x86_order_regs_for_local_alloc (void); extern void x86_function_profiler (FILE *, int); extern void x86_emit_floatuns (rtx [2]); Index: i386.c =================================================================== --- i386.c (revision 193549) +++ i386.c (working copy) @@ -15477,16 +15477,38 @@ emit_move_insn (new_mode, reg); } +/* Emit vzeroupper. */ + +void +ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live) +{ + int i; + + /* Cancel automatic vzeroupper insertion if there are + live call-saved SSE registers at the insertion point. */ + + for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) + if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i]) + return; + + if (TARGET_64BIT) + for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) + if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i]) + return; + + emit_insn (gen_avx_vzeroupper ()); +} + /* Generate one or more insns to set ENTITY to MODE. */ void -ix86_emit_mode_set (int entity, int mode) +ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live) { switch (entity) { case AVX_U128: if (mode == AVX_U128_CLEAN) - emit_insn (gen_avx_vzeroupper ()); + ix86_avx_emit_vzeroupper (regs_live); break; case I387_TRUNC: case I387_FLOOR: Index: i386.h =================================================================== --- i386.h (revision 193549) +++ i386.h (working copy) @@ -2226,7 +2226,7 @@ are to be inserted. 
*/ #define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ - ix86_emit_mode_set ((ENTITY), (MODE)) + ix86_emit_mode_set ((ENTITY), (MODE), (HARD_REGS_LIVE)) /* Avoid renaming of stack registers, as doing so in combination with scheduling just increases amount of live registers at time and in