On Fri, Nov 9, 2012 at 2:28 PM, Uros Bizjak <ubiz...@gmail.com> wrote:

> Finally, having a post-reload mode-switching pass, we can double-check
> that there are no live SSE registers at the vzeroupper insertion point.
> As vzeroupper is only an optimization, we want to play it safe and
> cancel vzeroupper insertion in this case.
>
> There is no degradation for x86_64 gABI targets, since all SSE
> registers are call-clobbered. Vzeroupper is conditionally inserted
> just before the call insn, where all registers have been saved to the
> stack and are already dead. The vzeroupper at function exit is not
> problematic.

Patch was committed to mainline SVN with the following ChangeLog:

2012-11-16  Uros Bizjak  <ubiz...@gmail.com>

        * config/i386/i386-protos.h (ix86_emit_mode_set): Add third argument.
        * config/i386/i386.h (EMIT_MODE_SET): Update.
        * config/i386/i386.c (ix86_avx_emit_vzeroupper): New function.
        (ix86_emit_mode_set) <AVX_U128>: Call ix86_avx_emit_vzeroupper.

Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32},
configured with --with-arch=corei7-avx --with-tune=corei7-avx.

Uros.
Index: i386-protos.h
===================================================================
--- i386-protos.h       (revision 193549)
+++ i386-protos.h       (working copy)
@@ -172,8 +172,11 @@
 extern int ix86_mode_after (int, int, rtx);
 extern int ix86_mode_entry (int);
 extern int ix86_mode_exit (int);
-extern void ix86_emit_mode_set (int, int);
 
+#ifdef HARD_CONST
+extern void ix86_emit_mode_set (int, int, HARD_REG_SET);
+#endif
+
 extern void x86_order_regs_for_local_alloc (void);
 extern void x86_function_profiler (FILE *, int);
 extern void x86_emit_floatuns (rtx [2]);
Index: i386.c
===================================================================
--- i386.c      (revision 193549)
+++ i386.c      (working copy)
@@ -15477,16 +15477,38 @@
   emit_move_insn (new_mode, reg);
 }
 
+/* Emit vzeroupper.  */
+
+void
+ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
+{
+  int i;
+
+  /* Cancel automatic vzeroupper insertion if there are
+     live call-saved SSE registers at the insertion point.  */
+
+  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
+    if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
+      return;
+
+  if (TARGET_64BIT)
+    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
+      if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
+       return;
+
+  emit_insn (gen_avx_vzeroupper ());
+}
+
 /* Generate one or more insns to set ENTITY to MODE.  */
 
 void
-ix86_emit_mode_set (int entity, int mode)
+ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
 {
   switch (entity)
     {
     case AVX_U128:
       if (mode == AVX_U128_CLEAN)
-       emit_insn (gen_avx_vzeroupper ());
+       ix86_avx_emit_vzeroupper (regs_live);
       break;
     case I387_TRUNC:
     case I387_FLOOR:
Index: i386.h
===================================================================
--- i386.h      (revision 193549)
+++ i386.h      (working copy)
@@ -2226,7 +2226,7 @@
    are to be inserted.  */
 
 #define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
-  ix86_emit_mode_set ((ENTITY), (MODE))
+  ix86_emit_mode_set ((ENTITY), (MODE), (HARD_REGS_LIVE))
 
 /* Avoid renaming of stack registers, as doing so in combination with
    scheduling just increases amount of live registers at time and in

Reply via email to