This avoids spilling SSE registers to memory merely because vector
components are accessed through C array-style indexing.  The trick is
simply to rewrite those accesses into proper vector selects
(BIT_FIELD_REFs) at the tree level, so that the vector can be promoted
to SSA form.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2011-03-15  Richard Guenther  <rguent...@suse.de>

        PR tree-optimization/48037
        * tree-ssa.c (maybe_rewrite_mem_ref_base): Rewrite vector
        selects into BIT_FIELD_REFs.
        (non_rewritable_mem_ref_base): Check if a MEM_REF is a
        vector select.

        * gcc.target/i386/pr48037-1.c: New testcase.

Index: gcc/tree-ssa.c
===================================================================
*** gcc/tree-ssa.c      (revision 170776)
--- gcc/tree-ssa.c      (working copy)
*************** maybe_rewrite_mem_ref_base (tree *tp)
*** 1838,1855 ****
      tp = &TREE_OPERAND (*tp, 0);
    if (TREE_CODE (*tp) == MEM_REF
        && TREE_CODE (TREE_OPERAND (*tp, 0)) == ADDR_EXPR
-       && integer_zerop (TREE_OPERAND (*tp, 1))
        && (sym = TREE_OPERAND (TREE_OPERAND (*tp, 0), 0))
        && DECL_P (sym)
        && !TREE_ADDRESSABLE (sym)
        && symbol_marked_for_renaming (sym))
      {
!       if (!useless_type_conversion_p (TREE_TYPE (*tp),
!                                     TREE_TYPE (sym)))
!       *tp = build1 (VIEW_CONVERT_EXPR,
!                       TREE_TYPE (*tp), sym);
!       else
!       *tp = sym;
      }
  }
  
--- 1838,1869 ----
      tp = &TREE_OPERAND (*tp, 0);
    if (TREE_CODE (*tp) == MEM_REF
        && TREE_CODE (TREE_OPERAND (*tp, 0)) == ADDR_EXPR
        && (sym = TREE_OPERAND (TREE_OPERAND (*tp, 0), 0))
        && DECL_P (sym)
        && !TREE_ADDRESSABLE (sym)
        && symbol_marked_for_renaming (sym))
      {
!       if (TREE_CODE (TREE_TYPE (sym)) == VECTOR_TYPE
!         && useless_type_conversion_p (TREE_TYPE (*tp),
!                                       TREE_TYPE (TREE_TYPE (sym)))
!         && multiple_of_p (sizetype, TREE_OPERAND (*tp, 1),
!                           TYPE_SIZE_UNIT (TREE_TYPE (*tp))))
!       {
!         *tp = build3 (BIT_FIELD_REF, TREE_TYPE (*tp), sym, 
!                       TYPE_SIZE (TREE_TYPE (*tp)),
!                       int_const_binop (MULT_EXPR,
!                                        bitsize_int (BITS_PER_UNIT),
!                                        TREE_OPERAND (*tp, 1), 0));
!       }
!       else if (integer_zerop (TREE_OPERAND (*tp, 1)))
!       {
!         if (!useless_type_conversion_p (TREE_TYPE (*tp),
!                                         TREE_TYPE (sym)))
!           *tp = build1 (VIEW_CONVERT_EXPR,
!                         TREE_TYPE (*tp), sym);
!         else
!           *tp = sym;
!       }
      }
  }
  
*************** non_rewritable_mem_ref_base (tree ref)
*** 1869,1879 ****
      base = TREE_OPERAND (base, 0);
  
    /* But watch out for MEM_REFs we cannot lower to a
!      VIEW_CONVERT_EXPR.  */
    if (TREE_CODE (base) == MEM_REF
        && TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR)
      {
        tree decl = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
        if (DECL_P (decl)
          && (!integer_zerop (TREE_OPERAND (base, 1))
              || (DECL_SIZE (decl)
--- 1883,1900 ----
      base = TREE_OPERAND (base, 0);
  
    /* But watch out for MEM_REFs we cannot lower to a
!      VIEW_CONVERT_EXPR or a BIT_FIELD_REF.  */
    if (TREE_CODE (base) == MEM_REF
        && TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR)
      {
        tree decl = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
+       if (TREE_CODE (TREE_TYPE (decl)) == VECTOR_TYPE
+         && useless_type_conversion_p (TREE_TYPE (base),
+                                       TREE_TYPE (TREE_TYPE (decl)))
+         && double_int_fits_in_uhwi_p (mem_ref_offset (base))
+         && multiple_of_p (sizetype, TREE_OPERAND (base, 1),
+                           TYPE_SIZE_UNIT (TREE_TYPE (base))))
+       return NULL_TREE;
        if (DECL_P (decl)
          && (!integer_zerop (TREE_OPERAND (base, 1))
              || (DECL_SIZE (decl)
Index: gcc/testsuite/gcc.target/i386/pr48037-1.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr48037-1.c   (revision 0)
--- gcc/testsuite/gcc.target/i386/pr48037-1.c   (revision 0)
***************
*** 0 ****
--- 1,15 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target lp64 } */
+ /* { dg-options "-O -fno-math-errno" } */
+ 
+ typedef double __m128d __attribute__((vector_size(16)));
+ __m128d vsqrt1 (__m128d const x)
+ {
+   double const* __restrict__ const y = (double const*)&x;
+   double const a = __builtin_sqrt(y[0]);
+   double const b = __builtin_sqrt(y[1]);
+   return (__m128d) { a, b };
+ }
+ 
+ /* Verify we do not spill x to the stack.  */
+ /* { dg-final { scan-assembler-not "%rsp" } } */

Reply via email to