[RTL, i386] Use subreg instead of UNSPEC_CAST

Marc Glisse Tue, 19 Mar 2013 08:48:01 -0700

Hello,

the following patch passes bootstrap+testsuite on x86_64-linux-gnu. I don't see any particular reason to forbid vector subregs of vectors, since we can already do it through a scalar. And not using unspecs helps avoid unnecessary copies.


2013-01-03  Marc Glisse  <marc.gli...@inria.fr>

        PR target/50829
gcc/
        * config/i386/sse.md (enum unspec): Remove UNSPEC_CAST.
        (avx_<castmode><avxsizesuffix>_<castmode>): Use subreg.
        * emit-rtl.c (validate_subreg): Allow vector-vector subregs.

gcc/testsuite/
        * gcc.target/i386/pr50829.c: New file.

--
Marc Glisse

Index: gcc/testsuite/gcc.target/i386/pr50829.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr50829.c     (revision 0)
+++ gcc/testsuite/gcc.target/i386/pr50829.c     (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -mavx" } */
+
+#include <x86intrin.h>
+
+__m256d
+concat (__m128d x)
+{
+  __m256d z = _mm256_castpd128_pd256 (x);
+  return _mm256_insertf128_pd (z, x, 1);
+}
+
+/* { dg-final { scan-assembler-not "vmov" } } */

Property changes on: gcc/testsuite/gcc.target/i386/pr50829.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md      (revision 196633)
+++ gcc/config/i386/sse.md      (working copy)
@@ -66,21 +66,20 @@
   UNSPEC_AESKEYGENASSIST
 
   ;; For PCLMUL support
   UNSPEC_PCLMUL
 
   ;; For AVX support
   UNSPEC_PCMP
   UNSPEC_VPERMIL
   UNSPEC_VPERMIL2
   UNSPEC_VPERMIL2F128
-  UNSPEC_CAST
   UNSPEC_VTESTP
   UNSPEC_VCVTPH2PS
   UNSPEC_VCVTPS2PH
 
   ;; For AVX2 support
   UNSPEC_VPERMVAR
   UNSPEC_VPERMTI
   UNSPEC_GATHER
   UNSPEC_VSIBADDR
 ])
@@ -11089,23 +11088,22 @@
   "TARGET_AVX"
   "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "vex")
    (set_attr "btver2_decode" "vector") 
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
   [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
-       (unspec:AVX256MODE2P
-         [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
-         UNSPEC_CAST))]
+       (subreg:AVX256MODE2P
+         (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x") 0))]
   "TARGET_AVX"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
   rtx op0 = operands[0];
   rtx op1 = operands[1];
   if (REG_P (op0))
     op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
   else
Index: gcc/emit-rtl.c
===================================================================
--- gcc/emit-rtl.c      (revision 196633)
+++ gcc/emit-rtl.c      (working copy)
@@ -707,20 +707,23 @@ validate_subreg (enum machine_mode omode
   else if ((COMPLEX_MODE_P (imode) || VECTOR_MODE_P (imode))
           && GET_MODE_INNER (imode) == omode)
     ;
   /* ??? x86 sse code makes heavy use of *paradoxical* vector subregs,
      i.e. (subreg:V4SF (reg:SF) 0).  This surely isn't the cleanest way to
      represent this.  It's questionable if this ought to be represented at
      all -- why can't this all be hidden in post-reload splitters that make
      arbitrarily mode changes to the registers themselves.  */
   else if (VECTOR_MODE_P (omode) && GET_MODE_INNER (omode) == imode)
     ;
+  else if (VECTOR_MODE_P (omode) && VECTOR_MODE_P (imode)
+          && GET_MODE_INNER (omode) == GET_MODE_INNER (imode))
+    ;
   /* Subregs involving floating point modes are not allowed to
      change size.  Therefore (subreg:DI (reg:DF) 0) is fine, but
      (subreg:SI (reg:DF) 0) isn't.  */
   else if (FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))
     {
       if (! (isize == osize
             /* LRA can use subreg to store a floating point value in
                an integer mode.  Although the floating point and the
                integer modes need the same number of hard registers,
                the size of floating point mode can be less than the

[RTL, i386] Use subreg instead of UNSPEC_CAST

Reply via email to