Hi!

This is the last pattern I'm aware of that had no v/Yv constraints at all
but ought to use them (there may be others that have v/Yv in some of their
alternatives, but not in all the ones that could use them).

The testcases show what the changes are useful for.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-06-01  Jakub Jelinek  <ja...@redhat.com>

        * config/i386/sse.md (vec_concatv2di): Add x64_avx512dq v=Yv,rm
        alternative.  Change x=xm,C alternative to v=vm,C, x=x,x alternative
        to v=Yv,Yv and x=x,m to v=v,m.  Use maybe_evex prefix attribute
        instead of vex for the last two above mentioned alternatives.

        * gcc.target/i386/avx512dq-concatv2di-1.c: New test.
        * gcc.target/i386/avx512vl-concatv2di-1.c: New test.
        * gcc.target/i386/sse2-init-v2di-2.c: Adjust expected vec_concatv2di
        alternative number.

--- gcc/config/i386/sse.md.jj   2016-06-01 14:17:18.000000000 +0200
+++ gcc/config/i386/sse.md      2016-06-01 18:11:35.058942131 +0200
@@ -13567,17 +13567,18 @@ (define_insn "*vec_concatv4si"
 ;; movd instead of movq is required to handle broken assemblers.
 (define_insn "vec_concatv2di"
   [(set (match_operand:V2DI 0 "register_operand"
-         "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
+         "=Yr,*x,x ,v ,Yi,v ,!x,x,v ,x,x,v")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand"
-         "  0, 0,x ,r ,xm,*y,0,x,0,0,x")
+         "  0, 0,x ,Yv,r ,vm,*y,0,Yv,0,0,v")
          (match_operand:DI 2 "vector_move_operand"
-         "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
+         "*rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
   "TARGET_SSE"
   "@
    pinsrq\t{$1, %2, %0|%0, %2, 1}
    pinsrq\t{$1, %2, %0|%0, %2, 1}
    vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
+   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
   * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
    %vmovq\t{%1, %0|%0, %1}
    movq2dq\t{%1, %0|%0, %1}
@@ -13592,40 +13593,46 @@ (define_insn "vec_concatv2di"
            (eq_attr "alternative" "2")
              (const_string "x64_avx")
            (eq_attr "alternative" "3")
+             (const_string "x64_avx512dq")
+           (eq_attr "alternative" "4")
              (const_string "x64")
-           (eq_attr "alternative" "4,5")
+           (eq_attr "alternative" "5,6")
              (const_string "sse2")
-           (eq_attr "alternative" "6")
+           (eq_attr "alternative" "7")
              (const_string "sse2_noavx")
-           (eq_attr "alternative" "7,10")
+           (eq_attr "alternative" "8,11")
              (const_string "avx")
           ]
           (const_string "noavx")))
    (set (attr "type")
      (if_then_else
-       (eq_attr "alternative" "0,1,2,6,7")
+       (eq_attr "alternative" "0,1,2,3,7,8")
        (const_string "sselog")
        (const_string "ssemov")))
    (set (attr "prefix_rex")
-     (if_then_else (eq_attr "alternative" "0,1,2,3")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
                   (const_string "1")
                   (const_string "*")))
    (set (attr "prefix_extra")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
                   (const_string "1")
                   (const_string "*")))
    (set (attr "length_immediate")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
                   (const_string "1")
                   (const_string "*")))
    (set (attr "prefix")
-     (cond [(eq_attr "alternative" "2,7,10")
+     (cond [(eq_attr "alternative" "2")
              (const_string "vex")
-           (eq_attr "alternative" "3,4")
+           (eq_attr "alternative" "3")
+             (const_string "evex")
+           (eq_attr "alternative" "4,5")
              (const_string "maybe_vex")
+           (eq_attr "alternative" "8,11")
+             (const_string "maybe_evex")
           ]
           (const_string "orig")))
-   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
 
 (define_expand "vec_unpacks_lo_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
--- gcc/testsuite/gcc.target/i386/avx512dq-concatv2di-1.c.jj    2016-06-01 18:58:33.037699493 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2di-1.c       2016-06-01 18:53:29.000000000 +0200
@@ -0,0 +1,78 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att -mtune=haswell" } */
+
+typedef long long V __attribute__((vector_size (16)));
+
+void
+f1 (long long x, long long y)
+{
+  register long long a __asm ("xmm16");
+  register V c __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpinsrq\[^\n\r]*\\\$1\[^\n\r]*%rsi\[^\n\r]*%xmm16\[^\n\r]*%xmm17" } } */
+
+void
+f2 (long long x, long long *y)
+{
+  register long long a __asm ("xmm18");
+  register V c __asm ("xmm19");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpinsrq\[^\n\r]*\\\$1\[^\n\r]*%\[re]si\[^\n\r]*%xmm18\[^\n\r]*%xmm19" } } */
+
+void
+f3 (long long x)
+{
+  register V a __asm ("xmm20");
+  a = (V) { x, 0 };
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmov\[dq]\[^\n\r]*%rdi\[^\n\r]*%xmm20" } } */
+
+void
+f4 (long long *x)
+{
+  register V a __asm ("xmm21");
+  a = (V) { *x, 0 };
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%\[re]di\[^\n\r]*%xmm21" } } */
+
+void
+f5 (long long x)
+{
+  register long long a __asm ("xmm22");
+  register V c __asm ("xmm23");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, 0 };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%xmm22\[^\n\r]*%xmm23" } } */
+
+void
+f6 (long long x, long long y)
+{
+  register long long a __asm ("xmm24");
+  register long long b __asm ("xmm25");
+  register V c __asm ("xmm26");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpcklqdq\[^\n\r]*%xmm25\[^\n\r]*%xmm24\[^\n\r]*%xmm26" } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-concatv2di-1.c.jj    2016-06-01 18:58:45.761535711 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2di-1.c       2016-06-01 19:04:11.367344524 +0200
@@ -0,0 +1,79 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att -mtune=haswell" } */
+
+typedef long long V __attribute__((vector_size (16)));
+
+void
+f1 (long long x, long long y)
+{
+  register long long a __asm ("xmm16");
+  register V c __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-not "vpinsrq\[^\n\r]*\[^\n\r]*%xmm1\[6-9]" } } */
+/* { dg-final { scan-assembler-not "vpinsrq\[^\n\r]*\[^\n\r]*%xmm\[23]\[0-9]" } } */
+
+void
+f2 (long long x, long long *y)
+{
+  register long long a __asm ("xmm18");
+  register V c __asm ("xmm19");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vmovhps\[^\n\r]*%\[re]si\[^\n\r]*%xmm18\[^\n\r]*%xmm19" } } */
+
+void
+f3 (long long x)
+{
+  register V a __asm ("xmm20");
+  a = (V) { x, 0 };
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmov\[dq]\[^\n\r]*%rdi\[^\n\r]*%xmm20" } } */
+
+void
+f4 (long long *x)
+{
+  register V a __asm ("xmm21");
+  a = (V) { *x, 0 };
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%\[re]di\[^\n\r]*%xmm21" } } */
+
+void
+f5 (long long x)
+{
+  register long long a __asm ("xmm22");
+  register V c __asm ("xmm23");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, 0 };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%xmm22\[^\n\r]*%xmm23" } } */
+
+void
+f6 (long long x, long long y)
+{
+  register long long a __asm ("xmm24");
+  register long long b __asm ("xmm25");
+  register V c __asm ("xmm26");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpcklqdq\[^\n\r]*%xmm25\[^\n\r]*%xmm24\[^\n\r]*%xmm26" } } */
--- gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c.jj 2015-12-31 01:11:11.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c    2016-06-01 21:23:02.455281080 +0200
@@ -10,4 +10,4 @@ test (long long b)
   return _mm_cvtsi64_si128 (b); 
 }
 
-/* { dg-final { scan-assembler-times "vec_concatv2di/4" 1 } } */
+/* { dg-final { scan-assembler-times "vec_concatv2di/5" 1 } } */

        Jakub

Reply via email to