Hi!

This patch adds vec_pack_sfix_trunc_v4df and vec_pack_sfix_v4df
expanders, which allow e.g. f1 in the testcases to be vectorized using
32-byte vectors even with just -mavx (i.e. without -mavx2).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-10-27  Jakub Jelinek  <ja...@redhat.com>

        * config/i386/sse.md (avx_cvtpd2dq256_2, avx_cvttpd2dq256_2,
        vec_pack_sfix_trunc_v4df, vec_pack_sfix_v4df): New expanders.
        (*avx_cvtpd2dq256_2, *avx_cvttpd2dq256_2): New insns.

        * gcc.target/i386/sse2-cvt-1.c: New test.
        * gcc.target/i386/sse2-cvt-2.c: New test.
        * gcc.target/i386/avx-cvt-1.c: New test.
        * gcc.target/i386/avx-cvt-2.c: New test.
        * gcc.target/i386/avx2-cvt-1.c: New test.
        * gcc.target/i386/avx2-cvt-2.c: New test.

--- gcc/config/i386/sse.md.jj   2011-10-27 08:42:58.000000000 +0200
+++ gcc/config/i386/sse.md      2011-10-27 14:45:16.000000000 +0200
@@ -2544,6 +2548,27 @@ (define_insn "avx_cvtpd2dq256"
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_expand "avx_cvtpd2dq256_2"
+  [(set (match_operand:V8SI 0 "register_operand" "")
+       (vec_concat:V8SI
+         (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
+                      UNSPEC_FIX_NOTRUNC)
+         (match_dup 2)))]
+  "TARGET_AVX"
+  "operands[2] = CONST0_RTX (V4SImode);")
+
+(define_insn "*avx_cvtpd2dq256_2"
+  [(set (match_operand:V8SI 0 "register_operand" "=x")
+       (vec_concat:V8SI
+         (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
+                      UNSPEC_FIX_NOTRUNC)
+         (match_operand:V4SI 2 "const0_operand" "")))]
+  "TARGET_AVX"
+  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
 (define_expand "sse2_cvtpd2dq"
   [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
@@ -2584,6 +2609,25 @@ (define_insn "avx_cvttpd2dq256"
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_expand "avx_cvttpd2dq256_2"
+  [(set (match_operand:V8SI 0 "register_operand" "")
+       (vec_concat:V8SI
+         (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
+         (match_dup 2)))]
+  "TARGET_AVX"
+  "operands[2] = CONST0_RTX (V4SImode);")
+
+(define_insn "*avx_cvttpd2dq256_2"
+  [(set (match_operand:V8SI 0 "register_operand" "=x")
+       (vec_concat:V8SI
+         (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
+         (match_operand:V4SI 2 "const0_operand" "")))]
+  "TARGET_AVX"
+  "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
 (define_expand "sse2_cvttpd2dq"
   [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
@@ -3027,6 +3075,23 @@ (define_expand "vec_pack_trunc_v2df"
   DONE;
 })
 
+(define_expand "vec_pack_sfix_trunc_v4df"
+  [(match_operand:V8SI 0 "register_operand" "")
+   (match_operand:V4DF 1 "nonimmediate_operand" "")
+   (match_operand:V4DF 2 "nonimmediate_operand" "")]
+  "TARGET_AVX"
+{
+  rtx r1, r2;
+
+  r1 = gen_reg_rtx (V8SImode);
+  r2 = gen_reg_rtx (V8SImode);
+
+  emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1]));
+  emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2]));
+  emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
+  DONE;
+})
+
 (define_expand "vec_pack_sfix_trunc_v2df"
   [(match_operand:V4SI 0 "register_operand" "")
    (match_operand:V2DF 1 "nonimmediate_operand" "")
@@ -3046,6 +3111,23 @@ (define_expand "vec_pack_sfix_trunc_v2df
   DONE;
 })
 
+(define_expand "vec_pack_sfix_v4df"
+  [(match_operand:V8SI 0 "register_operand" "")
+   (match_operand:V4DF 1 "nonimmediate_operand" "")
+   (match_operand:V4DF 2 "nonimmediate_operand" "")]
+  "TARGET_AVX"
+{
+  rtx r1, r2;
+
+  r1 = gen_reg_rtx (V8SImode);
+  r2 = gen_reg_rtx (V8SImode);
+
+  emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1]));
+  emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2]));
+  emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
+  DONE;
+})
+
 (define_expand "vec_pack_sfix_v2df"
   [(match_operand:V4SI 0 "register_operand" "")
    (match_operand:V2DF 1 "nonimmediate_operand" "")
--- gcc/testsuite/gcc.target/i386/sse2-cvt-1.c.jj       2011-10-27 13:48:44.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-cvt-1.c  2011-10-27 14:10:42.000000000 +0200
@@ -0,0 +1,111 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -msse2 -mno-avx" } */
+/* { dg-require-effective-target sse2 } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#define N 16
+float f[N];
+double d[N];
+int n[N];
+
+__attribute__((noinline)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[i] = d[i];
+}
+
+__attribute__((noinline)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    f[i] = n[i];
+}
+
+__attribute__((noinline)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = f[i];
+}
+
+__attribute__((noinline)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[i] = f[i];
+}
+
+__attribute__((noinline)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = n[i];
+}
+
+__attribute__((noinline)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    f[i] = d[i];
+}
+
+static void
+TEST ()
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      asm ("");
+      d[i] = i + 2.5;
+    }
+  f1 ();
+  for (i = 0; i < N; i++)
+    if (n[i] != i + 2)
+      abort ();
+    else
+      n[i] = i + 7;
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (f[i] != i + 7)
+      abort ();
+    else
+      f[i] = i - 2.25f;
+  f3 ();
+  for (i = 0; i < N; i++)
+    if (d[i] != i - 2.25)
+      abort ();
+    else
+      f[i] = i + 3.5;
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (n[i] != i + 3)
+      abort ();
+    else
+      n[i] = i + 9;
+  f5 ();
+  for (i = 0; i < N; i++)
+    if (d[i] != i + 9)
+      abort ();
+    else
+      d[i] = i - 7.25;
+  f6 ();
+  for (i = 0; i < N; i++)
+    if (f[i] != i - 7.25)
+      abort ();
+}
--- gcc/testsuite/gcc.target/i386/sse2-cvt-2.c.jj       2011-10-27 14:12:38.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-cvt-2.c  2011-10-27 14:14:05.000000000 +0200
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+
+#include "sse2-cvt-1.c"
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 6 "vect" } } */
+/* { dg-final { scan-assembler "cvttpd2dq" } } */
+/* { dg-final { scan-assembler "cvtdq2ps" } } */
+/* { dg-final { scan-assembler "cvtps2pd" } } */
+/* { dg-final { scan-assembler "cvttps2dq" } } */
+/* { dg-final { scan-assembler "cvtdq2pd" } } */
+/* { dg-final { scan-assembler "cvtpd2ps" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.target/i386/avx-cvt-1.c.jj        2011-10-27 13:49:36.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx-cvt-1.c   2011-10-27 14:08:49.000000000 +0200
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx -mno-avx2" } */
+/* { dg-require-effective-target avx_runtime } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include "sse2-cvt-1.c"
--- gcc/testsuite/gcc.target/i386/avx-cvt-2.c.jj        2011-10-27 13:54:52.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx-cvt-2.c   2011-10-27 14:11:08.000000000 +0200
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx -mno-avx2 -fdump-tree-vect-details" } */
+
+#include "avx-cvt-1.c"
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 6 "vect" } } */
+/* { dg-final { scan-assembler "vcvttpd2dq(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { scan-assembler "vcvtdq2ps\[^\n\r\]*xmm" } } */
+/* { dg-final { scan-assembler "vcvtps2pd\[^\n\r\]*(%xmm\[^\n\r\]*%ymm|ymm\[^\n\r\]*xmm)" } } */
+/* { dg-final { scan-assembler "vcvttps2dq\[^\n\r\]*ymm" } } */
+/* { dg-final { scan-assembler "vcvtdq2pd\[^\n\r\]*xmm\[^\n\r\]*xmm" } } */
+/* { dg-final { scan-assembler "vcvtpd2ps(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.target/i386/avx2-cvt-1.c.jj       2011-10-27 13:50:58.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-cvt-1.c  2011-10-27 14:08:44.000000000 +0200
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx2_test
+#endif
+
+#include "sse2-cvt-1.c"
--- gcc/testsuite/gcc.target/i386/avx2-cvt-2.c.jj       2011-10-27 14:11:33.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-cvt-2.c  2011-10-27 14:15:10.000000000 +0200
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
+
+#include "avx2-cvt-1.c"
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 6 "vect" } } */
+/* { dg-final { scan-assembler "vcvttpd2dq(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { scan-assembler "vcvtdq2ps\[^\n\r\]*ymm" } } */
+/* { dg-final { scan-assembler "vcvtps2pd\[^\n\r\]*(%xmm\[^\n\r\]*%ymm|ymm\[^\n\r\]*xmm)" } } */
+/* { dg-final { scan-assembler "vcvttps2dq\[^\n\r\]*ymm" } } */
+/* { dg-final { scan-assembler "vcvtdq2pd\[^\n\r\]*(%xmm\[^\n\r\]*%ymm|ymm\[^\n\r\]*xmm)" } } */
+/* { dg-final { scan-assembler "vcvtpd2ps(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

        Jakub

Reply via email to