Hi:
For an RTX like
(vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
(parallel [(const_int 0) (const_int 1)]))
the whole expression can be simplified to just inner, because the
vec_select picks exactly the elements that inner provides.
Bootstrap is OK, and regression testing on the i386 backend is OK.
gcc/ChangeLog
PR rtl-optimization/97249
* simplify-rtx.c (simplify_binary_operation_1): Simplify
vec_select of paradoxical subreg.
gcc/testsuite/ChangeLog
* gcc.target/i386/pr97249-1.c: New test.
--
BR,
Hongtao
From c00369aa36d2e169b59287c58872c915953dd2a2 Mon Sep 17 00:00:00 2001
From: liuhongt <[email protected]>
Date: Tue, 13 Oct 2020 15:35:29 +0800
Subject: [PATCH] Simplify vec_select of paradoxical subreg.
For an RTX like
(vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
(parallel [(const_int 0) (const_int 1)]))
the whole expression can be simplified to just inner, because the
vec_select picks exactly the elements that inner provides.
gcc/ChangeLog
PR rtl-optimization/97249
* simplify-rtx.c (simplify_binary_operation_1): Simplify
vec_select of paradoxical subreg.
gcc/testsuite/ChangeLog
* gcc.target/i386/pr97249-1.c: New test.
---
gcc/simplify-rtx.c | 27 ++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr97249-1.c | 30 +++++++++++++++++++++++
2 files changed, 57 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr97249-1.c
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 869f0d11b2e..9c397157f28 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -4170,6 +4170,33 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
return subop1;
}
}
+
+ /* For cases like
+ (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
+ (parallel [(const_int 0) (const_int 1)]))
+ return inner directly. */
+ if (GET_CODE (trueop0) == SUBREG
+ && paradoxical_subreg_p (trueop0)
+ && mode == GET_MODE (XEXP (trueop0, 0))
+ && (GET_MODE_NUNITS (GET_MODE (trueop0))).is_constant (&l0)
+ && (GET_MODE_NUNITS (mode)).is_constant (&l1)
+ && l0 % l1 == 0)
+ {
+ gcc_assert (known_eq (XVECLEN (trueop1, 0), l1));
+ unsigned HOST_WIDE_INT expect = (HOST_WIDE_INT_1U << l1) - 1;
+ unsigned HOST_WIDE_INT sel = 0;
+ int i = 0;
+ for (;i != l1; i++)
+ {
+ rtx j = XVECEXP (trueop1, 0, i);
+ if (!CONST_INT_P (j))
+ break;
+ sel |= HOST_WIDE_INT_1U << UINTVAL (j);
+ }
+ /* ??? Need to simplify XEXP (trueop0, 0) here. */
+ if (sel == expect)
+ return XEXP (trueop0, 0);
+ }
}
if (XVECLEN (trueop1, 0) == 1
diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c
new file mode 100644
index 00000000000..bc34aa8baa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c
@@ -0,0 +1,30 @@
+/* PR target/97249 */
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3 -masm=att" } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+
+void
+foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
+{
+ for (int i = 0 ; i != 8; i++)
+ p3[i] = p1[i] + p2[i];
+ return;
+}
+
+void
+foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
+{
+ for (int i = 0 ; i != 4; i++)
+ p3[i] = p1[i] + p2[i];
+ return;
+}
+
+void
+foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3)
+{
+ for (int i = 0 ; i != 2; i++)
+ p3[i] = (long long)p1[i] + (long long)p2[i];
+ return;
+}
--
2.18.1
From c00369aa36d2e169b59287c58872c915953dd2a2 Mon Sep 17 00:00:00 2001
From: liuhongt <[email protected]>
Date: Tue, 13 Oct 2020 15:35:29 +0800
Subject: [PATCH] Simplify vec_select of paradoxical subreg.
For an RTX like
(vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
(parallel [(const_int 0) (const_int 1)]))
the whole expression can be simplified to just inner, because the
vec_select picks exactly the elements that inner provides.
gcc/ChangeLog
PR rtl-optimization/97249
* simplify-rtx.c (simplify_binary_operation_1): Simplify
vec_select of paradoxical subreg.
gcc/testsuite/ChangeLog
* gcc.target/i386/pr97249-1.c: New test.
---
gcc/simplify-rtx.c | 27 ++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr97249-1.c | 30 +++++++++++++++++++++++
2 files changed, 57 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr97249-1.c
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 869f0d11b2e..9c397157f28 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -4170,6 +4170,33 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
return subop1;
}
}
+
+ /* For cases like
+ (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
+ (parallel [(const_int 0) (const_int 1)]))
+ return inner directly. */
+ if (GET_CODE (trueop0) == SUBREG
+ && paradoxical_subreg_p (trueop0)
+ && mode == GET_MODE (XEXP (trueop0, 0))
+ && (GET_MODE_NUNITS (GET_MODE (trueop0))).is_constant (&l0)
+ && (GET_MODE_NUNITS (mode)).is_constant (&l1)
+ && l0 % l1 == 0)
+ {
+ gcc_assert (known_eq (XVECLEN (trueop1, 0), l1));
+ unsigned HOST_WIDE_INT expect = (HOST_WIDE_INT_1U << l1) - 1;
+ unsigned HOST_WIDE_INT sel = 0;
+ int i = 0;
+ for (;i != l1; i++)
+ {
+ rtx j = XVECEXP (trueop1, 0, i);
+ if (!CONST_INT_P (j))
+ break;
+ sel |= HOST_WIDE_INT_1U << UINTVAL (j);
+ }
+ /* ??? Need to simplify XEXP (trueop0, 0) here. */
+ if (sel == expect)
+ return XEXP (trueop0, 0);
+ }
}
if (XVECLEN (trueop1, 0) == 1
diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c
new file mode 100644
index 00000000000..bc34aa8baa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c
@@ -0,0 +1,30 @@
+/* PR target/97249 */
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3 -masm=att" } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+
+void
+foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
+{
+ for (int i = 0 ; i != 8; i++)
+ p3[i] = p1[i] + p2[i];
+ return;
+}
+
+void
+foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
+{
+ for (int i = 0 ; i != 4; i++)
+ p3[i] = p1[i] + p2[i];
+ return;
+}
+
+void
+foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3)
+{
+ for (int i = 0 ; i != 2; i++)
+ p3[i] = (long long)p1[i] + (long long)p2[i];
+ return;
+}
--
2.18.1