Hi:
  This patch extend change_zero_ext to change illegitimate constant
into constant pool, this will enable simplification of below:

Trying 5 -> 7:
    5: r85:V4SF=[`*.LC0']
      REG_EQUAL const_vector
    7: r84:V4SF=vec_select(vec_concat(r85:V4SF,r85:V4SF),parallel)
      REG_DEAD r85:V4SF
      REG_EQUAL const_vector
Failed to match this instruction:
(set (reg:V4SF 84)
    (const_vector:V4SF [
            (const_double:SF 3.0e+0 [0x0.cp+2])
            (const_double:SF 2.0e+0 [0x0.8p+2])
            (const_double:SF 4.0e+0 [0x0.8p+3])
            (const_double:SF 1.0e+0 [0x0.8p+1])
        ]))

(insn 5 2 7 2 (set (reg:V4SF 85)
        (mem/u/c:V4SF (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S16
A128])) 
1600 {movv4sf_internal}
     (expr_list:REG_EQUAL (const_vector:V4SF [
                (const_double:SF 4.0e+0 [0x0.8p+3])
                (const_double:SF 3.0e+0 [0x0.cp+2])
                (const_double:SF 2.0e+0 [0x0.8p+2])
                (const_double:SF 1.0e+0 [0x0.8p+1])
            ])
        (nil)))
(insn 7 5 11 2 (set (reg:V4SF 84)
        (vec_select:V4SF (vec_concat:V8SF (reg:V4SF 85)
                (reg:V4SF 85))
            (parallel [
                    (const_int 1 [0x1])
                    (const_int 2 [0x2])
                    (const_int 4 [0x4])
                    (const_int 7 [0x7])
                ])))
3015 {sse_shufps_v4sf}
     (expr_list:REG_DEAD (reg:V4SF 85)
        (expr_list:REG_EQUAL (const_vector:V4SF [
                    (const_double:SF 3.0e+0 [0x0.cp+2])
                    (const_double:SF 2.0e+0 [0x0.8p+2])
                    (const_double:SF 4.0e+0 [0x0.8p+3])
                    (const_double:SF 1.0e+0 [0x0.8p+1])
                ])
            (nil))))

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
  Ok for trunk?

gcc/ChangeLog:

        PR rtl-optimization/43147
        * combine.c (recog_for_combine_1): Adjust comments of ..
        (change_zero_ext):.. this, and extend to change illegitimate
        constant into constant pool.

gcc/testsuite/ChangeLog:

        PR rtl-optimization/43147
        * gcc.target/i386/pr43147.c: New test.
        * gcc.target/i386/pr22076.c: Adjust testcase.
---
 gcc/combine.c                           | 20 +++++++++++++++++++-
 gcc/testsuite/gcc.target/i386/pr22076.c |  4 ++--
 gcc/testsuite/gcc.target/i386/pr43147.c | 15 +++++++++++++++
 3 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr43147.c

diff --git a/gcc/combine.c b/gcc/combine.c
index cb5fa401fcb..0b2afdf45af 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -11404,7 +11404,8 @@ recog_for_combine_1 (rtx *pnewpat, rtx_insn *insn, rtx 
*pnotes)
 
 /* Change every ZERO_EXTRACT and ZERO_EXTEND of a SUBREG that can be
    expressed as an AND and maybe an LSHIFTRT, to that formulation.
-   Return whether anything was so changed.  */
+   Return whether anything was so changed.
+   Also change illegitimate constant into memref of constant pool.  */
 
 static bool
 change_zero_ext (rtx pat)
@@ -11417,6 +11418,23 @@ change_zero_ext (rtx pat)
     {
       rtx x = **iter;
       scalar_int_mode mode, inner_mode;
+      machine_mode const_mode = GET_MODE (x);
+
+      /* Change illegitimate constant into memref of constant pool.  */
+      if (CONSTANT_P (x)
+         && !const_vec_duplicate_p (x)
+         && const_mode != BLKmode
+         && GET_CODE (x) != HIGH
+         && GET_MODE_SIZE (const_mode).is_constant ()
+         && !targetm.legitimate_constant_p (const_mode, x)
+         && !targetm.cannot_force_const_mem (const_mode, x))
+       {
+         x = force_const_mem (GET_MODE (x), x);
+         SUBST (**iter, x);
+         changed = true;
+         continue;
+       }
+
       if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
        continue;
       int size;
diff --git a/gcc/testsuite/gcc.target/i386/pr22076.c 
b/gcc/testsuite/gcc.target/i386/pr22076.c
index 427ffcd4920..866c387280f 100644
--- a/gcc/testsuite/gcc.target/i386/pr22076.c
+++ b/gcc/testsuite/gcc.target/i386/pr22076.c
@@ -15,5 +15,5 @@ void test ()
   x = _mm_add_pi8 (mm0, mm1);
 }
 
-/* { dg-final { scan-assembler-times "movq" 2 } } */
-/* { dg-final { scan-assembler-not "movl" { target nonpic } } } */
+/* { dg-final { scan-assembler-times "movq" 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movl" 4  { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr43147.c 
b/gcc/testsuite/gcc.target/i386/pr43147.c
new file mode 100644
index 00000000000..3c30f917c06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr43147.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-final { scan-assembler "movaps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+#include <x86intrin.h>
+
+__m128
+foo (void)
+{
+  __m128 m = _mm_set_ps(1.0f, 2.0f, 3.0f, 4.0f);
+  m = _mm_shuffle_ps(m, m, 0xC9);
+  m = _mm_shuffle_ps(m, m, 0x2D);
+  return m;
+}
-- 
2.27.0

Reply via email to