Hi,

I previously added POWER9 support for lxvx and stxvx to replace the
load-swap and swap-store patterns used for POWER8.  However, I missed
the fact that we have separate patterns for loads and stores of 128-bit
floats and other 128-bit scalars.  This patch expands the previous
POWER9 override to catch those cases and disables the other patterns
when P9 vector support is available.
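
For illustration only (not part of the patch), here is a minimal sketch
of the kind of 128-bit scalar access this covers; the function and
variable names are made up:

  /* With -mcpu=power8 -O3 on LE, a __float128 copy like this is
     expected to go through lxvd2x/stxvd2x plus xxpermdi swaps; with
     -mcpu=power9 it should use lxvx/stxvx directly.  */
  __float128 g;

  void
  copy_f128 (__float128 *p)
  {
    g = *p;
  }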

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Ok for trunk?

Thanks,
Bill


[gcc]

2016-01-06  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * config/rs6000/vsx.md (*p9_vecload_<mode>): Replace VSX_M
        mode iterator with VSX_M2.
        (*p9_vecstore_<mode>): Likewise.
        (*vsx_le_permute_<mode>): Restrict to !TARGET_P9_VECTOR.
        (*vsx_le_perm_load_<mode> for VSX_LE_128): Likewise.
        (*vsx_le_perm_store_<mode> for VSX_LE_128): Likewise.
        (define_split for VSX_LE_128 stores): Likewise.
        (define_peephole2 for TImode LE swaps): Likewise.
        (define_split for VSX_LE_128 post-reload stores): Likewise.

[gcc/testsuite]

2016-01-06  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * gcc.target/powerpc/p9-lxvx-stxvx-3.c: New test.


Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md    (revision 232077)
+++ gcc/config/rs6000/vsx.md    (working copy)
@@ -304,8 +304,8 @@
 ;; VSX (P9) moves
 
 (define_insn "*p9_vecload_<mode>"
-  [(set (match_operand:VSX_M 0 "vsx_register_operand" "=<VSa>")
-        (match_operand:VSX_M 1 "memory_operand" "Z"))]
+  [(set (match_operand:VSX_M2 0 "vsx_register_operand" "=<VSa>")
+        (match_operand:VSX_M2 1 "memory_operand" "Z"))]
   "TARGET_P9_VECTOR"
   "lxvx %x0,%y1"
   [(set_attr "type" "vecload")
@@ -312,8 +312,8 @@
    (set_attr "length" "4")])
 
 (define_insn "*p9_vecstore_<mode>"
-  [(set (match_operand:VSX_M 0 "memory_operand" "=Z")
-        (match_operand:VSX_M 1 "vsx_register_operand" "<VSa>"))]
+  [(set (match_operand:VSX_M2 0 "memory_operand" "=Z")
+        (match_operand:VSX_M2 1 "vsx_register_operand" "<VSa>"))]
   "TARGET_P9_VECTOR"
   "stxvx %x1,%y0"
   [(set_attr "type" "vecstore")
@@ -680,7 +680,7 @@
        (rotate:VSX_LE_128
         (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
         (const_int 64)))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "@
    xxpermdi %x0,%x1,%x1,2
    lxvd2x %x0,%y1
@@ -714,9 +714,9 @@
 (define_insn_and_split "*vsx_le_perm_load_<mode>"
   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>")
         (match_operand:VSX_LE_128 1 "memory_operand" "Z"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   [(set (match_dup 2)
        (rotate:VSX_LE_128 (match_dup 1)
                           (const_int 64)))
@@ -735,7 +735,7 @@
 (define_insn "*vsx_le_perm_store_<mode>"
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   [(set_attr "type" "vecstore")
    (set_attr "length" "12")])
@@ -743,7 +743,7 @@
 (define_split
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
   [(set (match_dup 2)
        (rotate:VSX_LE_128 (match_dup 1)
                           (const_int 64)))
@@ -765,7 +765,7 @@
    (set (match_operand:TI 2 "vsx_register_operand" "")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE && !TARGET_P9_VECTOR
    && (rtx_equal_p (operands[0], operands[2])
        || peep2_reg_dead_p (2, operands[0]))"
    [(set (match_dup 2) (match_dup 1))])
@@ -775,7 +775,7 @@
 (define_split
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
   [(set (match_dup 1)
        (rotate:VSX_LE_128 (match_dup 1)
                           (const_int 64)))
Index: gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-3.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-3.c  (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-3.c  (working copy)
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O3" } */
+/* { dg-final { scan-assembler "lxvx" } } */
+/* { dg-final { scan-assembler "stxvx" } } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* Verify P9 vector loads and stores are used rather than the
+   load-swap/swap-store workarounds for P8.  */
+#define SIZE (16384/sizeof(__float128))
+
+static __float128 x[SIZE] __attribute__ ((aligned (16)));
+static __float128 y[SIZE] __attribute__ ((aligned (16)));
+static __float128 a;
+
+void obfuscate(void *a, ...);
+
+void __attribute__((noinline)) do_one(void)
+{
+  unsigned long i;
+
+  obfuscate(x, y, &a);
+
+  for (i = 0; i < SIZE; i++)
+    y[i] = a * x[i];
+
+  obfuscate(x, y, &a);
+}
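
(For reference, once the patch is applied, the new test can be run on
its own from a build tree with something like

  make -C gcc check-gcc RUNTESTFLAGS="powerpc.exp=p9-lxvx-stxvx-3.c"

though the exact invocation depends on the local setup.)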