swap: Fix incorrect lane extraction by vec_extract() [PR106770]

In the routine rs6000_analyze_swaps(), special handling of swappable
instructions is done even if the webs that contain the swappable
instructions are not optimized, i.e., the webs do not contain any
permuting load/store instructions along with the associated register
swap instructions. Doing special handling in such webs will result in
the extracted lane being adjusted unnecessarily for vec_extract.

Modifying swappable instructions is also incorrect in webs where
loads/stores on quad word aligned addresses are changed to lvx/stvx.
Similarly, in webs where swap(load(vector constant)) instructions are
replaced with load(swapped vector constant), the swappable
instructions should not be modified.

2023-01-04  Surya Kumari Jangala  <jskum...@linux.ibm.com>

gcc/
        PR rtl-optimization/106770
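        * config/rs6000/rs6000-p8swap.cc (class swap_web_entry): Add
        web_is_optimized field.
        (rs6000_analyze_swaps): Perform special handling of swappable
        insns only in webs that have actually been optimized.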

gcc/testsuite/
        PR rtl-optimization/106770
        * gcc.target/powerpc/pr106770.c: New test.
---

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 19fbbfb67dc..7ed39251df9 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -179,6 +179,9 @@ class swap_web_entry : public web_entry_base
   unsigned int special_handling : 4;
   /* Set if the web represented by this entry cannot be optimized.  */
   unsigned int web_not_optimizable : 1;
+  /* Set if the web represented by this entry has been optimized, i.e.,
+     register swaps of permuting loads/stores have been removed.  */
+  unsigned int web_is_optimized : 1;
   /* Set if this insn should be deleted.  */
   unsigned int will_delete : 1;
 };
@@ -2627,22 +2630,43 @@ rs6000_analyze_swaps (function *fun)
   /* For each load and store in an optimizable web (which implies
      the loads and stores are permuting), find the associated
      register swaps and mark them for removal.  Due to various
-     optimizations we may mark the same swap more than once.  Also
-     perform special handling for swappable insns that require it.  */
+     optimizations we may mark the same swap more than once.  Fix up
+     the non-permuting loads and stores by converting them into
+     permuting ones.  */
   for (i = 0; i < e; ++i)
     if ((insn_entry[i].is_load || insn_entry[i].is_store)
        && insn_entry[i].is_swap)
       {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
-       if (!root_entry->web_not_optimizable)
+       if (!root_entry->web_not_optimizable) {
          mark_swaps_for_removal (insn_entry, i);
+          root_entry->web_is_optimized = true;
+        }
       }
-    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
+    else if (insn_entry[i].is_swappable
+             && (insn_entry[i].special_handling == SH_NOSWAP_LD ||
+                 insn_entry[i].special_handling == SH_NOSWAP_ST))
+      {
+        swap_web_entry* root_entry
+          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
+        if (!root_entry->web_not_optimizable) {
+          handle_special_swappables (insn_entry, i);
+          root_entry->web_is_optimized = true;
+        }
+      }
+
+  /* Perform special handling for swappable insns that require it.
+     Note that special handling should be done only for those
+     swappable insns that are present in webs optimized above.  */
+  for (i = 0; i < e; ++i)
+    if (insn_entry[i].is_swappable && insn_entry[i].special_handling &&
+        !(insn_entry[i].special_handling == SH_NOSWAP_LD || 
+          insn_entry[i].special_handling == SH_NOSWAP_ST))
       {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
-       if (!root_entry->web_not_optimizable)
+       if (root_entry->web_is_optimized)
          handle_special_swappables (insn_entry, i);
       }
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106770.c b/gcc/testsuite/gcc.target/powerpc/pr106770.c
new file mode 100644
index 00000000000..84e9aead975
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106770.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O3 " } */
+/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */
+
+/* PR106770: vec_extract lane must not be adjusted in unoptimized webs.  */
+
+#include <altivec.h>
+
+int cmp2(double a, double b)
+{
+    vector double va = vec_promote(a, 1);
+    vector double vb = vec_promote(b, 1);
+    vector long long vlt = (vector long long)vec_cmplt(va, vb);
+    vector long long vgt = (vector long long)vec_cmplt(vb, va);
+    vector signed long long vr = vec_sub(vlt, vgt);
+
+    return vec_extract(vr, 1);
+}
+

Reply via email to