> I think this is better, but I'd make this partial_store_elidable_p only and
> for loads replace with 'else'?  Alternatively rename it to
> partial_load_store_all_lanes_masked_p or so?  Btw, I see we're oddly
> rejecting any mask != -1 even when len == 0?  Likewise we don't seem to
> treat mask == 0 the same as len == 0?

Yes, I also noticed the inconsistencies.  At first I wanted to change as little
as possible, but maybe another approach is still OK:

What we're still missing in order to treat things more uniformly is an else
operand for len_load.  With that in place we can call internal_fn_else_index
for all partial loads and use its value.

So I went ahead and did that in a separate preparation patch.

The attached v3 of the elision patch goes a bit further than v1/v2: it tries
to classify "all active", "all inactive", and "mixed" and also checks all
partial loads (like gathers and lanes).  It depends on the preparation patch,
though, because it calls internal_fn_else_index unconditionally.

They have been bootstrapped and regtested individually as well as together on 
x86 and power10.  Regtested on riscv64, aarch64, and s390 (via qemu).

Regards
 Robin


[PATCH v3] fold: Elide MASK_LEN_LOAD/STORE with zero length [PR122635].

This patch adds zero-length handling to gimple_fold_partial_store and
gimple_fold_partial_load and unifies them into
gimple_fold_partial_load_store.

It introduces a new function partial_load_store_mask_state that
returns
 MASK_ALL_INACTIVE,
 MASK_ALL_ACTIVE, or
 MASK_UNKNOWN.

This result is used to either replace a load with its else value or elide a
store (when all lanes are inactive), turn the load/store into a regular mem
ref (when all lanes are active), or do nothing (mixed).
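
For illustration, the all-inactive case folds a zero-length access roughly
like this (hypothetical gimple sketch; argument order and operand names are
made up for the example, not taken from an actual dump):

  before:
    vect_x = .MASK_LEN_LOAD (ptr, 32B, { -1, ... }, else_val, 0, 0);
    .MASK_LEN_STORE (ptr2, 32B, { -1, ... }, 0, 0, vect_x);

  after:
    vect_x = else_val;
    (the store is removed)

The all-active case instead turns the call into a plain vector MEM_REF
assignment, as we already did before.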

        PR tree-optimization/122635

gcc/ChangeLog:

        * gimple-fold.cc (enum mask_load_store_state): New enum.
        (gimple_fold_partial_load_store_mem_ref): Only fold
        "all active" loads/stores.
        (partial_load_store_mask_state): New function to compute mask
        state.
        (gimple_fold_partial_load): Remove.
        (gimple_fold_partial_load_store): New function.
        (gimple_fold_partial_store): Remove.
        (gimple_fold_call): Use new function.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/pfalse-store.c: Expect more elided
        stores.
        * gcc.target/powerpc/p9-vec-length-epil-8.c: Expect two fewer
        lxvl.
        * gcc.target/riscv/rvv/autovec/pr122635-1.c: New test.
        * gcc.target/riscv/rvv/autovec/pr122635-2.c: New test.
---
 gcc/gimple-fold.cc                            | 225 ++++++++++++------
 .../gcc.target/aarch64/sve/pfalse-store.c     |   5 +-
 .../gcc.target/powerpc/p9-vec-length-epil-8.c |   2 +-
 .../gcc.target/riscv/rvv/autovec/pr122635-1.c |  20 ++
 .../gcc.target/riscv/rvv/autovec/pr122635-2.c |  18 ++
 5 files changed, 198 insertions(+), 72 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 3fc76313622..a7b5d8352e7 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -5757,50 +5757,112 @@ arith_overflowed_p (enum tree_code code, const_tree type,
   return wi::min_precision (wres, sign) > TYPE_PRECISION (type);
 }
 
-/* If IFN_{MASK,LEN,MASK_LEN}_LOAD/STORE call CALL is unconditional,
-   return a MEM_REF for the memory it references, otherwise return null.
-   VECTYPE is the type of the memory vector.  MASK_P indicates it's for
-   MASK if true, otherwise it's for LEN.  */
+/* Mask state for partial load/store operations (mask and length).  */
+enum mask_load_store_state {
+  MASK_ALL_INACTIVE,  /* All lanes/elements are inactive (can be elided).  */
+  MASK_ALL_ACTIVE,    /* All lanes/elements are active (unconditional).  */
+  MASK_UNKNOWN
+};
 
-static tree
-gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool mask_p)
+/* Check the mask/length state of IFN_{MASK,LEN,MASK_LEN}_LOAD/STORE call CALL.
+   Returns whether all elements are active, all inactive, or mixed.
+   VECTYPE is the vector type of the operation.  */
+
+static enum mask_load_store_state
+partial_load_store_mask_state (gcall *call, tree vectype)
 {
-  tree ptr = gimple_call_arg (call, 0);
-  tree alias_align = gimple_call_arg (call, 1);
-  if (!tree_fits_uhwi_p (alias_align))
-    return NULL_TREE;
+  internal_fn ifn = gimple_call_internal_fn (call);
+  int mask_index = internal_fn_mask_index (ifn);
+  int len_index = internal_fn_len_index (ifn);
+
+  /* Extract length and mask arguments up front.  */
+  tree len = len_index != -1 ? gimple_call_arg (call, len_index) : NULL_TREE;
+  tree bias = len ? gimple_call_arg (call, len_index + 1) : NULL_TREE;
+  tree mask = mask_index != -1 ? gimple_call_arg (call, mask_index) : NULL_TREE;
+
+  poly_int64 nelts = GET_MODE_NUNITS (TYPE_MODE (vectype));
 
-  if (mask_p)
+  poly_widest_int wlen = -1;
+  bool full_length_p = !len;  /* No length means full length.  */
+
+  /* Compute effective length.  */
+  if (len && poly_int_tree_p (len))
     {
-      tree mask = gimple_call_arg (call, 2);
-      if (!integer_all_onesp (mask))
-       return NULL_TREE;
+      gcc_assert (TREE_CODE (bias) == INTEGER_CST);
+      wlen = wi::to_poly_widest (len) + wi::to_widest (bias);
+
+      if (known_eq (wlen, 0))
+       return MASK_ALL_INACTIVE;
+
+      if (known_eq (wlen, nelts))
+       full_length_p = true;
+      else
+       full_length_p = false;
     }
-  else
+
+  /* Check mask for early return cases.  */
+  if (mask)
     {
-      internal_fn ifn = gimple_call_internal_fn (call);
-      int len_index = internal_fn_len_index (ifn);
-      tree basic_len = gimple_call_arg (call, len_index);
-      if (!poly_int_tree_p (basic_len))
-       return NULL_TREE;
-      tree bias = gimple_call_arg (call, len_index + 1);
-      gcc_assert (TREE_CODE (bias) == INTEGER_CST);
-      /* For LEN_LOAD/LEN_STORE/MASK_LEN_LOAD/MASK_LEN_STORE,
-        we don't fold when (bias + len) != VF.  */
-      if (maybe_ne (wi::to_poly_widest (basic_len) + wi::to_widest (bias),
-                   GET_MODE_NUNITS (TYPE_MODE (vectype))))
-       return NULL_TREE;
+      if (integer_zerop (mask))
+       return MASK_ALL_INACTIVE;
+
+      if (full_length_p && integer_all_onesp (mask))
+       return MASK_ALL_ACTIVE;
+    }
+  else if (full_length_p)
+    /* No mask and full length means all active.  */
+    return MASK_ALL_ACTIVE;
+
+  /* For VLA vectors, we can't do much more.  */
+  if (!nelts.is_constant ())
+    return MASK_UNKNOWN;
+
+  /* Same for VLS vectors with non-constant mask.  */
+  if (mask && TREE_CODE (mask) != VECTOR_CST)
+    return MASK_UNKNOWN;
 
-      /* For MASK_LEN_{LOAD,STORE}, we should also check whether
-         the mask is all ones mask.  */
-      if (ifn == IFN_MASK_LEN_LOAD || ifn == IFN_MASK_LEN_STORE)
+  /* Check VLS vector elements.  */
+  gcc_assert (wlen.is_constant ());
+
+  HOST_WIDE_INT active_len = wlen.to_constant ().to_shwi ();
+  if (active_len == -1)
+    active_len = nelts.to_constant ();
+
+  /* Check if all elements in the active range match the mask.  */
+  for (HOST_WIDE_INT i = 0; i < active_len; i++)
+    {
+      bool elt_active = !mask || !integer_zerop (vector_cst_elt (mask, i));
+      if (!elt_active)
        {
-         tree mask = gimple_call_arg (call, internal_fn_mask_index (ifn));
-         if (!integer_all_onesp (mask))
-           return NULL_TREE;
+         /* Found an inactive element.  Check if all are inactive.  */
+         for (HOST_WIDE_INT j = 0; j < active_len; j++)
+           if (!mask || !integer_zerop (vector_cst_elt (mask, j)))
+             return MASK_UNKNOWN;  /* Mixed state.  */
+         return MASK_ALL_INACTIVE;
        }
     }
 
+  /* All elements in active range are active.  */
+  return full_length_p ? MASK_ALL_ACTIVE : MASK_UNKNOWN;
+}
+
+
+/* If IFN_{MASK,LEN,MASK_LEN}_LOAD/STORE call CALL is unconditional
+   (all lanes active), return a MEM_REF for the memory it references.
+   Otherwise return NULL_TREE.  VECTYPE is the type of the memory vector.  */
+
+static tree
+gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype)
+{
+  /* Only fold if all lanes are active (unconditional).  */
+  if (partial_load_store_mask_state (call, vectype) != MASK_ALL_ACTIVE)
+    return NULL_TREE;
+
+  tree ptr = gimple_call_arg (call, 0);
+  tree alias_align = gimple_call_arg (call, 1);
+  if (!tree_fits_uhwi_p (alias_align))
+    return NULL_TREE;
+
   unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align);
   if (TYPE_ALIGN (vectype) != align)
     vectype = build_aligned_type (vectype, align);
@@ -5808,41 +5870,68 @@ gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool mask_p)
   return fold_build2 (MEM_REF, vectype, ptr, offset);
 }
 
-/* Try to fold IFN_{MASK,LEN}_LOAD call CALL.  Return true on success.
-   MASK_P indicates it's for MASK if true, otherwise it's for LEN.  */
+/* Try to fold IFN_{MASK,LEN}_LOAD/STORE call CALL.  Return true on success.  */
 
 static bool
-gimple_fold_partial_load (gimple_stmt_iterator *gsi, gcall *call, bool mask_p)
+gimple_fold_partial_load_store (gimple_stmt_iterator *gsi, gcall *call)
 {
+  internal_fn ifn = gimple_call_internal_fn (call);
   tree lhs = gimple_call_lhs (call);
-  if (!lhs)
-    return false;
+  bool is_load = (lhs != NULL_TREE);
+  tree vectype;
 
-  if (tree rhs
-      = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (lhs), mask_p))
+  if (is_load)
+    vectype = TREE_TYPE (lhs);
+  else
     {
-      gassign *new_stmt = gimple_build_assign (lhs, rhs);
-      gimple_set_location (new_stmt, gimple_location (call));
-      gimple_move_vops (new_stmt, call);
-      gsi_replace (gsi, new_stmt, false);
-      return true;
+      tree rhs = gimple_call_arg (call, internal_fn_stored_value_index (ifn));
+      vectype = TREE_TYPE (rhs);
     }
-  return false;
-}
 
-/* Try to fold IFN_{MASK,LEN}_STORE call CALL.  Return true on success.
-   MASK_P indicates it's for MASK if true, otherwise it's for LEN.  */
+  enum mask_load_store_state state
+    = partial_load_store_mask_state (call, vectype);
 
-static bool
-gimple_fold_partial_store (gimple_stmt_iterator *gsi, gcall *call,
-                          bool mask_p)
-{
-  internal_fn ifn = gimple_call_internal_fn (call);
-  tree rhs = gimple_call_arg (call, internal_fn_stored_value_index (ifn));
-  if (tree lhs
-      = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (rhs), mask_p))
+  /* Handle all-inactive case.  */
+  if (state == MASK_ALL_INACTIVE)
     {
-      gassign *new_stmt = gimple_build_assign (lhs, rhs);
+      if (is_load)
+       {
+         /* Replace load with else value.  */
+         int else_index = internal_fn_else_index (ifn);
+         tree else_value = gimple_call_arg (call, else_index);
+         gassign *new_stmt = gimple_build_assign (lhs, else_value);
+         gimple_set_location (new_stmt, gimple_location (call));
+         gsi_replace (gsi, new_stmt, false);
+         return true;
+       }
+      else
+       {
+         /* Remove inactive store.  */
+         unlink_stmt_vdef (call);
+         release_defs (call);
+         gsi_replace (gsi, gimple_build_nop (), true);
+         return true;
+       }
+    }
+
+  /* We cannot simplify a gather/scatter or load/store lanes further.  */
+  if (internal_gather_scatter_fn_p (ifn)
+      || TREE_CODE (vectype) == ARRAY_TYPE)
+    return false;
+
+  /* Handle all-active case - fold to regular memory operation.  */
+  if (tree mem_ref = gimple_fold_partial_load_store_mem_ref (call, vectype))
+    {
+      gassign *new_stmt;
+      if (is_load)
+       new_stmt = gimple_build_assign (lhs, mem_ref);
+      else
+       {
+         tree rhs
+           = gimple_call_arg (call, internal_fn_stored_value_index (ifn));
+         new_stmt = gimple_build_assign (mem_ref, rhs);
+       }
+
       gimple_set_location (new_stmt, gimple_location (call));
       gimple_move_vops (new_stmt, call);
       gsi_replace (gsi, new_stmt, false);
@@ -6075,19 +6164,21 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace)
          cplx_result = true;
          uaddc_usubc = true;
          break;
-       case IFN_MASK_LOAD:
-         changed |= gimple_fold_partial_load (gsi, stmt, true);
-         break;
-       case IFN_MASK_STORE:
-         changed |= gimple_fold_partial_store (gsi, stmt, true);
-         break;
        case IFN_LEN_LOAD:
+       case IFN_MASK_LOAD:
        case IFN_MASK_LEN_LOAD:
-         changed |= gimple_fold_partial_load (gsi, stmt, false);
-         break;
+       case IFN_MASK_GATHER_LOAD:
+       case IFN_MASK_LEN_GATHER_LOAD:
+       case IFN_MASK_LOAD_LANES:
+       case IFN_MASK_LEN_LOAD_LANES:
        case IFN_LEN_STORE:
+       case IFN_MASK_STORE:
        case IFN_MASK_LEN_STORE:
-         changed |= gimple_fold_partial_store (gsi, stmt, false);
+       case IFN_MASK_SCATTER_STORE:
+       case IFN_MASK_LEN_SCATTER_STORE:
+       case IFN_MASK_STORE_LANES:
+       case IFN_MASK_LEN_STORE_LANES:
+         changed |= gimple_fold_partial_load_store (gsi, stmt);
          break;
        default:
          break;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pfalse-store.c b/gcc/testsuite/gcc.target/aarch64/sve/pfalse-store.c
index 1539f58c824..39db13bbcd6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pfalse-store.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pfalse-store.c
@@ -46,8 +46,5 @@ ALL_DATA (st2, x2_t)
 ALL_DATA (st3, x3_t)
 ALL_DATA (st4, x4_t)
 
-/* FIXME: Currently, st1/2/3/4 are not folded with a pfalse
-   predicate, which is the reason for the 48 missing cases below. Once
-   folding is implemented for these intrinsics, the sum should be 60.  */
-/* { dg-final { scan-assembler-times {\t.cfi_startproc\n\tret\n} 12 } } */
+/* { dg-final { scan-assembler-times {\t.cfi_startproc\n\tret\n} 60 } } */
 /* { dg-final { scan-assembler-times {\t.cfi_startproc\n} 60 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
index 34a2c8eb11b..5dff0d0ceb9 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
@@ -13,5 +13,5 @@
 
 #include "p9-vec-length-8.h"
 
-/* { dg-final { scan-assembler-times {\mlxvl\M} 16 } } */
+/* { dg-final { scan-assembler-times {\mlxvl\M} 14 } } */
 /* { dg-final { scan-assembler-times {\mstxvl\M} 7 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
new file mode 100644
index 00000000000..0beb3d70866
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl 
-mno-autovec-segment" } */
+
+typedef struct {
+  int a[6];
+  float b[3];
+} c;
+
+int d(c *e) {
+  int f =0;
+  for (; f < 3; f++) {
+    e->a[2 * f] = e->b[f];
+    e->a[2 * f + 1] = -e->a[2 * f];
+    e->a[2 * f] = f + 3 * e->a[2 * f];
+    e->a[2 * f + 1] = f + 3 * e->a[2 * f + 1];
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "vsetivli.*zero,0" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c
new file mode 100644
index 00000000000..0de69b52cb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl 
-mno-autovec-segment" } */
+
+typedef struct {
+  int A[6];
+  float b[];
+} a;
+
+int b(a *a) {
+  int b = 0;
+  for (; b < 3; b++) {
+    a->A[2 * b] = a->b[b] - b + a->A[2 * b];
+    a->A[2 * b + 1] = b * a->A[2 * b + 1];
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "vsetivli.*zero,0" } } */
-- 
2.51.1
