> This hashing looks unnecessarily complex.  The values we're hashing are
> vector SSA_NAMEs, so I think we should be able to hash and compare them
> as a plain pair of pointers.
> 
> The type could then be std::pair and the hashing could be done using
> pair_hash from hash-traits.h.
> 

Fancy.. TIL...
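
For anyone else who hadn't met pair_hash before, the pair-based set boils down
to the typedefs below (a sketch mirroring the tree-vectorizer.h hunk in the
patch, reusing pair_hash from hash-traits.h and tree_operand_hash, and assuming
the usual GCC internal headers are in scope):

  /* Hash and compare (vec_mask, loop_mask) as a plain pair of SSA_NAME
     pointers.  */
  typedef std::pair<tree, tree> vec_cond_masked_key;
  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_cond_mask_hash;
  typedef hash_set<tree_cond_mask_hash> vec_cond_masked_set_type;

  /* prepare_vec_mask can then skip re-masking an already-masked condition:  */
  vec_cond_masked_key cond (vec_mask, loop_mask);
  if (loop_vinfo->vec_cond_masked_set.contains (cond))
    return vec_mask;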

Here's the respun patch.

Bootstrapped and regtested on aarch64-none-linux-gnu and x86_64-linux-gnu with
no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * tree-vect-stmts.c (prepare_load_store_mask): Rename to...
        (prepare_vec_mask): ...This and record operations that have already been
        masked.
        (vectorizable_call): Use it.
        (vectorizable_operation): Likewise.
        (vectorizable_store): Likewise.
        (vectorizable_load): Likewise.
        * tree-vectorizer.h (class _loop_vec_info): Add vec_cond_masked_set.
        (vec_cond_masked_set_type, tree_cond_mask_hash,
        vec_cond_masked_key): New.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/pred-combine-and.c: New test.
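
To make the redundancy the patch removes concrete, here is a scalar model of
the per-lane predicate bits in plain C (illustrative only; the function and
variable names are invented for this sketch and do not appear in the patch):

  /* Per-lane model of the masks in the f5 test below.  */
  static int
  masks_agree (int a_gt_b, int a_gt_c, int loop_mask)
  {
    int outer = a_gt_b & loop_mask;              /* mask for the z0[i] store */
    int before = (a_gt_b & a_gt_c) & loop_mask;  /* old: combined condition ANDed with the loop mask again */
    int after = outer & a_gt_c;                  /* new: reuse the already-masked outer compare */
    return before == after;                      /* always 1: AND is associative and commutative */
  }

Since the two expressions are always equal, the inner store can reuse the
already-masked outer compare and the extra AND with the loop predicate
disappears.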

--- inline copy of patch ---

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
new file mode 100644
index 0000000000000000000000000000000000000000..ee927346abe518caa3cba397b11dfd1ee7e93630
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+void f5(float * restrict z0, float * restrict z1, float *restrict x, float * restrict y, float c, int n)
+{
+    for (int i = 0; i < n; i++) {
+        float a = x[i];
+        float b = y[i];
+        if (a > b) {
+            z0[i] = a + b;
+            if (a > c) {
+                z1[i] = a - b;
+            }
+        }
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s} 2 } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2284ad069e4d521f4e0bd43d34181a258cd636ef..2a02ff0b1e53be6eda49770b240f8f58f3928a87 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1796,23 +1796,30 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
    that needs to be applied to all loads and stores in a vectorized loop.
-   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
+   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
+   otherwise return VEC_MASK & LOOP_MASK.
 
    MASK_TYPE is the type of both masks.  If new statements are needed,
    insert them before GSI.  */
 
 static tree
-prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
-                        gimple_stmt_iterator *gsi)
+prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
+                 tree vec_mask, gimple_stmt_iterator *gsi)
 {
   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
   if (!loop_mask)
     return vec_mask;
 
   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
+
+  vec_cond_masked_key cond (vec_mask, loop_mask);
+  if (loop_vinfo->vec_cond_masked_set.contains (cond))
+    return vec_mask;
+
   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
                                          vec_mask, loop_mask);
+
   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
   return and_res;
 }
@@ -3526,8 +3533,9 @@ vectorizable_call (vec_info *vinfo,
                          gcc_assert (ncopies == 1);
                          tree mask = vect_get_loop_mask (gsi, masks, vec_num,
                                                          vectype_out, i);
-                         vargs[mask_opno] = prepare_load_store_mask
-                           (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
+                         vargs[mask_opno] = prepare_vec_mask
+                           (loop_vinfo, TREE_TYPE (mask), mask,
+                            vargs[mask_opno], gsi);
                        }
 
                      gcall *call;
@@ -3564,8 +3572,8 @@ vectorizable_call (vec_info *vinfo,
              tree mask = vect_get_loop_mask (gsi, masks, ncopies,
                                              vectype_out, j);
              vargs[mask_opno]
-               = prepare_load_store_mask (TREE_TYPE (mask), mask,
-                                          vargs[mask_opno], gsi);
+               = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
+                                   vargs[mask_opno], gsi);
            }
 
          gimple *new_stmt;
@@ -6302,10 +6310,46 @@ vectorizable_operation (vec_info *vinfo,
        }
       else
        {
+         tree mask = NULL_TREE;
+         /* When combining two masks, check whether either of them has been
+            combined with a loop mask elsewhere; if so, the new combined mask
+            does not need to be combined with a loop mask again.  */
+         if (masked_loop_p && code == BIT_AND_EXPR)
+           {
+             scalar_cond_masked_key cond0 (op0, ncopies);
+             if (loop_vinfo->scalar_cond_masked_set.contains (cond0))
+               {
+                 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+                                                 vectype, i);
+
+                 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
+                                          vop0, gsi);
+               }
+
+             scalar_cond_masked_key cond1 (op1, ncopies);
+             if (loop_vinfo->scalar_cond_masked_set.contains (cond1))
+               {
+                 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+                                                 vectype, i);
+
+                 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
+                                          vop1, gsi);
+               }
+           }
+
          new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+
+         /* Enter the combined value into the vector cond hash so we don't
+            AND it with a loop mask again.  */
+         if (mask)
+           {
+             vec_cond_masked_key cond (new_temp, mask);
+             loop_vinfo->vec_cond_masked_set.add (cond);
+           }
+
          if (vec_cvt_dest)
            {
              new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
@@ -8166,8 +8210,8 @@ vectorizable_store (vec_info *vinfo,
            final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
                                             vectype, j);
          if (vec_mask)
-           final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-                                                 vec_mask, gsi);
+           final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+                                          final_mask, vec_mask, gsi);
 
          gcall *call;
          if (final_mask)
@@ -8221,8 +8265,8 @@ vectorizable_store (vec_info *vinfo,
                                                 vec_num * ncopies,
                                                 vectype, vec_num * j + i);
              if (vec_mask)
-               final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-                                                     vec_mask, gsi);
+               final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+                                              final_mask, vec_mask, gsi);
 
              if (memory_access_type == VMAT_GATHER_SCATTER)
                {
@@ -9442,8 +9486,8 @@ vectorizable_load (vec_info *vinfo,
            final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
                                             vectype, j);
          if (vec_mask)
-           final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-                                                 vec_mask, gsi);
+           final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+                                          final_mask, vec_mask, gsi);
 
          gcall *call;
          if (final_mask)
@@ -9494,8 +9538,8 @@ vectorizable_load (vec_info *vinfo,
                                                 vec_num * ncopies,
                                                 vectype, vec_num * j + i);
              if (vec_mask)
-               final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-                                                     vec_mask, gsi);
+               final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+                                              final_mask, vec_mask, gsi);
 
              if (i > 0)
                dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index bd6f334d15fae4ffbe8c5ffb496ed0a820971638..d587356a32ff4c7230678e69162d639a31ea4baa 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -327,6 +327,13 @@ struct default_hash_traits<scalar_cond_masked_key>
 
 typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
 
+/* Key and set type recording which (vector condition, loop mask) pairs have
+   already been combined; populated by prepare_vec_mask.  */
+
+typedef std::pair<tree, tree> vec_cond_masked_key;
+typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_cond_mask_hash;
+typedef hash_set<tree_cond_mask_hash> vec_cond_masked_set_type;
+
 /* Describes two objects whose addresses must be unequal for the vectorized
    loop to be valid.  */
 typedef std::pair<tree, tree> vec_object_pair;
@@ -646,6 +653,9 @@ public:
   /* Set of scalar conditions that have loop mask applied.  */
   scalar_cond_masked_set_type scalar_cond_masked_set;
 
+  /* Set of vector conditions that have loop mask applied.  */
+  vec_cond_masked_set_type vec_cond_masked_set;
+
   /* If we are using a loop mask to align memory addresses, this variable
      contains the number of vector elements that we should skip in the
      first iteration of the vector loop (i.e. the number of leading
