Tamar Christina <tamar.christ...@arm.com> writes:
>> This hashing looks unnecessarily complex.  The values we're hashing are
>> vector SSA_NAMEs, so I think we should be able to hash and compare them
>> as a plain pair of pointers.
>> 
>> The type could then be std::pair and the hashing could be done using
>> pair_hash from hash-traits.h.
>> 
>
> Fancy.. TIL...
>
> Here's the respun patch.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-linux-gnu and no 
> issues.

LGTM, just some very minor nits…

> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>       * tree-vect-stmts.c (prepare_load_store_mask): Rename to...
>       (prepare_vec_mask): ...This and record operations that have already been
>       masked.
>       (vectorizable_call): Use it.
>       (vectorizable_operation): Likewise.
>       (vectorizable_store): Likewise.
>       (vectorizable_load): Likewise.
>       * tree-vectorizer.h (class _loop_vec_info): Add vec_cond_masked_set.
>       (vec_cond_masked_set_type, tree_cond_mask_hash,
>       vec_cond_masked_key): New.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/sve/pred-combine-and.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..ee927346abe518caa3cba397b11dfd1ee7e93630
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +void f5(float * restrict z0, float * restrict z1, float *restrict x, float * 
> restrict y, float c, int n)
> +{
> +    for (int i = 0; i < n; i++) {
> +        float a = x[i];
> +        float b = y[i];
> +        if (a > b) {
> +            z0[i] = a + b;
> +            if (a > c) {
> +                z1[i] = a - b;
> +            }
> +        }
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, 
> z[0-9]+\.s, z[0-9]+\.s} 2 } } */
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 
> 2284ad069e4d521f4e0bd43d34181a258cd636ef..2a02ff0b1e53be6eda49770b240f8f58f3928a87
>  100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1796,23 +1796,30 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>  /* Return the mask input to a masked load or store.  VEC_MASK is the 
> vectorized
>     form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
>     that needs to be applied to all loads and stores in a vectorized loop.
> -   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & 
> LOOP_MASK.
> +   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
> +   otherwise return VEC_MASK & LOOP_MASK.
>  
>     MASK_TYPE is the type of both masks.  If new statements are needed,
>     insert them before GSI.  */
>  
>  static tree
> -prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
> -                      gimple_stmt_iterator *gsi)
> +prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
> +               tree vec_mask, gimple_stmt_iterator *gsi)
>  {
>    gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
>    if (!loop_mask)
>      return vec_mask;
>  
>    gcc_assert (TREE_TYPE (loop_mask) == mask_type);
> +
> +  vec_cond_masked_key cond (vec_mask, loop_mask);
> +  if (loop_vinfo->vec_cond_masked_set.contains (cond))
> +    return vec_mask;
> +

Guess this is pushing a personal preference, sorry, but now that
the code is C++11, I think we should use:

  if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
    return vec_mask;

for cases like this, avoiding the need for the separate “cond” variable.

>    tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
>    gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
>                                         vec_mask, loop_mask);
> +
>    gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
>    return and_res;
>  }
> @@ -3526,8 +3533,9 @@ vectorizable_call (vec_info *vinfo,
>                         gcc_assert (ncopies == 1);
>                         tree mask = vect_get_loop_mask (gsi, masks, vec_num,
>                                                         vectype_out, i);
> -                       vargs[mask_opno] = prepare_load_store_mask
> -                         (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
> +                       vargs[mask_opno] = prepare_vec_mask
> +                         (loop_vinfo, TREE_TYPE (mask), mask,
> +                          vargs[mask_opno], gsi);
>                       }
>  
>                     gcall *call;
> @@ -3564,8 +3572,8 @@ vectorizable_call (vec_info *vinfo,
>             tree mask = vect_get_loop_mask (gsi, masks, ncopies,
>                                             vectype_out, j);
>             vargs[mask_opno]
> -             = prepare_load_store_mask (TREE_TYPE (mask), mask,
> -                                        vargs[mask_opno], gsi);
> +             = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
> +                                 vargs[mask_opno], gsi);
>           }
>  
>         gimple *new_stmt;
> @@ -6302,10 +6310,46 @@ vectorizable_operation (vec_info *vinfo,
>       }
>        else
>       {
> +       tree mask = NULL_TREE;
> +       /* When combining two masks check is either of them is elsewhere

s/is either/if either/

didn't notice last time, sorry

> +          combined with a loop mask, if that's the case we can mark that the
> +          new combined mask doesn't need to be combined with a loop mask.  */
> +       if (masked_loop_p && code == BIT_AND_EXPR)
> +         {
> +           scalar_cond_masked_key cond0 (op0, ncopies);
> +           if (loop_vinfo->scalar_cond_masked_set.contains (cond0))
> +             {
> +               mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
> +                                               vectype, i);

The indentation of this line is off; it should line up with “gsi”.
Same for the call below.

> +
> +               vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
> +                                        vop0, gsi);
> +             }
> +
> +           scalar_cond_masked_key cond1 (op1, ncopies);
> +           if (loop_vinfo->scalar_cond_masked_set.contains (cond1))
> +             {
> +               mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
> +                                               vectype, i);
> +
> +               vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
> +                                        vop1, gsi);
> +             }
> +         }
> +
>         new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
>         new_temp = make_ssa_name (vec_dest, new_stmt);
>         gimple_assign_set_lhs (new_stmt, new_temp);
>         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
> +
> +       /* Enter the combined value into the vector cond hash so we don't
> +          AND it with a loop mask again.  */
> +       if (mask)
> +         {
> +           vec_cond_masked_key cond (new_temp, mask);
> +           loop_vinfo->vec_cond_masked_set.add (cond);
> +         }
> +

Similarly to the above, I think it'd be neater to do:

          if (mask)
            loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });

> […]
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 
> bd6f334d15fae4ffbe8c5ffb496ed0a820971638..d587356a32ff4c7230678e69162d639a31ea4baa
>  100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -327,6 +327,13 @@ struct default_hash_traits<scalar_cond_masked_key>
>  
>  typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
>  
> +/* Key and map that records association between vector conditions and
> +   corresponding loop mask, and is populated by prepare_vec_mask.  */
> +
> +typedef std::pair<tree, tree> vec_cond_masked_key;
> +typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_cond_mask_hash;

Even more petty, but dropping the space before the “<” would make these
three lines consistent.

OK with those changes, thanks.

Richard

> +typedef hash_set<tree_cond_mask_hash> vec_cond_masked_set_type;
> +
>  /* Describes two objects whose addresses must be unequal for the vectorized
>     loop to be valid.  */
>  typedef std::pair<tree, tree> vec_object_pair;
> @@ -646,6 +653,9 @@ public:
>    /* Set of scalar conditions that have loop mask applied.  */
>    scalar_cond_masked_set_type scalar_cond_masked_set;
>  
> +  /* Set of vector conditions that have loop mask applied.  */
> +  vec_cond_masked_set_type vec_cond_masked_set;
> +
>    /* If we are using a loop mask to align memory addresses, this variable
>       contains the number of vector elements that we should skip in the
>       first iteration of the vector loop (i.e. the number of leading

Reply via email to