Ping

Richard Sandiford <richard.sandif...@linaro.org> writes:
> This patch checks whether two data references x and y cannot
> partially overlap and so are independent whenever &x != &y.
> We can then use this in the vectoriser to optimise alias checks.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> Thanks,
> Richard
>
>
> gcc/
> 2016-05-03  Richard Sandiford  <richard.sandif...@linaro.org>
>
>       * hash-traits.h (pair_hash): New struct.
>       * tree-data-ref.h (data_dependence_relation): Add object_a and
>       object_b fields.
>       (DDR_OBJECT_A, DDR_OBJECT_B): New macros.
>       * tree-data-ref.c (initialize_data_dependence_relation): Initialize
>       DDR_OBJECT_A and DDR_OBJECT_B.
>       * tree-vectorizer.h (vec_object_pair): New type.
>       (_loop_vec_info): Add a check_unequal_addrs field.
>       (LOOP_VINFO_CHECK_UNEQUAL_ADDRS): New macro.
>       (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Return true if there is an
>       entry in check_unequal_addrs.  Check comp_alias_ddrs instead of
>       may_alias_ddrs.
>       * tree-vect-loop.c (destroy_loop_vec_info): Release
>       LOOP_VINFO_CHECK_UNEQUAL_ADDRS.
>       (vect_analyze_loop_2): Likewise, when restarting.
>       (vect_estimate_min_profitable_iters): Estimate the cost of
>       LOOP_VINFO_CHECK_UNEQUAL_ADDRS.
>       * tree-vect-data-refs.c: Include tree-hash-traits.h.
>       (vect_prune_runtime_alias_test_list): Try to handle conflicts
>       using LOOP_VINFO_CHECK_UNEQUAL_ADDRS, if the data dependence allows.
>       Count such tests in the final summary.
>       * tree-vect-loop-manip.c (chain_cond_expr): New function.
>       (vect_create_cond_for_align_checks): Use it.
>       (vect_create_cond_for_alias_checks): Likewise.
>       (vect_create_cond_for_unequal_addrs): New function.
>       (vect_loop_versioning): Call it.
>
> gcc/testsuite/
>       * gcc.dg/vect/vect-alias-check-6.c: New test.
>
> Index: gcc/hash-traits.h
> ===================================================================
> --- gcc/hash-traits.h 2017-02-23 19:54:15.000000000 +0000
> +++ gcc/hash-traits.h 2017-05-03 08:48:53.312035228 +0100
> @@ -301,6 +301,76 @@ struct ggc_cache_ptr_hash : pointer_hash
>  
>  struct nofree_string_hash : string_hash, typed_noop_remove <const char *> {};
>  
> +/* Traits for pairs of values, using the first to record empty and
> +   deleted slots.  */
> +
> +template <typename T1, typename T2>
> +struct pair_hash
> +{
> +  typedef std::pair <typename T1::value_type,
> +                  typename T2::value_type> value_type;
> +  typedef std::pair <typename T1::compare_type,
> +                  typename T2::compare_type> compare_type;
> +
> +  static inline hashval_t hash (const value_type &);
> +  static inline bool equal (const value_type &, const compare_type &);
> +  static inline void remove (value_type &);
> +  static inline void mark_deleted (value_type &);
> +  static inline void mark_empty (value_type &);
> +  static inline bool is_deleted (const value_type &);
> +  static inline bool is_empty (const value_type &);
> +};
> +
> +template <typename T1, typename T2>
> +inline hashval_t
> +pair_hash <T1, T2>::hash (const value_type &x)
> +{
> +  return iterative_hash_hashval_t (T1::hash (x.first), T2::hash (x.second));
> +}
> +
> +template <typename T1, typename T2>
> +inline bool
> +pair_hash <T1, T2>::equal (const value_type &x, const compare_type &y)
> +{
> +  return T1::equal (x.first, y.first) && T2::equal (x.second, y.second);
> +}
> +
> +template <typename T1, typename T2>
> +inline void
> +pair_hash <T1, T2>::remove (value_type &x)
> +{
> +  T1::remove (x.first);
> +  T2::remove (x.second);
> +}
> +
> +template <typename T1, typename T2>
> +inline void
> +pair_hash <T1, T2>::mark_deleted (value_type &x)
> +{
> +  T1::mark_deleted (x.first);
> +}
> +
> +template <typename T1, typename T2>
> +inline void
> +pair_hash <T1, T2>::mark_empty (value_type &x)
> +{
> +  T1::mark_empty (x.first);
> +}
> +
> +template <typename T1, typename T2>
> +inline bool
> +pair_hash <T1, T2>::is_deleted (const value_type &x)
> +{
> +  return T1::is_deleted (x.first);
> +}
> +
> +template <typename T1, typename T2>
> +inline bool
> +pair_hash <T1, T2>::is_empty (const value_type &x)
> +{
> +  return T1::is_empty (x.first);
> +}
> +
>  template <typename T> struct default_hash_traits : T {};
>  
>  template <typename T>
> Index: gcc/tree-data-ref.h
> ===================================================================
> --- gcc/tree-data-ref.h       2017-05-03 08:48:48.737038502 +0100
> +++ gcc/tree-data-ref.h       2017-05-03 08:48:53.313041828 +0100
> @@ -240,6 +240,13 @@ struct data_dependence_relation
>         but the analyzer cannot be more specific.  */
>    tree are_dependent;
>  
> +  /* If nonnull, COULD_BE_INDEPENDENT_P is true and the accesses are
> +     independent when the runtime addresses of OBJECT_A and OBJECT_B
> +     are different.  The addresses of both objects are invariant in the
> +     loop nest.  */
> +  tree object_a;
> +  tree object_b;
> +
>    /* For each subscript in the dependence test, there is an element in
>       this array.  This is the attribute that labels the edge A->B of
>       the data_dependence_relation.  */
> @@ -303,6 +310,8 @@ #define DDR_A(DDR) (DDR)->a
>  #define DDR_B(DDR) (DDR)->b
>  #define DDR_AFFINE_P(DDR) (DDR)->affine_p
>  #define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent
> +#define DDR_OBJECT_A(DDR) (DDR)->object_a
> +#define DDR_OBJECT_B(DDR) (DDR)->object_b
>  #define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts
>  #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I]
>  #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length ()
> Index: gcc/tree-data-ref.c
> ===================================================================
> --- gcc/tree-data-ref.c       2017-05-03 08:48:48.737038502 +0100
> +++ gcc/tree-data-ref.c       2017-05-03 08:48:53.313041828 +0100
> @@ -1715,6 +1715,15 @@ initialize_data_dependence_relation (str
>       }
>  
>        DDR_COULD_BE_INDEPENDENT_P (res) = true;
> +      if (!loop_nest.exists ()
> +       || (object_address_invariant_in_loop_p (loop_nest[0],
> +                                               struct_ref_a)
> +           && object_address_invariant_in_loop_p (loop_nest[0],
> +                                                  struct_ref_b)))
> +     {
> +       DDR_OBJECT_A (res) = struct_ref_a;
> +       DDR_OBJECT_B (res) = struct_ref_b;
> +     }
>      }
>  
>    DDR_AFFINE_P (res) = true;
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h     2017-05-03 08:48:48.738045102 +0100
> +++ gcc/tree-vectorizer.h     2017-05-03 08:48:53.315055028 +0100
> @@ -174,6 +174,10 @@ struct dr_with_seg_len_pair_t
>  
>  
>  
> +/* Describes two objects whose addresses must be unequal for the vectorized
> +   loop to be valid.  */
> +typedef std::pair<tree, tree> vec_object_pair;
> +
>  /* Vectorizer state common between loop and basic-block vectorization.  */
>  struct vec_info {
>    enum { bb, loop } kind;
> @@ -270,6 +274,9 @@ typedef struct _loop_vec_info : public v
>       lengths from which the run-time aliasing check is built.  */
>    vec<dr_with_seg_len_pair_t> comp_alias_ddrs;
>  
> +  /* Check that the addresses of each pair of objects is unequal.  */
> +  vec<vec_object_pair> check_unequal_addrs;
> +
>    /* Statements in the loop that have data references that are candidates 
> for a
>       runtime (loop versioning) misalignment check.  */
>    vec<gimple *> may_misalign_stmts;
> @@ -364,6 +371,7 @@ #define LOOP_VINFO_UNALIGNED_DR(L)
>  #define LOOP_VINFO_MAY_MISALIGN_STMTS(L)   (L)->may_misalign_stmts
>  #define LOOP_VINFO_MAY_ALIAS_DDRS(L)       (L)->may_alias_ddrs
>  #define LOOP_VINFO_COMP_ALIAS_DDRS(L)      (L)->comp_alias_ddrs
> +#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L)  (L)->check_unequal_addrs
>  #define LOOP_VINFO_GROUPED_STORES(L)       (L)->grouped_stores
>  #define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances
>  #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
> @@ -383,7 +391,8 @@ #define LOOP_VINFO_ORIG_LOOP_INFO(L)
>  #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L)    \
>    ((L)->may_misalign_stmts.length () > 0)
>  #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)                \
> -  ((L)->comp_alias_ddrs.length () > 0)
> +  ((L)->comp_alias_ddrs.length () > 0 \
> +   || (L)->check_unequal_addrs.length () > 0)
>  #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L)               \
>    (LOOP_VINFO_NITERS_ASSUMPTIONS (L))
>  #define LOOP_REQUIRES_VERSIONING(L)                  \
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c      2017-04-18 19:52:35.059158859 +0100
> +++ gcc/tree-vect-loop.c      2017-05-03 08:48:53.315055028 +0100
> @@ -1275,6 +1275,8 @@ destroy_loop_vec_info (loop_vec_info loo
>    destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
>    loop_vinfo->scalar_cost_vec.release ();
>  
> +  LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();
> +
>    free (loop_vinfo);
>    loop->aux = NULL;
>  }
> @@ -2299,6 +2301,7 @@ vect_analyze_loop_2 (loop_vec_info loop_
>      }
>    /* Free optimized alias test DDRS.  */
>    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
> +  LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();
>    /* Reset target cost data.  */
>    destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
>    LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
> @@ -3261,6 +3264,11 @@ vect_estimate_min_profitable_iters (loop
>        unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
>        (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
>                           vect_prologue);
> +      len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
> +      if (len)
> +     /* Count LEN - 1 ANDs and LEN comparisons.  */
> +     (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt,
> +                           NULL, 0, vect_prologue);
>        dump_printf (MSG_NOTE,
>                     "cost model: Adding cost of checks for loop "
>                     "versioning aliasing.\n");
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c 2017-05-03 08:48:48.738045102 +0100
> +++ gcc/tree-vect-data-refs.c 2017-05-03 08:48:53.314048428 +0100
> @@ -50,6 +50,7 @@ Software Foundation; either version 3, o
>  #include "expr.h"
>  #include "builtins.h"
>  #include "params.h"
> +#include "tree-hash-traits.h"
>  
>  /* Return true if load- or store-lanes optab OPTAB is implemented for
>     COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
> @@ -3085,10 +3086,14 @@ dependence_distance_ge_vf (data_dependen
>  bool
>  vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
>  {
> -  vec<ddr_p> may_alias_ddrs =
> -    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
> -  vec<dr_with_seg_len_pair_t>& comp_alias_ddrs =
> -    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
> +  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
> +  hash_set <tree_pair_hash> compared_objects;
> +
> +  vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
> +  vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
> +    = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
> +  vec<vec_object_pair> &check_unequal_addrs
> +    = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
>    int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
>    tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
>  
> @@ -3151,6 +3156,24 @@ vect_prune_runtime_alias_test_list (loop
>        if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
>       continue;
>  
> +      if (DDR_OBJECT_A (ddr))
> +     {
> +       vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
> +       if (!compared_objects.add (new_pair))
> +         {
> +           if (dump_enabled_p ())
> +             {
> +               dump_printf_loc (MSG_NOTE, vect_location, "checking that ");
> +               dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first);
> +               dump_printf (MSG_NOTE, " and ");
> +               dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second);
> +               dump_printf (MSG_NOTE, " have different addresses\n");
> +             }
> +           LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
> +         }
> +       continue;
> +     }
> +
>        dr_a = DDR_A (ddr);
>        stmt_a = DR_STMT (DDR_A (ddr));
>        dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
> @@ -3346,11 +3369,12 @@ vect_prune_runtime_alias_test_list (loop
>       }
>      }
>  
> +  unsigned int count = (comp_alias_ddrs.length ()
> +                     + check_unequal_addrs.length ());
>    dump_printf_loc (MSG_NOTE, vect_location,
>                  "improved number of alias checks from %d to %d\n",
> -                may_alias_ddrs.length (), comp_alias_ddrs.length ());
> -  if ((int) comp_alias_ddrs.length () >
> -      PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
> +                may_alias_ddrs.length (), count);
> +  if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
>      {
>        if (dump_enabled_p ())
>       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> Index: gcc/tree-vect-loop-manip.c
> ===================================================================
> --- gcc/tree-vect-loop-manip.c        2017-04-18 19:52:34.027608884 +0100
> +++ gcc/tree-vect-loop-manip.c        2017-05-03 08:48:53.314048428 +0100
> @@ -1926,6 +1926,20 @@ vect_create_cond_for_niters_checks (loop
>      *cond_expr = part_cond_expr;
>  }
>  
> +/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
> +   and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */
> +
> +static void
> +chain_cond_expr (tree *cond_expr, tree part_cond_expr)
> +{
> +  if (*cond_expr)
> +    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
> +                           *cond_expr, part_cond_expr);
> +  else
> +    *cond_expr = part_cond_expr;
> +}
> +
> +
>  /* Function vect_create_cond_for_align_checks.
>  
>     Create a conditional expression that represents the alignment checks for
> @@ -2037,13 +2051,32 @@ vect_create_cond_for_align_checks (loop_
>    ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
>    part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
>                               and_tmp_name, ptrsize_zero);
> -  if (*cond_expr)
> -    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
> -                           *cond_expr, part_cond_expr);
> -  else
> -    *cond_expr = part_cond_expr;
> +  chain_cond_expr (cond_expr, part_cond_expr);
>  }
>  
> +
> +/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
> +   create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
> +   Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
> +   and this new condition are true.  Treat a null *COND_EXPR as "true".  */
> +
> +static void
> +vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree 
> *cond_expr)
> +{
> +  vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
> +  unsigned int i;
> +  vec_object_pair *pair;
> +  FOR_EACH_VEC_ELT (pairs, i, pair)
> +    {
> +      tree addr1 = build_fold_addr_expr (pair->first);
> +      tree addr2 = build_fold_addr_expr (pair->second);
> +      tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
> +                                      addr1, addr2);
> +      chain_cond_expr (cond_expr, part_cond_expr);
> +    }
> +}
> +
> +
>  /* Given two data references and segment lengths described by DR_A and DR_B,
>     create expression checking if the two addresses ranges intersect with
>     each other based on index of the two addresses.  This can only be done
> @@ -2280,11 +2313,7 @@ vect_create_cond_for_alias_checks (loop_
>  
>        /* Create condition expression for each pair data references.  */
>        create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, 
> dr_b);
> -      if (*cond_expr)
> -     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
> -                               *cond_expr, part_cond_expr);
> -      else
> -     *cond_expr = part_cond_expr;
> +      chain_cond_expr (cond_expr, part_cond_expr);
>      }
>  
>    if (dump_enabled_p ())
> @@ -2353,7 +2382,10 @@ vect_loop_versioning (loop_vec_info loop
>                                      &cond_expr_stmt_list);
>  
>    if (version_alias)
> -    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
> +    {
> +      vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
> +      vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
> +    }
>  
>    cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
>                                     is_gimple_condexpr, NULL_TREE);
> Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c
> ===================================================================
> --- /dev/null 2017-05-03 08:16:26.972699664 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c    2017-05-03 
> 08:48:53.312035228 +0100
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +
> +#define N 16
> +
> +struct s { int x[N]; };
> +
> +void
> +f1 (struct s *a, struct s *b)
> +{
> +  for (int i = 0; i < N - 1; ++i)
> +    a->x[i + 1] += b->x[i];
> +}
> +
> +void
> +f2 (struct s *a, struct s *b)
> +{
> +  for (int i = 0; i < N; ++i)
> +    a->x[i] += b->x[N - i - 1];
> +}
> +
> +/* { dg-final { scan-tree-dump-times {checking that [^\n]* and [^\n]* have 
> different addresses} 2 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */

Reply via email to