On Thu, May 4, 2017 at 11:06 AM, Richard Sandiford <richard.sandif...@linaro.org> wrote: > "Bin.Cheng" <amker.ch...@gmail.com> writes: >> On Wed, May 3, 2017 at 9:00 AM, Richard Sandiford >> <richard.sandif...@linaro.org> wrote: >>> Index: gcc/tree-data-ref.h >>> =================================================================== >>> --- gcc/tree-data-ref.h 2017-05-03 08:48:11.977015306 +0100 >>> +++ gcc/tree-data-ref.h 2017-05-03 08:48:48.737038502 +0100 >>> @@ -191,6 +191,9 @@ struct conflict_function >>> >>> struct subscript >>> { >>> + /* The access functions of the two references. */ >>> + tree access_fn[2]; >> Is it better to follow existing code, i.e, name this as >> access_fn_a/access_fn_b. Thus we don't need to use const value 0/1 in >> various places, which is a little bit confusing. > > [Answered below] > >>> + >>> /* A description of the iterations for which the elements are >>> accessed twice. */ >>> conflict_function *conflicting_iterations_in_a; >>> @@ -209,6 +212,7 @@ struct subscript >>> >>> typedef struct subscript *subscript_p; >>> >>> +#define SUB_ACCESS_FN(SUB, I) (SUB)->access_fn[I] >>> #define SUB_CONFLICTS_IN_A(SUB) (SUB)->conflicting_iterations_in_a >>> #define SUB_CONFLICTS_IN_B(SUB) (SUB)->conflicting_iterations_in_b >>> #define SUB_LAST_CONFLICT(SUB) (SUB)->last_conflict >>> @@ -264,6 +268,33 @@ struct data_dependence_relation >>> /* Set to true when the dependence relation is on the same data >>> access. */ >>> bool self_reference_p; >>> + >>> + /* True if the dependence described is conservatively correct rather >>> + than exact, and if it is still possible for the accesses to be >>> + conditionally independent. For example, the a and b references in: >>> + >>> + struct s *a, *b; >>> + for (int i = 0; i < n; ++i) >>> + a->f[i] += b->f[i]; >>> + >>> + conservatively have a distance vector of (0), for the case in which >>> + a == b, but the accesses are independent if a != b. Similarly, >>> + the a and b references in: >>> + >>> + struct s *a, *b; >>> + for (int i = 0; i < n; ++i) >>> + a[0].f[i] += b[i].f[i]; >>> + >>> + conservatively have a distance vector of (0), but they are indepenent >>> + when a != b + i. In contrast, the references in: >>> + >>> + struct s *a; >>> + for (int i = 0; i < n; ++i) >>> + a->f[i] += a->f[i]; >>> + >>> + have the same distance vector of (0), but the accesses can never be >>> + independent. */ >>> + bool could_be_independent_p; >>> }; >>> >>> typedef struct data_dependence_relation *ddr_p; >>> @@ -294,6 +325,7 @@ #define DDR_DIR_VECT(DDR, I) \ >>> #define DDR_DIST_VECT(DDR, I) \ >>> DDR_DIST_VECTS (DDR)[I] >>> #define DDR_REVERSED_P(DDR) (DDR)->reversed_p >>> +#define DDR_COULD_BE_INDEPENDENT_P(DDR) (DDR)->could_be_independent_p >>> >>> >>> bool dr_analyze_innermost (struct data_reference *, struct loop *); >>> @@ -372,22 +404,6 @@ same_data_refs (data_reference_p a, data >>> return false; >>> >>> return true; >>> -} >>> - >>> -/* Return true when the DDR contains two data references that have the >>> - same access functions. */ >>> - >>> -static inline bool >>> -same_access_functions (const struct data_dependence_relation *ddr) >>> -{ >>> - unsigned i; >>> - >>> - for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) >>> - if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i), >>> - DR_ACCESS_FN (DDR_B (ddr), i))) >>> - return false; >>> - >>> - return true; >>> } >>> >>> /* Returns true when all the dependences are computable. */ >>> Index: gcc/tree-data-ref.c >>> =================================================================== >>> --- gcc/tree-data-ref.c 2017-02-23 19:54:15.000000000 +0000 >>> +++ gcc/tree-data-ref.c 2017-05-03 08:48:48.737038502 +0100 >>> @@ -123,8 +123,7 @@ Software Foundation; either version 3, o >>> } dependence_stats; >>> >>> static bool subscript_dependence_tester_1 (struct data_dependence_relation >>> *, >>> - struct data_reference *, >>> - struct data_reference *, >>> + unsigned int, unsigned int, >>> struct loop *); >> As mentioned, how about passing access_fn directly, rather than less >> meaningful 0/1 values? > > The problem is that access_fn is a property of the individual > subscripts, whereas this is operating on a full data_reference. > > One alternative would be to use conditions like: > > first_is_a ? SUB_ACCESS_FN_A (sub) : SUB_ACCESS_FN_B (sub) > > but IMO that's less readable than the existing: > > SUB_ACCESS_FN (sub, index) > > Or we could have individual access_fn arrays for A and B, separate > from the main subscript array, but that would mean allocating three > arrays instead of one. Thanks for explanation, I see the problem now. Even the latter sequence could be different for A and B, there should have the same number index? If that's the case, is it possible just recording the starting position (or length) in DR_ACCESS_FN and use that information to access to access_fn vector. This can save the copy in subscript. Anyway, this is not am important problem.
> >>> /* Returns true iff A divides B. */ >>> >>> @@ -144,6 +143,21 @@ int_divides_p (int a, int b) >>> return ((b % a) == 0); >>> } >>> >>> +/* Return true if reference REF contains a union access. */ >>> + >>> +static bool >>> +ref_contains_union_access_p (tree ref) >>> +{ >>> + while (handled_component_p (ref)) >>> + { >>> + ref = TREE_OPERAND (ref, 0); >>> + if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE >>> + || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE) >>> + return true; >>> + } >>> + return false; >>> +} >>> + >>> >>> >>> /* Dump into FILE all the data references from DATAREFS. */ >>> @@ -433,13 +447,14 @@ dump_data_dependence_relation (FILE *out >>> unsigned int i; >>> struct loop *loopi; >>> >>> - for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) >>> + subscript *sub; >>> + FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) >>> { >>> fprintf (outf, " access_fn_A: "); >>> - print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0); >>> + print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0), 0); >>> fprintf (outf, " access_fn_B: "); >>> - print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0); >>> - dump_subscript (outf, DDR_SUBSCRIPT (ddr, i)); >>> + print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1), 0); >>> + dump_subscript (outf, sub); >>> } >>> >>> fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr)); >>> @@ -1484,11 +1499,10 @@ initialize_data_dependence_relation (str >>> struct data_dependence_relation *res; >>> unsigned int i; >>> >>> - res = XNEW (struct data_dependence_relation); >>> + res = XCNEW (struct data_dependence_relation); >>> DDR_A (res) = a; >>> DDR_B (res) = b; >>> DDR_LOOP_NEST (res).create (0); >>> - DDR_REVERSED_P (res) = false; >>> DDR_SUBSCRIPTS (res).create (0); >>> DDR_DIR_VECTS (res).create (0); >>> DDR_DIST_VECTS (res).create (0); >>> @@ -1506,82 +1520,217 @@ initialize_data_dependence_relation (str >>> return res; >>> } >>> >>> - /* The case where the references are exactly the same. */ >>> - if (operand_equal_p (DR_REF (a), DR_REF (b), 0)) >>> + unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a); >>> + unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b); >>> + if (num_dimensions_a == 0 || num_dimensions_b == 0) >>> { >>> - if ((loop_nest.exists () >>> - && !object_address_invariant_in_loop_p (loop_nest[0], >>> - DR_BASE_OBJECT (a))) >>> - || DR_NUM_DIMENSIONS (a) == 0) >>> - { >>> - DDR_ARE_DEPENDENT (res) = chrec_dont_know; >>> - return res; >>> - } >>> - DDR_AFFINE_P (res) = true; >>> - DDR_ARE_DEPENDENT (res) = NULL_TREE; >>> - DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a)); >>> - DDR_LOOP_NEST (res) = loop_nest; >>> - DDR_INNER_LOOP (res) = 0; >>> - DDR_SELF_REFERENCE (res) = true; >>> - for (i = 0; i < DR_NUM_DIMENSIONS (a); i++) >>> - { >>> - struct subscript *subscript; >>> + DDR_ARE_DEPENDENT (res) = chrec_dont_know; >>> + return res; >>> + } >>> + >>> + /* For unconstrained bases, the outer (highest-index) subscript >>> + describes a variation in the base of the original DR_REF rather >>> + than a component access. We have no type that accurately describes >>> + the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after* >>> + applying the outer subscript) so limit the search to the last real >>> + component access. >>> + >>> + E.g. for: >>> >>> - subscript = XNEW (struct subscript); >>> - SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known (); >>> - SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known (); >>> - SUB_LAST_CONFLICT (subscript) = chrec_dont_know; >>> - SUB_DISTANCE (subscript) = chrec_dont_know; >>> - DDR_SUBSCRIPTS (res).safe_push (subscript); >>> + void >>> + f (int a[][8], int b[][8]) >>> + { >>> + for (int i = 0; i < 8; ++i) >>> + a[i * 2][0] = b[i][0]; >>> } >>> - return res; >>> + >>> + the a and b accesses have a single ARRAY_REF component reference [0] >>> + but have two subscripts. */ >>> + if (DR_UNCONSTRAINED_BASE (a)) >>> + num_dimensions_a -= 1; >>> + if (DR_UNCONSTRAINED_BASE (b)) >>> + num_dimensions_b -= 1; >>> + >>> + /* Now look for two sequences of component references that have the same >>> + type in both A and B. The first sequence includes an arbitrary >>> mixture >>> + of array and structure references while the second always ends on a >>> + structure reference. >>> + >>> + The former (arbitrary) sequence uses access functions: >>> + >>> + [START_A, START_A + NUM_DIMENSIONS) of A >>> + [START_B, START_B + NUM_DIMENSIONS) of B >>> + >>> + The latter sequence uses access functions: >>> + >>> + [STRUCT_START_A, STRUCT_START_A + STRUCT_NUM_DIMENSIONS) of A >>> + [STRUCT_START_B, STRUCT_START_B + STRUCT_NUM_DIMENSIONS) of B >>> + >>> + STRUCT_REF_A and STRUCT_REF_B are the outer references for the >> IIUC, A and B always share the same latter sequence, and the common >> latter sequence ends at a structure reference providing alias >> information. > > The A and B accesses aren't necessarily the same, they just have the > compatible types. E.g. for: > > struct s { int x[8]; int y[8]; } *a, *b; > > ... a->x[0] = b->y[1] ... > > the sequence would include: > > a: [0] .x > b: [1] .y I see. > >> Is it possible to record the the former arbitrary >> references instead of simple flag DDR_COULD_BE_INDEPENDENT_P. With >> this information, alias check can be simplified by stripping away >> address computation for the shared common sub-sequence. I doubt >> vect_create_cond_for_alias_checks could detect this kind CSE for now. >> Ah, I see you changed alias check code generation in order to handle >> this. > > The num_dimensions sequence is only used if it ends at the original > base and if the bases are equal. In other cases it doesn't really help. > The struct_num_dimensions sequence is meant to be the one that is > helpful even when the bases aren't equal. > > Like you say, there's a follow-on patch that uses this for runtime > alias checking. > >>> + latter sequence. */ >>> + unsigned int start_a = 0; >>> + unsigned int start_b = 0; >>> + unsigned int num_dimensions = 0; >>> + unsigned int struct_start_a = 0; >>> + unsigned int struct_start_b = 0; >>> + unsigned int struct_num_dimensions = 0; >>> + unsigned int index_a = 0; >>> + unsigned int index_b = 0; >>> + tree next_ref_a = DR_REF (a); >>> + tree next_ref_b = DR_REF (b); >>> + tree struct_ref_a = NULL_TREE; >>> + tree struct_ref_b = NULL_TREE; >>> + while (index_a < num_dimensions_a && index_b < num_dimensions_b) >>> + { >>> + gcc_checking_assert (handled_component_p (next_ref_a)); >>> + gcc_checking_assert (handled_component_p (next_ref_b)); >>> + tree outer_ref_a = TREE_OPERAND (next_ref_a, 0); >>> + tree outer_ref_b = TREE_OPERAND (next_ref_b, 0); >>> + tree type_a = TREE_TYPE (outer_ref_a); >>> + tree type_b = TREE_TYPE (outer_ref_b); >>> + if (types_compatible_p (type_a, type_b)) >>> + { >>> + /* This pair of accesses belong to a suitable sequence. */ >>> + if (start_a + num_dimensions != index_a >>> + || start_b + num_dimensions != index_b) >>> + { >>> + /* Start a new sequence here. */ >>> + start_a = index_a; >>> + start_b = index_b; >>> + num_dimensions = 0; >>> + } >>> + num_dimensions += 1; >>> + if (TREE_CODE (type_a) == RECORD_TYPE) >>> + { >>> + struct_start_a = start_a; >>> + struct_start_b = start_b; >>> + struct_num_dimensions = num_dimensions; >>> + struct_ref_a = outer_ref_a; >>> + struct_ref_b = outer_ref_b; >>> + } >>> + next_ref_a = outer_ref_a; >>> + next_ref_b = outer_ref_b; >>> + index_a += 1; >>> + index_b += 1; >>> + continue; >>> + } >>> + /* Try to approach equal type sizes. */ >>> + if (!COMPLETE_TYPE_P (type_a) >>> + || !COMPLETE_TYPE_P (type_b) >>> + || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a)) >>> + || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b))) >>> + break; >>> + unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT >>> (type_a)); >>> + unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT >>> (type_b)); >>> + if (size_a <= size_b) >>> + { >>> + index_a += 1; >>> + next_ref_a = outer_ref_a; >>> + } >>> + if (size_b <= size_a) >>> + { >>> + index_b += 1; >>> + next_ref_b = outer_ref_b; >>> + } >>> } >>> >>> - /* If the references do not access the same object, we do not know >>> - whether they alias or not. We do not care about TBAA or alignment >>> - info so we can use OEP_ADDRESS_OF to avoid false negatives. >>> - But the accesses have to use compatible types as otherwise the >>> - built indices would not match. */ >>> - if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), >> OEP_ADDRESS_OF) >>> - || !types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (a)), >>> - TREE_TYPE (DR_BASE_OBJECT (b)))) >>> + /* See whether the sequence ends at the base and whether the two bases >>> + are equal. We do not care about TBAA or alignment info so we can use >>> + OEP_ADDRESS_OF to avoid false negatives. */ >>> + tree base_a = DR_BASE_OBJECT (a); >>> + tree base_b = DR_BASE_OBJECT (b); >>> + bool same_base_p = (start_a + num_dimensions == num_dimensions_a >>> + && start_b + num_dimensions == num_dimensions_b >>> + && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b) >>> + && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF) >>> + && types_compatible_p (TREE_TYPE (base_a), >>> + TREE_TYPE (base_b)) >>> + && (!loop_nest.exists () >>> + || (object_address_invariant_in_loop_p >>> + (loop_nest[0], base_a)))); >> Major change is in function initialize_data_dependence_relation in >> order to detect partial alias opportunity. The original equality >> check on DR_BASE_OBJECT is bypassed now. IMHO better to introduce a >> new parameter to compute_data_reference_for_loop etc., indicating >> whether we want to handle partial alias opportunity or not. After >> all, such computation is unnecessary for predcom/prefetch/parloop. >> It's only a waste of time computing it. > > Well, it also means that we can now prove the accesses are independent > in more cases. E.g. previously we would assume the a and b accesses in: Predcom cares about dependent refs with constant distance, so independent (neither possible dependent) information based on partial alias is not interested. > > struct s { int x[16]; } *a, *b; > for (int i = 0; i < 8; ++i) > a->x[i] = b->x[i + 8]; > > could conflict. > > If callers don't need to know what the relationship between a and b is, > I think they should check for that before going through the process of > initialising and analysing the ddr. This I don't think so. Users don't have the information to pre-check/analyze reference pair. Even it can do that by repeating most work as in data-ref-analyzer, it sounds not a good practice. That's exactly analyzer's job and the reason why interfaces like compute_data_dependence_for_loop are introduced. It doesn't make much sense requiring users to do additional analysis before looking for help from data-ref-analyzer. Thanks, bin > >>> + >>> + /* If the bases are the same, we can include the base variation too. >>> + E.g. the b accesses in: >>> + >>> + for (int i = 0; i < n; ++i) >>> + b[i + 4][0] = b[i][0]; >>> + >>> + have a definite dependence distance of 4, while for: >>> + >>> + for (int i = 0; i < n; ++i) >>> + a[i + 4][0] = b[i][0]; >>> + >>> + the dependence distance depends on the gap between a and b. >>> + >>> + If the bases are different then we can only rely on the sequence >>> + rooted at a structure access, since arrays are allowed to overlap >>> + arbitrarily and change shape arbitrarily. E.g. we treat this as >>> + valid code: >>> + >>> + int a[256]; >>> + ... >>> + ((int (*)[4][3])&a[1])[i][0] += ((int (*)[4][3])&a[2])[i][0]; >>> + >>> + where two lvalues with the same int[4][3] type overlap, and where >>> + both lvalues are distinct from the object's declared type. */ >>> + if (same_base_p) >>> { >>> - DDR_ARE_DEPENDENT (res) = chrec_dont_know; >>> - return res; >>> + if (DR_UNCONSTRAINED_BASE (a)) >>> + num_dimensions += 1; >>> + } >>> + else >>> + { >>> + start_a = struct_start_a; >>> + start_b = struct_start_b; >>> + num_dimensions = struct_num_dimensions; >>> } >>> >>> - /* If the base of the object is not invariant in the loop nest, we cannot >>> - analyze it. TODO -- in fact, it would suffice to record that there >>> may >>> - be arbitrary dependences in the loops where the base object varies. >>> */ >>> - if ((loop_nest.exists () >>> - && !object_address_invariant_in_loop_p (loop_nest[0], DR_BASE_OBJECT >> (a))) >>> - || DR_NUM_DIMENSIONS (a) == 0) >>> + /* Punt if we didn't find a suitable sequence. */ >>> + if (num_dimensions == 0) >>> { >>> DDR_ARE_DEPENDENT (res) = chrec_dont_know; >>> return res; >>> } >>> >>> - /* If the number of dimensions of the access to not agree we can have >>> - a pointer access to a component of the array element type and an >>> - array access while the base-objects are still the same. Punt. */ >>> - if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b)) >>> + if (!same_base_p) >>> { >>> - DDR_ARE_DEPENDENT (res) = chrec_dont_know; >>> - return res; >>> + /* Partial overlap is possible for different bases when strict >>> aliasing >>> + is not in effect. It's also possible if either base involves a >>> union >>> + access; e.g. for: >>> + >>> + struct s1 { int a[2]; }; >>> + struct s2 { struct s1 b; int c; }; >>> + struct s3 { int d; struct s1 e; }; >>> + union u { struct s2 f; struct s3 g; } *p, *q; >>> + >>> + the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at >>> + "p->g.e" (base "p->g") and might partially overlap the s1 at >>> + "q->g.e" (base "q->g"). */ >>> + if (!flag_strict_aliasing >>> + || ref_contains_union_access_p (struct_ref_a) >>> + || ref_contains_union_access_p (struct_ref_b)) >>> + { >>> + DDR_ARE_DEPENDENT (res) = chrec_dont_know; >>> + return res; >>> + } >>> + >>> + DDR_COULD_BE_INDEPENDENT_P (res) = true; >>> } >>> >>> DDR_AFFINE_P (res) = true; >>> DDR_ARE_DEPENDENT (res) = NULL_TREE; >>> - DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a)); >>> + DDR_SUBSCRIPTS (res).create (num_dimensions); >>> DDR_LOOP_NEST (res) = loop_nest; >>> DDR_INNER_LOOP (res) = 0; >>> DDR_SELF_REFERENCE (res) = false; >>> >>> - for (i = 0; i < DR_NUM_DIMENSIONS (a); i++) >>> + for (i = 0; i < num_dimensions; ++i) >>> { >>> struct subscript *subscript; >>> >>> subscript = XNEW (struct subscript); >>> + SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, start_a + i); >>> + SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, start_b + i); >>> SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known (); >>> SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known (); >>> SUB_LAST_CONFLICT (subscript) = chrec_dont_know; >>> @@ -3163,14 +3312,15 @@ add_outer_distances (struct data_depende >>> } >>> >>> /* Return false when fail to represent the data dependence as a >>> - distance vector. INIT_B is set to true when a component has been >>> + distance vector. A_INDEX is the index of the first reference >>> + (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the >>> + second reference. INIT_B is set to true when a component has been >>> added to the distance vector DIST_V. INDEX_CARRY is then set to >>> the index in DIST_V that carries the dependence. */ >>> >>> static bool >>> build_classic_dist_vector_1 (struct data_dependence_relation *ddr, >>> - struct data_reference *ddr_a, >>> - struct data_reference *ddr_b, >>> + unsigned int a_index, unsigned int b_index, >>> lambda_vector dist_v, bool *init_b, >>> int *index_carry) >>> { >>> @@ -3188,8 +3338,8 @@ build_classic_dist_vector_1 (struct data >>> return false; >>> } >>> >>> - access_fn_a = DR_ACCESS_FN (ddr_a, i); >>> - access_fn_b = DR_ACCESS_FN (ddr_b, i); >>> + access_fn_a = SUB_ACCESS_FN (subscript, a_index); >>> + access_fn_b = SUB_ACCESS_FN (subscript, b_index); >>> >>> if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC >>> && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC) >>> @@ -3249,10 +3399,11 @@ build_classic_dist_vector_1 (struct data >>> constant_access_functions (const struct data_dependence_relation *ddr) >>> { >>> unsigned i; >>> + subscript *sub; >>> >>> - for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) >>> - if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i)) >>> - || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), >>> i))) >>> + FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) >>> + if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0)) >>> + || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1))) >>> return false; >>> >>> return true; >>> @@ -3315,10 +3466,11 @@ add_other_self_distances (struct data_de >>> lambda_vector dist_v; >>> unsigned i; >>> int index_carry = DDR_NB_LOOPS (ddr); >>> + subscript *sub; >>> >>> - for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) >>> + FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) >>> { >>> - tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i); >>> + tree access_fun = SUB_ACCESS_FN (sub, 0); >>> >>> if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC) >>> { >>> @@ -3330,7 +3482,7 @@ add_other_self_distances (struct data_de >>> return; >>> } >>> >>> - access_fun = DR_ACCESS_FN (DDR_A (ddr), 0); >>> + access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0); >>> >>> if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC) >>> add_multivariate_self_dist (ddr, access_fun); >>> @@ -3401,6 +3553,23 @@ add_distance_for_zero_overlaps (struct d >>> } >>> } >>> >>> +/* Return true when the DDR contains two data references that have the >>> + same access functions. */ >>> + >>> +static inline bool >>> +same_access_functions (const struct data_dependence_relation *ddr) >>> +{ >>> + unsigned i; >>> + subscript *sub; >>> + >>> + FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) >>> + if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0), >>> + SUB_ACCESS_FN (sub, 1))) >>> + return false; >>> + >>> + return true; >>> +} >>> + >>> /* Compute the classic per loop distance vector. DDR is the data >>> dependence relation to build a vector from. Return false when fail >>> to represent the data dependence as a distance vector. */ >>> @@ -3432,8 +3601,7 @@ build_classic_dist_vector (struct data_d >>> } >>> >>> dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); >>> - if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr), >>> - dist_v, &init_b, &index_carry)) >>> + if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, >> &index_carry)) >>> return false; >>> >>> /* Save the distance vector if we initialized one. */ >>> @@ -3466,12 +3634,11 @@ build_classic_dist_vector (struct data_d >>> if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr))) >>> { >>> lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); >>> - if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr), >>> - loop_nest)) >>> + if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest)) >>> return false; >>> compute_subscript_distance (ddr); >>> - if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr), >>> - save_v, &init_b, &index_carry)) >>> + if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b, >>> + &index_carry)) >>> return false; >>> save_dist_v (ddr, save_v); >>> DDR_REVERSED_P (ddr) = true; >>> @@ -3507,12 +3674,10 @@ build_classic_dist_vector (struct data_d >>> { >>> lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); >>> >>> - if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), >>> - DDR_A (ddr), loop_nest)) >>> + if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest)) >>> return false; >>> compute_subscript_distance (ddr); >>> - if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A >>> (ddr), >>> - opposite_v, &init_b, >>> + if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b, >>> &index_carry)) >>> return false; >>> >>> @@ -3591,13 +3756,13 @@ build_classic_dir_vector (struct data_de >>> } >>> } >>> >>> -/* Helper function. Returns true when there is a dependence between >>> - data references DRA and DRB. */ >>> +/* Helper function. Returns true when there is a dependence between the >>> + data references. A_INDEX is the index of the first reference (0 for >>> + DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. >>> */ >>> >>> static bool >>> subscript_dependence_tester_1 (struct data_dependence_relation *ddr, >>> - struct data_reference *dra, >>> - struct data_reference *drb, >>> + unsigned int a_index, unsigned int b_index, >>> struct loop *loop_nest) >>> { >>> unsigned int i; >>> @@ -3609,8 +3774,8 @@ subscript_dependence_tester_1 (struct da >>> { >>> conflict_function *overlaps_a, *overlaps_b; >>> >>> - analyze_overlapping_iterations (DR_ACCESS_FN (dra, i), >>> - DR_ACCESS_FN (drb, i), >>> + analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index), >>> + SUB_ACCESS_FN (subscript, b_index), >>> &overlaps_a, &overlaps_b, >>> &last_conflicts, loop_nest); >>> >>> @@ -3659,7 +3824,7 @@ subscript_dependence_tester_1 (struct da >>> subscript_dependence_tester (struct data_dependence_relation *ddr, >>> struct loop *loop_nest) >>> { >>> - if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), >> loop_nest)) >>> + if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest)) >>> dependence_stats.num_dependence_dependent++; >>> >>> compute_subscript_distance (ddr); >>> Index: gcc/tree-ssa-loop-prefetch.c >>> =================================================================== >>> --- gcc/tree-ssa-loop-prefetch.c 2017-03-28 16:19:28.000000000 +0100 >>> +++ gcc/tree-ssa-loop-prefetch.c 2017-05-03 08:48:48.737038502 +0100 >>> @@ -1650,6 +1650,7 @@ determine_loop_nest_reuse (struct loop * >>> refb = (struct mem_ref *) DDR_B (dep)->aux; >>> >>> if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know >>> + || DDR_COULD_BE_INDEPENDENT_P (dep) >>> || DDR_NUM_DIST_VECTS (dep) == 0) >>> { >>> /* If the dependence cannot be analyzed, assume that there might >>> be >> As said, we could avoid computing such information in the first place. >> I can see predcom ingores it by explicitly checking DR_BASE_OBJECT, >> what about tree-parloops.c? > > For parloops, it should help that we can now prove lack of dependence > in more cases. > > Thanks, > Richard