From: Mikael Morin <mik...@gcc.gnu.org> Continue the second set of loops where the first one stopped in the generated inline MINLOC/MAXLOC code in the cases where the generated code contains two sets of loops. This fixes a regression that was introduced when enabling the generation of inline MINLOC/MAXLOC code with ARRAY of rank greater than 1, no DIM argument, and either non-scalar MASK or floating- point ARRAY.
In the cases where two sets of loops are generated as inline MINLOC/MAXLOC code, we previously generated code such as (for rank 2 ARRAY, so with two levels of nesting): for (idx11 in lower1..upper1) { for (idx12 in lower2..upper2) { ... if (...) { ... goto second_loop; } } } second_loop: for (idx21 in lower1..upper1) { for (idx22 in lower2..upper2) { ... } } which means we process the first elements twice, once in the first set of loops and once in the second one. This change avoids this duplicate processing by using a conditional as lower bound for the second set of loops, generating code like: second_loop_entry = false; for (idx11 in lower1..upper1) { for (idx12 in lower2..upper2) { ... if (...) { ... second_loop_entry = true; goto second_loop; } } } second_loop: for (idx21 in (second_loop_entry ? idx11 : lower1)..upper1) { for (idx22 in (second_loop_entry ? idx12 : lower2)..upper2) { ... second_loop_entry = false; } } It was expected that the compiler optimizations would be able to remove the state variable second_loop_entry. It is the case if ARRAY has rank 1 (so without loop nesting), the variable is removed and the loop bounds become unconditional, which restores previously generated code, fully fixing the regression. For larger rank, unfortunately, the state variable and conditional loop bounds remain, but those cases were previously using library calls, so it's not a regression. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate a set of index variables. Set them using the loop indexes before leaving the first set of loops. Generate a new loop entry predicate. Initialize it. Set it before leaving the first set of loops. Clear it in the body of the second set of loops. For the second set of loops, update each loop lower bound to use the corresponding index variable if the predicate variable is set. --- gcc/fortran/trans-intrinsic.cc | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 3d29bcaf590..f490e795c02 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5371,6 +5371,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual) pos0 = 0; pos1 = 0; S1 = from1; + second_loop_entry = false; while (S1 <= to1) { S0 = from0; while (s0 <= to0 { @@ -5383,6 +5384,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual) limit = a[S1][S0]; pos0 = S0 + (1 - from0); pos1 = S1 + (1 - from1); + second_loop_entry = true; goto lab1; } } @@ -5392,9 +5394,9 @@ strip_kind_from_actual (gfc_actual_arglist * actual) } goto lab2; lab1:; - S1 = from1; + S1 = second_loop_entry ? S1 : from1; while (S1 <= to1) { - S0 = from0; + S0 = second_loop_entry ? S0 : from0; while (S0 <= to0) { if (mask[S1][S0]) if (a[S1][S0] < limit) { @@ -5402,6 +5404,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual) pos0 = S + (1 - from0); pos1 = S + (1 - from1); } + second_loop_entry = false; S0++; } S1++; @@ -5473,6 +5476,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_expr *backexpr; gfc_se backse; tree pos[GFC_MAX_DIMENSIONS]; + tree idx[GFC_MAX_DIMENSIONS]; tree result_var = NULL_TREE; int n; bool optional_mask; @@ -5554,6 +5558,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_get_string ("pos%d", i)); offset[i] = gfc_create_var (gfc_array_index_type, gfc_get_string ("offset%d", i)); + idx[i] = gfc_create_var (gfc_array_index_type, + gfc_get_string ("idx%d", i)); } /* Walk the arguments. */ @@ -5640,6 +5646,18 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_add_modify (&se->pre, limit, tmp); + /* If we are in a case where we generate two sets of loops, the second one + should continue where the first stopped instead of restarting from the + beginning. So nested loops in the second set should have a partial range + on the first iteration, but they should start from the beginning and span + their full range on the following iterations. So we use conditionals in + the loops lower bounds, and use the following variable in those + conditionals to decide whether to use the original loop bound or to use + the index at which the loop from the first set stopped. */ + tree second_loop_entry = gfc_create_var (logical_type_node, + "second_loop_entry"); + gfc_add_modify (&se->pre, second_loop_entry, logical_false_node); + /* Initialize the scalarizer. */ gfc_init_loopinfo (&loop); @@ -5783,8 +5801,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[i]), loop.loopvar[i], offset[i]); gfc_add_modify (&ifblock, pos[i], tmp); + gfc_add_modify (&ifblock, idx[i], loop.loopvar[i]); } + gfc_add_modify (&ifblock, second_loop_entry, logical_true_node); + if (lab1) gfc_add_expr_to_block (&ifblock, build1_v (GOTO_EXPR, lab1)); @@ -5847,6 +5868,12 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) if (lab1) { + for (int i = 0; i < loop.dimen; i++) + loop.from[i] = fold_build3_loc (input_location, COND_EXPR, + TREE_TYPE (loop.from[i]), + second_loop_entry, idx[i], + loop.from[i]); + gfc_trans_scalarized_loop_boundary (&loop, &body); stmtblock_t * const outer_block = &loop.code[loop.dimen - 1]; @@ -5952,7 +5979,9 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) } else tmp = gfc_finish_block (&block); + gfc_add_expr_to_block (&body, tmp); + gfc_add_modify (&body, second_loop_entry, logical_false_node); } gfc_trans_scalarizing_loops (&loop, &body); -- 2.43.0