[PATCH v2 1/3] Move prepare_decl_rtl to expr.[ch] as extern
From: Kewen Lin Previous version link: https://gcc.gnu.org/ml/gcc-patches/2019-04/msg00912.html This is an NFC (no functional change) patch. Ivopts has some code to expand gimple to an RTL sequence, but before calling the expander, we should call a preparation function, prepare_decl_rtl. This patch changes it and the helper it depends on to non-static, so that they can be shared with other passes. Bootstrapped and regression testing passed on powerpc64le. Is it OK for trunk? gcc/ChangeLog 2019-05-13 Kewen Lin PR middle-end/80791 * expr.c (produce_memory_decl_rtl): New function. (prepare_decl_rtl): Likewise. * expr.h (produce_memory_decl_rtl): New declaration. (prepare_decl_rtl): Likewise. * tree-ssa-loop-ivopts.c (produce_memory_decl_rtl): Remove. (prepare_decl_rtl): Likewise. (computation_cost): Updated to call refactored prepare_decl_rtl. --- gcc/expr.c | 91 + gcc/expr.h | 16 +++- gcc/tree-ssa-loop-ivopts.c | 93 ++ 3 files changed, 110 insertions(+), 90 deletions(-) diff --git a/gcc/expr.c b/gcc/expr.c index 9ff5e5f..1f2ad45 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -12539,3 +12539,94 @@ int_expr_size (tree exp) return tree_to_shwi (size); } + +/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */ + +rtx +produce_memory_decl_rtl (tree obj, int *regno) +{ + addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj)); + machine_mode address_mode = targetm.addr_space.address_mode (as); + rtx x; + + gcc_assert (obj); + if (TREE_STATIC (obj) || DECL_EXTERNAL (obj)) +{ + const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj)); + x = gen_rtx_SYMBOL_REF (address_mode, name); + SET_SYMBOL_REF_DECL (x, obj); + x = gen_rtx_MEM (DECL_MODE (obj), x); + set_mem_addr_space (x, as); + targetm.encode_section_info (obj, x, true); +} + else +{ + x = gen_raw_REG (address_mode, (*regno)++); + x = gen_rtx_MEM (DECL_MODE (obj), x); + set_mem_addr_space (x, as); +} + + return x; +} + +/* Prepares decl_rtl for variables referred in *EXPR_P. Callback for + walk_tree. 
DATA contains the actual fake register number. */ + +tree +prepare_decl_rtl (tree *expr_p, int *ws, void *data) +{ + tree obj = NULL_TREE; + rtx x = NULL_RTX; + decl_rtl_data *info = (decl_rtl_data *) data; + int *regno = info->regno; + vec *treevec = info->treevec; + + switch (TREE_CODE (*expr_p)) +{ +case ADDR_EXPR: + for (expr_p = &TREE_OPERAND (*expr_p, 0); handled_component_p (*expr_p); + expr_p = &TREE_OPERAND (*expr_p, 0)) + continue; + obj = *expr_p; + if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj)) + x = produce_memory_decl_rtl (obj, regno); + break; + +case SSA_NAME: + *ws = 0; + obj = SSA_NAME_VAR (*expr_p); + /* Defer handling of anonymous SSA_NAMEs to the expander. */ + if (!obj) + return NULL_TREE; + if (!DECL_RTL_SET_P (obj)) + x = gen_raw_REG (DECL_MODE (obj), (*regno)++); + break; + +case VAR_DECL: +case PARM_DECL: +case RESULT_DECL: + *ws = 0; + obj = *expr_p; + + if (DECL_RTL_SET_P (obj)) + break; + + if (DECL_MODE (obj) == BLKmode) + x = produce_memory_decl_rtl (obj, regno); + else + x = gen_raw_REG (DECL_MODE (obj), (*regno)++); + + break; + +default: + break; +} + + if (x) +{ + treevec->safe_push (obj); + SET_DECL_RTL (obj, x); +} + + return NULL_TREE; +} diff --git a/gcc/expr.h b/gcc/expr.h index 17c3962..b1894a6b 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -53,7 +53,21 @@ typedef struct separate_ops tree type; tree op0, op1, op2; } *sepops; - + +/* This structure is used to pass information to tree walker function + prepare_decl_rtl. */ +typedef struct data_for_decl_rtl +{ + int *regno; + vec *treevec; +} decl_rtl_data; + +/* Produce decl_rtl for object so it looks like it is stored in memory. */ +rtx produce_memory_decl_rtl (tree, int *); + +/* Prepares decl_rtl for variables referred. Callback for walk_tree. */ +tree prepare_decl_rtl (tree *, int *, void *); + /* This is run during target initialization to set up which modes can be used directly in memory and to initialize the block move optab. 
*/ extern void init_expr_target (void); diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index a44b4cb..885c8e8 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -3687,94 +3687,6 @@ get_group_iv_cost (struct ivopts_data *data, struct iv_group *group, return NULL; } -/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */ -static rtx -produce_memory_decl_rtl (tree obj, int *regno) -{ - addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj)); - machine_mode address_mode = targ
[PATCH v2 2/3] Add predict_doloop_p target hook
From: Kewen Lin Previous version link for background: https://gcc.gnu.org/ml/gcc-patches/2019-04/msg00912.html This hook is to predict whether one loop in gimple will be transformed to low-overhead loop later in RTL, and designed to be called in ivopts. "Since the low-overhead loop optimize transformation is based on RTL, some of those checks are hard to be imitated on gimple, so it's not possible to predict the current loop will be transformed exactly in middle-end. But if we can have most loop predicted precisely, it would be helpful. It highly depends on target hook fine tuning. It's acceptable to have some loops which can be transformed to low-overhead loop but we don't catch. But we should try our best to avoid to predict some loop as low-overhead loop but which isn't." Bootstrapped and regression testing passed on powerpc64le. Is it ok for trunk? gcc/ChangeLog 2019-05-13 Kewen Lin PR middle-end/80791 * target.def (predict_doloop_p): New hook. * targhooks.h (default_predict_doloop_p): New declaration. * targhooks.c (default_predict_doloop_p): New function. * doc/tm.texi.in (TARGET_PREDICT_DOLOOP_P): New hook. * doc/tm.texi: Regenerate. * config/rs6000/rs6000.c (invalid_insn_for_doloop_p): New function. (costly_iter_for_doloop_p): Likewise. (rs6000_predict_doloop_p): Likewise. (TARGET_PREDICT_DOLOOP_P): New macro. --- gcc/config/rs6000/rs6000.c | 174 - gcc/doc/tm.texi| 8 +++ gcc/doc/tm.texi.in | 2 + gcc/target.def | 9 +++ gcc/targhooks.c| 13 gcc/targhooks.h| 1 + 6 files changed, 206 insertions(+), 1 deletion(-) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index a21f4f7..1c1c8eb 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -83,6 +83,9 @@ #include "tree-ssa-propagate.h" #include "tree-vrp.h" #include "tree-ssanames.h" +#include "tree-ssa-loop-niter.h" +#include "tree-cfg.h" +#include "tree-scalar-evolution.h" /* This file should be included last. 
*/ #include "target-def.h" @@ -1914,6 +1917,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost +#undef TARGET_PREDICT_DOLOOP_P +#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv @@ -39436,7 +39442,173 @@ rs6000_mangle_decl_assembler_name (tree decl, tree id) return id; } - +/* Check whether there are some instructions preventing doloop transformation + inside loop body, mainly for instructions which are possible to kill CTR. + + Return true if some invalid insn exits, otherwise return false. */ + +static bool +invalid_insn_for_doloop_p (struct loop *loop) +{ + basic_block *body = get_loop_body (loop); + gimple_stmt_iterator gsi; + + for (unsigned i = 0; i < loop->num_nodes; i++) +for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_call (stmt) && !gimple_call_internal_p (stmt) + && !is_inexpensive_builtin (gimple_call_fndecl (stmt))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "predict doloop failure due to finding call.\n"); + return true; + } + if (computed_goto_p (stmt)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "predict doloop failure due to" + "finding computed jump.\n"); + return true; + } + /* TODO: Now this hook is expected to be called in ivopts, which is + before switchlower1/switchlower2. Checking for SWITCH at this point + will eliminate some good candidates. But since there are only a few + cases having _a_ switch statement in the innermost loop, it can be a low + priority enhancement. 
*/ + + if (gimple_code (stmt) == GIMPLE_SWITCH) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "predict doloop failure due to finding switch.\n"); + return true; + } + } + + return false; +} + +/* Check whether number of iteration computation is too costly for doloop + transformation. It expands the gimple sequence to equivalent RTL insn + sequence, then evaluate the cost. + + Return true if it's costly, otherwise return false. */ + +static bool +costly_iter_for_doloop_p (struct loop *loop, tree niters) +{ + tree type = TREE_TYPE (niters); + unsigned cost = 0, i; + tree obj; + bool speed = optimize_loop_for_speed_p (loop); + int regno = LAST_VIRTUAL_REGISTER + 1; + vec tvec;
[PATCH v2 3/3] Consider doloop cmp use in ivopts
From: Kewen Lin Previous version link for background: https://gcc.gnu.org/ml/gcc-patches/2019-04/msg00912.html First, the patch calls the predict_doloop_p hook to check whether this loop will be transformed into a doloop. If so, it finds the expected compare iv use and the original iv it depends on, and sets that iv candidate as the bind_cand of the group. In the following candidate selection process, we bypass the groups with a bind_cand, since we don't want an iv use which will be eliminated eventually to affect global decision making. At the time of iv candidate set finalization, we fill in the cost pair for each group with a bind_cand. If the bind_cand is already in the final set, we just use it. Otherwise, we check whether one of the candidates in the existing final set is better, and use that one if so. Bootstrapped and regression testing passed on powerpc64le. Is it OK for trunk? gcc/ChangeLog 2019-05-14 Kewen Lin PR middle-end/80791 * tree-ssa-loop-ivopts.c (tree_ssa_iv_optimize_loop): Call predict_doloop_p hook and bind_cand_for_doloop_uses. (bind_cand_for_doloop_uses): New function. (find_optimal_iv_set): Call handle_groups_with_bind_cand. (handle_groups_with_bind_cand): New function. (record_group): Init bind_cand. (set_group_iv_cost): Consider bind_cand group. (iv_ca_dump): Add dump for bind_cand. (try_add_cand_for): Bypass bind_cand group. (iv_ca_extend): Likewise. (iv_ca_narrow): Likewise. (iv_ca_replace): Likewise. gcc/testsuite/ChangeLog 2019-05-14 Kewen Lin PR middle-end/80791 * gcc.dg/tree-ssa/ivopts-lt.c : Adjust. 
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c | 7 +- gcc/tree-ssa-loop-ivopts.c| 155 +- 2 files changed, 156 insertions(+), 6 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c index 171c85a..f61288c 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c @@ -17,6 +17,7 @@ f1 (char *p, uintptr_t i, uintptr_t n) while (i < n); } -/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" } } */ -/* { dg-final { scan-tree-dump-times "PHI vuses; }; @@ -1592,6 +1594,7 @@ record_group (struct ivopts_data *data, enum use_type type) group->type = type; group->related_cands = BITMAP_ALLOC (NULL); group->vuses.create (1); + group->bind_cand = NULL; data->vgroups.safe_push (group); return group; @@ -3612,7 +3615,9 @@ set_group_iv_cost (struct ivopts_data *data, { unsigned i, s; - if (cost.infinite_cost_p ()) + gcc_assert (cand); + /* For the group with bind_cand, make it always have cost pair. */ + if (cost.infinite_cost_p () && group->bind_cand != cand) { BITMAP_FREE (inv_vars); BITMAP_FREE (inv_exprs); @@ -6067,7 +6072,8 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) group->id, cp->cand->id, cp->cost.cost, cp->cost.complexity); else - fprintf (file, " group:%d --> ??\n", group->id); + fprintf (file, " group:%d --> ?? %s\n", group->id, +group->bind_cand ? "(bind)" : ""); } const char *pref = ""; @@ -6110,6 +6116,9 @@ iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, for (i = 0; i < ivs->upto; i++) { group = data->vgroups[i]; + /* Ignore groups binded with some cand. */ + if (group->bind_cand) + continue; old_cp = iv_ca_cand_for_group (ivs, group); if (old_cp @@ -6165,7 +6174,9 @@ iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs, for (i = 0; i < data->vgroups.length (); i++) { group = data->vgroups[i]; - + /* Ignore groups binded with some cand. 
*/ + if (group->bind_cand) + continue; old_cp = iv_ca_cand_for_group (ivs, group); if (old_cp->cand != cand) continue; @@ -6348,6 +6359,9 @@ iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs, for (j = 0; j < ivs->upto; j++) { struct iv_group *group = data->vgroups[j]; + /* Ignore groups binded with some cand. */ + if (group->bind_cand) + continue; old_cp = iv_ca_cand_for_group (ivs, group); if (old_cp->cand != cand) @@ -6406,6 +6420,15 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, struct iv_ca_delta *best_delta = NULL, *act_delta; struct cost_pair *cp; + /* Bypass the candidate selection for the groups with bind_cand, but need + to keep upto up to date, to avoid the count of visited groups becomes + inconsistent in futher handlings. */ + if (group->bind_cand) +{ + ivs->upto++; + return true; +} + iv_ca_add_group (data, ivs, group); best_cost = iv_ca_cost (ivs); cp = iv_ca_cand_for_group (ivs, group); @@ -6635,6 +6658,59 @@ find_optimal_iv_set_1 (struct ivopts_data *data, b
[PATCH v3 2/3] Add predict_doloop_p target hook
From: Kewen Lin Hi, Previous version link: https://gcc.gnu.org/ml/gcc-patches/2019-05/msg00654.html Compared with the previous version, I moved the generic parts of the rs6000 target hook to IVOPTs. But I still kept the target hook as before: it checks some target-specific criteria such as innermost loop, max iteration counts, etc., and checks for invalid stmts in the loop. The reason I decided not to move these parts to generic code is that they are not generic enough. 1) For the target-specific criteria, if we wanted to put them in generic code, we could call the hook targetm.can_use_doloop_p, which requires us to prepare its four parameters; but each target only needs one or two of them, which means we would evaluate some things that aren't required for that target. So I'd like to leave this part to the target hook. 2) For the target invalid stmt check, as grepping for the invalid_within_doloop hook shows, not all targets need to check whether invalid instructions exist in a doloop. If we scanned all stmts in generic code, it would waste time for those targets which don't need the check. Besides, the scope of the current check on SWITCH in the rs6000 hook is wide; if we later want it to be more exact, we may need to check several stmts instead of a single one. Letting the target hook scan the BBs/stmts by itself is also more flexible. Bootstrap and regression testing are ongoing on powerpc64le. Any more comments? gcc/ChangeLog 2019-05-17 Kewen Lin PR middle-end/80791 * target.def (predict_doloop_p): New hook. * targhooks.h (default_predict_doloop_p): New declaration. * targhooks.c (default_predict_doloop_p): New function. * doc/tm.texi.in (TARGET_PREDICT_DOLOOP_P): New hook. * doc/tm.texi: Regenerate. * config/rs6000/rs6000.c (invalid_insn_for_doloop_p): New function. (rs6000_predict_doloop_p): Likewise. (TARGET_PREDICT_DOLOOP_P): New macro. * tree-ssa-loop-ivopts.c (generic_predict_doloop_p): New function. (costly_iter_for_doloop_p): Likewise. 
--- gcc/config/rs6000/rs6000.c | 79 +- gcc/doc/tm.texi| 8 gcc/doc/tm.texi.in | 2 + gcc/target.def | 9 gcc/targhooks.c| 13 ++ gcc/targhooks.h| 1 + gcc/tree-ssa-loop-ivopts.c | 105 + 7 files changed, 216 insertions(+), 1 deletion(-) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index a21f4f7..2fd52d7 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -83,6 +83,7 @@ #include "tree-ssa-propagate.h" #include "tree-vrp.h" #include "tree-ssanames.h" +#include "tree-cfg.h" /* This file should be included last. */ #include "target-def.h" @@ -1914,6 +1915,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost +#undef TARGET_PREDICT_DOLOOP_P +#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv @@ -39436,7 +39440,80 @@ rs6000_mangle_decl_assembler_name (tree decl, tree id) return id; } - +/* Check whether there are some instructions preventing doloop transformation + inside loop body, mainly for instructions which are possible to kill CTR. + + Return true if some invalid insn exits, otherwise return false. 
*/ + +static bool +invalid_insn_for_doloop_p (struct loop *loop) +{ + basic_block *body = get_loop_body (loop); + gimple_stmt_iterator gsi; + + for (unsigned i = 0; i < loop->num_nodes; i++) +for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_call (stmt) && !gimple_call_internal_p (stmt) + && !is_inexpensive_builtin (gimple_call_fndecl (stmt))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "predict doloop failure due to finding call.\n"); + return true; + } + if (computed_goto_p (stmt)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "predict doloop failure due to" + "finding computed jump.\n"); + return true; + } + + /* TODO: Now this hook is expected to be called in ivopts, which is + before switchlower1/switchlower2. Checking for SWITCH at this point + will eliminate some good candidates. But since there are only a few + cases having _a_ switch statement in the innermost loop, it can be a + low priority enhancement. */ + if (gimple_code (stmt) == GIMPLE_SWITCH) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "predict doloop failur