Hello Vineet: Thanks for your time and valuable comments.
On 21/10/23 5:26 am, Vineet Gupta wrote: > On 10/19/23 23:50, Ajit Agarwal wrote: >> Hello All: >> >> This version 9 of the patch uses abi interfaces to remove zero and sign >> extension elimination. >> Bootstrapped and regtested on powerpc-linux-gnu. >> >> In this version (version 9) of the patch following review comments are >> incorporated. >> >> a) Removal of hard code zero_extend and sign_extend in abi interfaces. >> b) Source and destination with different registers are considered. >> c) Further enhancements. >> d) Added sign extension elimination using abi interfaces. > > As has been trend in the past, I don't think all the review comments have > been addressed. > The standard practice is to reply to reviewer's email and say yay/nay > explicitly to each comment. Some of my comments in [1a] are still not > resolved, importantly the last two. To be fair you did reply [1b] but the > comments were not addressed explicitly. > > [1a] https://gcc.gnu.org/pipermail/gcc-patches/2023-September/630814.html > [1b] https://gcc.gnu.org/pipermail/gcc-patches/2023-September/630865.html > I have addressed the last 2 comments in the version 10 of the patch. Please let me know if there is anything missing. Regarding last comments with a providing different tests if you have any suggestions please let me know. > Anyhow I gave this a try for RISC-V, specially after [2a][2b] as I was > curious to see if this uncovers REE handling extraneous extensions which > could potentially be eliminated in Expand itself, which is generally better > as it happens earlier in the pipeline. > > [2a] 2023-10-16 8eb9cdd14218 expr: don't clear SUBREG_PROMOTED_VAR_P flag for > a promoted subreg [target/111466] > [2b] https://gcc.gnu.org/pipermail/gcc-patches/2023-October/631818.html > > Bad news is with the patch, we fail to even bootstrap risc-v, buckles over > when building libgcc itself. > > The reproducer is embarrassingly simple, build with -O2: > > float a; > b() { return a; } > > See details below.... > >> Thanks & Regards >> Ajit >> >> ree: Improve ree pass for rs6000 target using defined abi interfaces >> >> For rs6000 target we see redundant zero and sign extension and done >> to improve ree pass to eliminate such redundant zero and sign extension >> using defined ABI interfaces. >> >> 2023-10-20 Ajit Kumar Agarwal <aagar...@linux.ibm.com> >> >> gcc/ChangeLog: >> >> * ree.cc (combine_reaching_defs): Use of zero_extend and sign_extend >> defined abi interfaces. >> (add_removable_extension): Use of defined abi interfaces for no >> reaching defs. >> (abi_extension_candidate_return_reg_p): New function. >> (abi_extension_candidate_p): New function. >> (abi_extension_candidate_argno_p): New function. >> (abi_handle_regs): New function. >> (abi_target_promote_function_mode): New function. >> >> gcc/testsuite/ChangeLog: >> >> * g++.target/powerpc/zext-elim-3.C >> --- >> gcc/ree.cc | 151 +++++++++++++++++- >> .../g++.target/powerpc/zext-elim-3.C | 13 ++ >> 2 files changed, 161 insertions(+), 3 deletions(-) >> create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C >> >> diff --git a/gcc/ree.cc b/gcc/ree.cc >> index fc04249fa84..9f21f0e9907 100644 >> --- a/gcc/ree.cc >> +++ b/gcc/ree.cc >> @@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg) >> if (REGNO (DF_REF_REG (def)) == REGNO (reg)) >> break; >> - gcc_assert (def != NULL); >> + if (def == NULL) >> + return NULL; >> ref_chain = DF_REF_CHAIN (def); >> @@ -750,6 +751,124 @@ get_extended_src_reg (rtx src) >> return src; >> } >> +/* Return TRUE if target mode is equal to source mode of zero_extend >> + or sign_extend otherwise false. */ >> + >> +static bool >> +abi_target_promote_function_mode (machine_mode mode) >> +{ >> + int unsignedp; >> + machine_mode tgt_mode >> + = targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp, >> + NULL_TREE, 1); >> + >> + if (tgt_mode == mode) >> + return true; >> + else >> + return false; >> +} >> + >> +/* Return TRUE if the candidate insn is zero extend and regno is >> + a return registers. */ >> + >> +static bool >> +abi_extension_candidate_return_reg_p (/*rtx_insn *insn, */int regno) >> +{ >> + if (targetm.calls.function_value_regno_p (regno)) >> + return true; >> + >> + return false; >> +} >> + >> +/* Return TRUE if reg source operand of zero_extend is argument registers >> + and not return registers and source and destination operand are same >> + and mode of source and destination operand are not same. */ >> + >> +static bool >> +abi_extension_candidate_p (rtx_insn *insn) >> +{ >> + rtx set = single_set (insn); >> + machine_mode dst_mode = GET_MODE (SET_DEST (set)); >> + rtx orig_src = XEXP (SET_SRC (set), 0); >> + >> + if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src)) >> + || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src))) >> + return false; >> + >> + /* Mode of destination and source should be different. */ >> + if (dst_mode == GET_MODE (orig_src)) >> + return false; >> + >> + machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0)); >> + bool promote_p = abi_target_promote_function_mode (mode); >> + >> + /* REGNO of source and destination should be same if not >> + promoted. */ >> + if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src)) >> + return false; >> + >> + return true; >> +} >> + >> +/* Return TRUE if the candidate insn is zero extend and regno is >> + an argument registers. */ >> + >> +static bool >> +abi_extension_candidate_argno_p (/*rtx_code code, */int regno) >> +{ >> + if (FUNCTION_ARG_REGNO_P (regno)) >> + return true; >> + >> + return false; >> +} >> + >> +/* Return TRUE if the candidate insn doesn't have defs and have >> + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class. */ >> + >> +static bool >> +abi_handle_regs (rtx_insn *insn) >> +{ >> + if (side_effects_p (PATTERN (insn))) >> + return false; >> + >> + struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn))); >> + >> + if (!uses) >> + return false; >> + >> + for (df_link *use = uses; use; use = use->next) >> + { >> + if (!use->ref) >> + return false; >> + >> + if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref))) >> + return false; >> + >> + rtx_insn *use_insn = DF_REF_INSN (use->ref); >> + >> + if (GET_CODE (PATTERN (use_insn)) == SET) >> + { >> + rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn))); >> + >> + if (GET_RTX_CLASS (code) == RTX_BIN_ARITH >> + || GET_RTX_CLASS (code) == RTX_COMM_ARITH >> + || GET_RTX_CLASS (code) == RTX_UNARY) >> + return false; >> + } >> + } >> + >> + rtx set = single_set (insn); >> + >> + if (GET_CODE (SET_SRC (set)) == SIGN_EXTEND) >> + { >> + machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0)); >> + bool promote_p = abi_target_promote_function_mode (mode); >> + >> + return promote_p; >> + } >> + return true; >> +} >> + >> /* This function goes through all reaching defs of the source >> of the candidate for elimination (CAND) and tries to combine >> the extension with the definition instruction. The changes >> @@ -770,6 +889,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx >> set_pat, ext_state *state) >> state->defs_list.truncate (0); >> state->copies_list.truncate (0); >> + rtx orig_src = XEXP (SET_SRC (cand->expr),0); >> + >> + if (abi_extension_candidate_p (cand->insn) >> + && (!get_defs (cand->insn, orig_src, NULL))) >> + return abi_handle_regs (cand->insn); >> outcome = make_defs_and_copies_lists (cand->insn, set_pat, state); >> @@ -1036,6 +1160,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx >> set_pat, ext_state *state) >> } >> } >> + rtx insn_set = single_set (cand->insn); >> + >> + machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0))); >> + >> + bool promote_p = abi_target_promote_function_mode (mode); >> + >> + if (promote_p) >> + return true; >> + > > Is this early exit OK ? It skips a subsequent apply_change_group() call which > could potentially fail and thus extension would be undone. > And even for the passing case, we do want those instructions to be merged > normally. So I don't understand how this change is useful at all ? > > FWIW for my test, w/o the patch: apply_change_group would fail (likely > missing some combine pattern) and undo the merge. However w/ patch we just > return/continue, keeping the merged but invalid insn which ICE in next pass > cprop_hardreg as failure to recog that insn. > > Some more details in case you are curious: > > Coming into REE we have > > (insn 7 6 13 2 (set (reg:SI 10 a0 [136]) #DEF > (fix:SI (reg:SF 47 fa5 [orig:138 a ] [138]))) > {fix_truncsfsi2} > > (insn 13 7 14 2 (set (reg/i:DI 10 a0) # extension > (sign_extend:DI (reg:SI 10 a0 [136]))) > {extendsidi2} > > These are merged into unrecog insn as of now: > > (insn 7 6 14 2 (set (reg:DI 10 a0) > (sign_extend:DI (fix:SI (reg:SF 47 fa5 [orig:138 a ] [138])))) > Thanks for your input on this. I have addressed that in the version 10 of the patch please review. Thanks & Regards Ajit > Thx, > -Vineet > >> if (merge_successful) >> { >> /* Commit the changes here if possible >> @@ -1112,6 +1245,14 @@ add_removable_extension (const_rtx expr, rtx_insn >> *insn, >> rtx reg = XEXP (src, 0); >> struct df_link *defs, *def; >> ext_cand *cand; >> + defs = get_defs (insn, reg, NULL); >> + >> + if (!defs && abi_extension_candidate_argno_p (/*code,*/ REGNO (reg))) >> + { >> + ext_cand e = {expr, code, mode, insn}; >> + insn_list->safe_push (e); >> + return; >> + } >> /* Zero-extension of an undefined value is partly defined (it's >> completely undefined for sign-extension, though). So if there exists >> @@ -1131,7 +1272,6 @@ add_removable_extension (const_rtx expr, rtx_insn >> *insn, >> } >> /* Second, make sure we can get all the reaching definitions. */ >> - defs = get_defs (insn, reg, NULL); >> if (!defs) >> { >> if (dump_file) >> @@ -1321,7 +1461,8 @@ find_and_remove_re (void) >> && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)))) >> { >> reinsn_copy_list.safe_push (curr_cand->insn); >> - reinsn_copy_list.safe_push (state.defs_list[0]); >> + if (state.defs_list.length () != 0) >> + reinsn_copy_list.safe_push (state.defs_list[0]); >> } >> reinsn_del_list.safe_push (curr_cand->insn); >> state.modified[INSN_UID (curr_cand->insn)].deleted = 1; >> @@ -1345,6 +1486,10 @@ find_and_remove_re (void) >> for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2) >> { >> rtx_insn *curr_insn = reinsn_copy_list[i]; >> + >> + if ((i+1) >= reinsn_copy_list.length ()) >> + continue; >> + >> rtx_insn *def_insn = reinsn_copy_list[i + 1]; >> /* Use the mode of the destination of the defining insn >> diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C >> b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C >> new file mode 100644 >> index 00000000000..5a050df06ff >> --- /dev/null >> +++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C >> @@ -0,0 +1,13 @@ >> +/* { dg-options "-mcpu=power9 -O2" } */ >> + >> +void *memset(void *b, int c, unsigned long len) >> +{ >> + unsigned long i; >> + >> + for (i = 0; i < len; i++) >> + ((unsigned char *)b)[i] = c; >> + >> + return b; >> +} >> + >> +/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */ >