HAO CHEN GUI <guih...@linux.ibm.com> writes: > Hi, > Vector mode instructions are efficient for compare on some targets. > This patch enables vector mode for compare_by_pieces. Two help > functions are added to check if vector mode is available for certain > by pieces operations and if if optabs exists for the mode and certain > by pieces operations. One member is added in class op_by_pieces_d to > record the type of operations. > > The test case is in the second patch which is rs6000 specific. > > Compared to last version, the main change is to add a target hook > check - scalar_mode_supported_p when retrieving the available scalar > modes. The mode which is not supported for a target should be skipped. > (e.g. TImode on ppc). Also some function names and comments are refined > according to reviewer's advice. > > Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no > regressions. > > Thanks > Gui Haochen > > ChangeLog > Expand: Enable vector mode for by pieces compares > > Vector mode compare instructions are efficient for equality compare on > rs6000. This patch refactors the codes of by pieces operation to enable > vector mode for compare. > > gcc/ > PR target/111449 > * expr.cc (can_use_qi_vectors): New function to return true if > we know how to implement OP using vectors of bytes. > (qi_vector_mode_supported_p): New function to check if optabs > exists for the mode and certain by pieces operations. > (widest_fixed_size_mode_for_size): Replace the second argument > with the type of by pieces operations. Call can_use_qi_vectors > and qi_vector_mode_supported_p to do the check. Call > scalar_mode_supported_p to check if the scalar mode is supported. > (by_pieces_ninsns): Pass the type of by pieces operation to > widest_fixed_size_mode_for_size. > (class op_by_pieces_d): Remove m_qi_vector_mode. Add m_op to > record the type of by pieces operations. > (op_by_pieces_d::op_by_pieces_d): Change last argument to the > type of by pieces operations, initialize m_op with it. Pass > m_op to function widest_fixed_size_mode_for_size. > (op_by_pieces_d::get_usable_mode): Pass m_op to function > widest_fixed_size_mode_for_size. > (op_by_pieces_d::smallest_fixed_size_mode_for_size): Call > can_use_qi_vectors and qi_vector_mode_supported_p to do the > check. > (op_by_pieces_d::run): Pass m_op to function > widest_fixed_size_mode_for_size. > (move_by_pieces_d::move_by_pieces_d): Set m_op to MOVE_BY_PIECES. > (store_by_pieces_d::store_by_pieces_d): Set m_op with the op. > (can_store_by_pieces): Pass the type of by pieces operations to > widest_fixed_size_mode_for_size. > (clear_by_pieces): Initialize class store_by_pieces_d with > CLEAR_BY_PIECES. > (compare_by_pieces_d::compare_by_pieces_d): Set m_op to > COMPARE_BY_PIECES.
OK, thanks. And thanks for your patience. Richard > patch.diff > diff --git a/gcc/expr.cc b/gcc/expr.cc > index 2c9930ec674..ad5f9dd8ec2 100644 > --- a/gcc/expr.cc > +++ b/gcc/expr.cc > @@ -988,18 +988,44 @@ alignment_for_piecewise_move (unsigned int max_pieces, > unsigned int align) > return align; > } > > -/* Return the widest QI vector, if QI_MODE is true, or integer mode > - that is narrower than SIZE bytes. */ > +/* Return true if we know how to implement OP using vectors of bytes. */ > +static bool > +can_use_qi_vectors (by_pieces_operation op) > +{ > + return (op == COMPARE_BY_PIECES > + || op == SET_BY_PIECES > + || op == CLEAR_BY_PIECES); > +} > + > +/* Return true if optabs exists for the mode and certain by pieces > + operations. */ > +static bool > +qi_vector_mode_supported_p (fixed_size_mode mode, by_pieces_operation op) > +{ > + if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) > + && optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing) > + return true; > + > + if (op == COMPARE_BY_PIECES > + && optab_handler (mov_optab, mode) != CODE_FOR_nothing > + && can_compare_p (EQ, mode, ccp_jump)) > + return true; > > + return false; > +} > + > +/* Return the widest mode that can be used to perform part of an > + operation OP on SIZE bytes. Try to use QI vector modes where > + possible. */ > static fixed_size_mode > -widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) > +widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op) > { > fixed_size_mode result = NARROWEST_INT_MODE; > > gcc_checking_assert (size > 1); > > /* Use QI vector only if size is wider than a WORD. */ > - if (qi_vector && size > UNITS_PER_WORD) > + if (can_use_qi_vectors (op) && size > UNITS_PER_WORD) > { > machine_mode mode; > fixed_size_mode candidate; > @@ -1009,8 +1035,7 @@ widest_fixed_size_mode_for_size (unsigned int size, > bool qi_vector) > { > if (GET_MODE_SIZE (candidate) >= size) > break; > - if (optab_handler (vec_duplicate_optab, candidate) > - != CODE_FOR_nothing) > + if (qi_vector_mode_supported_p (candidate, op)) > result = candidate; > } > > @@ -1019,9 +1044,14 @@ widest_fixed_size_mode_for_size (unsigned int size, > bool qi_vector) > } > > opt_scalar_int_mode tmode; > + scalar_int_mode mode; > FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) > - if (GET_MODE_SIZE (tmode.require ()) < size) > - result = tmode.require (); > + { > + mode = tmode.require (); > + if (GET_MODE_SIZE (mode) < size > + && targetm.scalar_mode_supported_p (mode)) > + result = mode; > + } > > return result; > } > @@ -1061,8 +1091,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned > int align, > { > /* NB: Round up L and ALIGN to the widest integer mode for > MAX_SIZE. */ > - mode = widest_fixed_size_mode_for_size (max_size, > - op == SET_BY_PIECES); > + mode = widest_fixed_size_mode_for_size (max_size, op); > if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) > { > unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode)); > @@ -1076,8 +1105,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned > int align, > > while (max_size > 1 && l > 0) > { > - mode = widest_fixed_size_mode_for_size (max_size, > - op == SET_BY_PIECES); > + mode = widest_fixed_size_mode_for_size (max_size, op); > enum insn_code icode; > > unsigned int modesize = GET_MODE_SIZE (mode); > @@ -1317,8 +1345,8 @@ class op_by_pieces_d > bool m_push; > /* True if targetm.overlap_op_by_pieces_p () returns true. */ > bool m_overlap_op_by_pieces; > - /* True if QI vector mode can be used. */ > - bool m_qi_vector_mode; > + /* The type of operation that we're performing. */ > + by_pieces_operation m_op; > > /* Virtual functions, overriden by derived classes for the specific > operation. */ > @@ -1331,7 +1359,7 @@ class op_by_pieces_d > public: > op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn, > void *, unsigned HOST_WIDE_INT, unsigned int, bool, > - bool = false); > + by_pieces_operation); > void run (); > }; > > @@ -1349,11 +1377,11 @@ op_by_pieces_d::op_by_pieces_d (unsigned int > max_pieces, rtx to, > void *from_cfn_data, > unsigned HOST_WIDE_INT len, > unsigned int align, bool push, > - bool qi_vector_mode) > + by_pieces_operation op) > : m_to (to, to_load, NULL, NULL), > m_from (from, from_load, from_cfn, from_cfn_data), > m_len (len), m_max_size (max_pieces + 1), > - m_push (push), m_qi_vector_mode (qi_vector_mode) > + m_push (push), m_op (op) > { > int toi = m_to.get_addr_inc (); > int fromi = m_from.get_addr_inc (); > @@ -1375,8 +1403,7 @@ op_by_pieces_d::op_by_pieces_d (unsigned int > max_pieces, rtx to, > { > /* Find the mode of the largest comparison. */ > fixed_size_mode mode > - = widest_fixed_size_mode_for_size (m_max_size, > - m_qi_vector_mode); > + = widest_fixed_size_mode_for_size (m_max_size, m_op); > > m_from.decide_autoinc (mode, m_reverse, len); > m_to.decide_autoinc (mode, m_reverse, len); > @@ -1401,7 +1428,7 @@ op_by_pieces_d::get_usable_mode (fixed_size_mode mode, > unsigned int len) > if (len >= size && prepare_mode (mode, m_align)) > break; > /* widest_fixed_size_mode_for_size checks SIZE > 1. */ > - mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode); > + mode = widest_fixed_size_mode_for_size (size, m_op); > } > while (1); > return mode; > @@ -1414,7 +1441,7 @@ fixed_size_mode > op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) > { > /* Use QI vector only for > size of WORD. */ > - if (m_qi_vector_mode && size > UNITS_PER_WORD) > + if (can_use_qi_vectors (m_op) && size > UNITS_PER_WORD) > { > machine_mode mode; > fixed_size_mode candidate; > @@ -1427,8 +1454,7 @@ op_by_pieces_d::smallest_fixed_size_mode_for_size > (unsigned int size) > break; > > if (GET_MODE_SIZE (candidate) >= size > - && (optab_handler (vec_duplicate_optab, candidate) > - != CODE_FOR_nothing)) > + && qi_vector_mode_supported_p (candidate, m_op)) > return candidate; > } > } > @@ -1451,7 +1477,7 @@ op_by_pieces_d::run () > > /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */ > fixed_size_mode mode > - = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode); > + = widest_fixed_size_mode_for_size (m_max_size, m_op); > mode = get_usable_mode (mode, length); > > by_pieces_prev to_prev = { nullptr, mode }; > @@ -1516,8 +1542,7 @@ op_by_pieces_d::run () > else > { > /* widest_fixed_size_mode_for_size checks SIZE > 1. */ > - mode = widest_fixed_size_mode_for_size (size, > - m_qi_vector_mode); > + mode = widest_fixed_size_mode_for_size (size, m_op); > mode = get_usable_mode (mode, length); > } > } > @@ -1543,7 +1568,7 @@ class move_by_pieces_d : public op_by_pieces_d > move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len, > unsigned int align) > : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL, > - NULL, len, align, PUSHG_P (to)) > + NULL, len, align, PUSHG_P (to), MOVE_BY_PIECES) > { > } > rtx finish_retmode (memop_ret); > @@ -1632,15 +1657,16 @@ move_by_pieces (rtx to, rtx from, unsigned > HOST_WIDE_INT len, > class store_by_pieces_d : public op_by_pieces_d > { > insn_gen_fn m_gen_fun; > + > void generate (rtx, rtx, machine_mode) final override; > bool prepare_mode (machine_mode, unsigned int) final override; > > public: > store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data, > unsigned HOST_WIDE_INT len, unsigned int align, > - bool qi_vector_mode) > + by_pieces_operation op) > : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn, > - cfn_data, len, align, false, qi_vector_mode) > + cfn_data, len, align, false, op) > { > } > rtx finish_retmode (memop_ret); > @@ -1729,8 +1755,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, > max_size = STORE_MAX_PIECES + 1; > while (max_size > 1 && l > 0) > { > - fixed_size_mode mode > - = widest_fixed_size_mode_for_size (max_size, memsetp); > + auto op = memsetp ? SET_BY_PIECES : STORE_BY_PIECES; > + auto mode = widest_fixed_size_mode_for_size (max_size, op); > > icode = optab_handler (mov_optab, mode); > if (icode != CODE_FOR_nothing > @@ -1793,7 +1819,7 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len, > optimize_insn_for_speed_p ())); > > store_by_pieces_d data (to, constfun, constfundata, len, align, > - memsetp); > + memsetp ? SET_BY_PIECES : STORE_BY_PIECES); > data.run (); > > if (retmode != RETURN_BEGIN) > @@ -1814,7 +1840,7 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, > unsigned int align) > /* Use builtin_memset_read_str to support vector mode broadcast. */ > char c = 0; > store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align, > - true); > + CLEAR_BY_PIECES); > data.run (); > } > > @@ -1832,12 +1858,13 @@ class compare_by_pieces_d : public op_by_pieces_d > void generate (rtx, rtx, machine_mode) final override; > bool prepare_mode (machine_mode, unsigned int) final override; > void finish_mode (machine_mode) final override; > + > public: > compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn, > void *op1_cfn_data, HOST_WIDE_INT len, int align, > rtx_code_label *fail_label) > : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn, > - op1_cfn_data, len, align, false) > + op1_cfn_data, len, align, false, COMPARE_BY_PIECES) > { > m_fail_label = fail_label; > }