Hi Haochen, on 2023/11/28 15:43, HAO CHEN GUI wrote: > Hi, > This patch passes down the equality only flags from > emit_block_cmp_hints to cmpmem optab so that the target specific expand > can generate optimized insns for equality only compare. Targets > (e.g. rs6000) can generate more efficient insn sequence if the block > compare is equality only.
Adding more CCs, since this patch changes a generic part of the code. > > Bootstrapped and tested on x86 and powerpc64-linux BE and LE with > no regressions. Is this OK for trunk? > > Thanks > Gui Haochen > > ChangeLog > Expand: Pass down equality only flag to cmpmem expand > > Targets (e.g. rs6000) can generate more efficient insn sequence if the > block compare is equality only. This patch passes down the equality > only flags from emit_block_cmp_hints to cmpmem optab so that the target > specific expand can generate optimized insns for equality only compare. > > gcc/ > * expr.cc (expand_cmpstrn_or_cmpmem): Rename to... Maybe we can still keep this function expand_cmpstrn_or_cmpmem but give it an additional argument such as "int equality_only = -1" (the default of -1 means the underlying optab expansion doesn't support equality_only, while 1 and 0 stand for what you proposed), to avoid duplicating expand_cmpstrn_or_cmpmem. > (expand_cmpstrn): ...this. > (expand_cmpmem): New function. Pass down equality only flag to > cmpmem expand. > (emit_block_cmp_via_cmpmem): Add an argument for equality only > flag and call expand_cmpmem instead of expand_cmpstrn_or_cmpmem. > (emit_block_cmp_hints): Call emit_block_cmp_via_cmpmem with > equality only flag. > * expr.h (expand_cmpstrn, expand_cmpmem): Declare. > * builtins.cc (expand_builtin_strcmp, expand_builtin_strncmp): > Call expand_cmpstrn instead of expand_cmpstrn_or_cmpmem. > * config/i386/i386.md (cmpmemsi): Add the sixth operand for > equality only flag. > * config/rs6000/rs6000.md (cmpmemsi): Likewise. > * config/s390/s390.md (cmpmemsi): Likewise. > * doc/md.texi (cmpmem): Modify the document and add an operand > for equality only flag. 
> > patch.diff > diff --git a/gcc/builtins.cc b/gcc/builtins.cc > index 5ece0d23eb9..c2dbc25433d 100644 > --- a/gcc/builtins.cc > +++ b/gcc/builtins.cc > @@ -4819,7 +4819,7 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx > target) > if (len && !TREE_SIDE_EFFECTS (len)) > { > arg3_rtx = expand_normal (len); > - result = expand_cmpstrn_or_cmpmem > + result = expand_cmpstrn > (cmpstrn_icode, target, arg1_rtx, arg2_rtx, TREE_TYPE (len), > arg3_rtx, MIN (arg1_align, arg2_align)); > } > @@ -4929,9 +4929,9 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx > target, > rtx arg1_rtx = get_memory_rtx (arg1, len); > rtx arg2_rtx = get_memory_rtx (arg2, len); > rtx arg3_rtx = expand_normal (len); > - result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx, > - arg2_rtx, TREE_TYPE (len), arg3_rtx, > - MIN (arg1_align, arg2_align)); > + result = expand_cmpstrn (cmpstrn_icode, target, arg1_rtx, arg2_rtx, > + TREE_TYPE (len), arg3_rtx, > + MIN (arg1_align, arg2_align)); > > tree fndecl = get_callee_fndecl (exp); > if (result) > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 1b5a794b9e5..775cba5d93d 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -23195,7 +23195,8 @@ (define_expand "cmpmemsi" > (compare:SI (match_operand:BLK 1 "memory_operand" "") > (match_operand:BLK 2 "memory_operand" "") ) ) > (use (match_operand 3 "general_operand")) > - (use (match_operand 4 "immediate_operand"))] > + (use (match_operand 4 "immediate_operand")) > + (use (match_operand 5 ""))] Since the other operands all have predicates, maybe the i386 folks would want to use "const_0_to_1_operand" here. 
BR, Kewen > "" > { > if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1], > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 2a1b5ecfaee..e66330f876e 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -10097,7 +10097,8 @@ (define_expand "cmpmemsi" > (compare:SI (match_operand:BLK 1) > (match_operand:BLK 2))) > (use (match_operand:SI 3)) > - (use (match_operand:SI 4))])] > + (use (match_operand:SI 4)) > + (use (match_operand:SI 5))])] > "TARGET_POPCNTD" > { > if (expand_block_compare (operands)) > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md > index 4bdb679daf2..506e79fb035 100644 > --- a/gcc/config/s390/s390.md > +++ b/gcc/config/s390/s390.md > @@ -3790,7 +3790,8 @@ (define_expand "cmpmemsi" > (compare:SI (match_operand:BLK 1 "memory_operand" "") > (match_operand:BLK 2 "memory_operand" "") ) ) > (use (match_operand:SI 3 "general_operand" "")) > - (use (match_operand:SI 4 "" ""))] > + (use (match_operand:SI 4 "" "")) > + (use (match_operand:SI 5 "" ""))] > "" > { > if (s390_expand_cmpmem (operands[0], operands[1], > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi > index e01cdcbe22c..06955cd7e78 100644 > --- a/gcc/doc/md.texi > +++ b/gcc/doc/md.texi > @@ -6992,14 +6992,19 @@ result of the comparison. > > @cindex @code{cmpmem@var{m}} instruction pattern > @item @samp{cmpmem@var{m}} > -Block compare instruction, with five operands like the operands > -of @samp{cmpstr@var{m}}. The two memory blocks specified are compared > -byte by byte in lexicographic order starting at the beginning of each > -block. Unlike @samp{cmpstr@var{m}} the instruction can prefetch > -any bytes in the two memory blocks. Also unlike @samp{cmpstr@var{m}} > -the comparison will not stop if both bytes are zero. The effect of > -the instruction is to store a value in operand 0 whose sign indicates > -the result of the comparison. > +Block compare instruction, with six operands. 
The first five operands are > +like the operands of @samp{cmpstr@var{m}}. The last operand indicates > +whether the comparison is equality or not. Value one means it's an > +equality only compare and zero means it's a non-equality compare. > + > +The two memory blocks specified are compared byte by byte in lexicographic > +order starting at the beginning of each block. Unlike @samp{cmpstr@var{m}} > +the instruction can prefetch any bytes in the two memory blocks. Also > +unlike @samp{cmpstr@var{m}} the comparison will not stop if both bytes are > +zero. When last operand is zero, the effect of the instruction is to store > +a value in operand 0 whose sign indicates the result of the comparison. > +When last operand is one, zero in operand 0 indicates two blocks are equal. > +All other values in operand 0 indicate two blocks are not equal. > > @cindex @code{strlen@var{m}} instruction pattern > @item @samp{strlen@var{m}} > diff --git a/gcc/expr.cc b/gcc/expr.cc > index 6dd9b8f2ce6..3cdc5181bd3 100644 > --- a/gcc/expr.cc > +++ b/gcc/expr.cc > @@ -2381,14 +2381,13 @@ emit_block_op_via_libcall (enum built_in_function > fncode, rtx dst, rtx src, > return expand_call (call_expr, NULL_RTX, false); > } > > -/* Try to expand cmpstrn or cmpmem operation ICODE with the given operands. > +/* Try to expand cmpstrn operation ICODE with the given operands. > ARG3_TYPE is the type of ARG3_RTX. Return the result rtx on success, > otherwise return null. 
*/ > > rtx > -expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx, > - rtx arg2_rtx, tree arg3_type, rtx arg3_rtx, > - HOST_WIDE_INT align) > +expand_cmpstrn (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx, > + tree arg3_type, rtx arg3_rtx, HOST_WIDE_INT align) > { > machine_mode insn_mode = insn_data[icode].operand[0].mode; > > @@ -2407,6 +2406,34 @@ expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, > rtx arg1_rtx, > return NULL_RTX; > } > > +/* Similar as expand_cmpstrn, the last operand indicates whether it is a > + equality comparison or not. */ > +rtx > +expand_cmpmem (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx, > + tree arg3_type, rtx arg3_rtx, HOST_WIDE_INT align, > + bool equality_only) > +{ > + machine_mode insn_mode = insn_data[icode].operand[0].mode; > + > + if (target && (!REG_P (target) || HARD_REGISTER_P (target))) > + target = NULL_RTX; > + > + class expand_operand ops[6]; > + create_output_operand (&ops[0], target, insn_mode); > + create_fixed_operand (&ops[1], arg1_rtx); > + create_fixed_operand (&ops[2], arg2_rtx); > + create_convert_operand_from (&ops[3], arg3_rtx, TYPE_MODE (arg3_type), > + TYPE_UNSIGNED (arg3_type)); > + create_integer_operand (&ops[4], align); > + if (equality_only) > + create_integer_operand (&ops[5], 1); > + else > + create_integer_operand (&ops[5], 0); > + if (maybe_expand_insn (icode, 6, ops)) > + return ops[0].value; > + return NULL_RTX; > +} > + > /* Expand a block compare between X and Y with length LEN using the > cmpmem optab, placing the result in TARGET. LEN_TYPE is the type > of the expression that was used to calculate the length. 
ALIGN > @@ -2414,7 +2441,7 @@ expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, > rtx arg1_rtx, > > static rtx > emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target, > - unsigned align) > + unsigned align, bool equality_only) > { > /* Note: The cmpstrnsi pattern, if it exists, is not suitable for > implementing memcmp because it will stop if it encounters two > @@ -2424,7 +2451,8 @@ emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree > len_type, rtx target, > if (icode == CODE_FOR_nothing) > return NULL_RTX; > > - return expand_cmpstrn_or_cmpmem (icode, target, x, y, len_type, len, > align); > + return expand_cmpmem (icode, target, x, y, len_type, len, align, > + equality_only); > } > > /* Emit code to compare a block Y to a block X. This may be done with > @@ -2469,7 +2497,8 @@ emit_block_cmp_hints (rtx x, rtx y, rtx len, tree > len_type, rtx target, > result = compare_by_pieces (x, y, INTVAL (len), target, align, > y_cfn, y_cfndata); > else > - result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align); > + result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align, > + equality_only); > > return result; > } > diff --git a/gcc/expr.h b/gcc/expr.h > index 2a172867fdb..64dbbcfcaad 100644 > --- a/gcc/expr.h > +++ b/gcc/expr.h > @@ -199,8 +199,9 @@ extern void use_regs (rtx *, int, int); > extern void use_group_regs (rtx *, rtx); > > #ifdef GCC_INSN_CODES_H > -extern rtx expand_cmpstrn_or_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx, > - HOST_WIDE_INT); > +extern rtx expand_cmpstrn (insn_code, rtx, rtx, rtx, tree, rtx, > HOST_WIDE_INT); > +extern rtx expand_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx, HOST_WIDE_INT, > + bool); > #endif > > /* Write zeros through the storage of OBJECT.