Hi Haochen,

on 2023/11/28 15:43, HAO CHEN GUI wrote:
> Hi,
>   This patch passes down the equality only flags from
> emit_block_cmp_hints to cmpmem optab so that the target specific expand
> can generate optimized insns for equality only compare. Targets
> (e.g. rs6000) can generate more efficient insn sequence if the block
> compare is equality only.

Adding more CCs since this patch changes the generic part of the code.

> 
>   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with
> no regressions. Is this OK for trunk?
> 
> Thanks
> Gui Haochen
> 
> ChangeLog
> Expand: Pass down equality only flag to cmpmem expand
> 
> Targets (e.g. rs6000) can generate more efficient insn sequence if the
> block compare is equality only.  This patch passes down the equality
> only flags from emit_block_cmp_hints to cmpmem optab so that the target
> specific expand can generate optimized insns for equality only compare.
> 
> gcc/
>       * expr.cc (expand_cmpstrn_or_cmpmem): Rename to...

Maybe we can still keep this function expand_cmpstrn_or_cmpmem but give it
an additional argument such as "int equality_only = -1", where the default
-1 means the underlying optab expansion doesn't support equality_only, and
1 and 0 stand for what you proposed.  That would avoid duplicating
expand_cmpstrn_or_cmpmem.

>       (expand_cmpstrn): ...this.
>       (expand_cmpmem): New function.  Pass down equality only flag to
>       cmpmem expand.
>       (emit_block_cmp_via_cmpmem): Add an argument for equality only
>       flag and call expand_cmpmem instead of expand_cmpstrn_or_cmpmem.
>       (emit_block_cmp_hints): Call emit_block_cmp_via_cmpmem with
>       equality only flag.
>       * expr.h (expand_cmpstrn, expand_cmpmem): Declare.
>       * builtins.cc (expand_builtin_strcmp, expand_builtin_strncmp):
>       Call expand_cmpstrn instead of expand_cmpstrn_or_cmpmem.
>       * config/i386/i386.md (cmpmemsi): Add the sixth operand for
>       equality only flag.
>       * config/rs6000/rs6000.md (cmpmemsi): Likewise.
>       * config/s390/s390.md (cmpmemsi): Likewise.
>       * doc/md.texi (cmpmem): Modify the document and add an operand
>       for equality only flag.
> 
> patch.diff
> diff --git a/gcc/builtins.cc b/gcc/builtins.cc
> index 5ece0d23eb9..c2dbc25433d 100644
> --- a/gcc/builtins.cc
> +++ b/gcc/builtins.cc
> @@ -4819,7 +4819,7 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx 
> target)
>        if (len && !TREE_SIDE_EFFECTS (len))
>       {
>         arg3_rtx = expand_normal (len);
> -       result = expand_cmpstrn_or_cmpmem
> +       result = expand_cmpstrn
>           (cmpstrn_icode, target, arg1_rtx, arg2_rtx, TREE_TYPE (len),
>            arg3_rtx, MIN (arg1_align, arg2_align));
>       }
> @@ -4929,9 +4929,9 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx 
> target,
>    rtx arg1_rtx = get_memory_rtx (arg1, len);
>    rtx arg2_rtx = get_memory_rtx (arg2, len);
>    rtx arg3_rtx = expand_normal (len);
> -  result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx,
> -                                  arg2_rtx, TREE_TYPE (len), arg3_rtx,
> -                                  MIN (arg1_align, arg2_align));
> +  result = expand_cmpstrn (cmpstrn_icode, target, arg1_rtx, arg2_rtx,
> +                        TREE_TYPE (len), arg3_rtx,
> +                        MIN (arg1_align, arg2_align));
> 
>    tree fndecl = get_callee_fndecl (exp);
>    if (result)
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 1b5a794b9e5..775cba5d93d 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -23195,7 +23195,8 @@ (define_expand "cmpmemsi"
>          (compare:SI (match_operand:BLK 1 "memory_operand" "")
>                      (match_operand:BLK 2 "memory_operand" "") ) )
>     (use (match_operand 3 "general_operand"))
> -   (use (match_operand 4 "immediate_operand"))]
> +   (use (match_operand 4 "immediate_operand"))
> +   (use (match_operand 5 ""))]

As the other operands have predicates, maybe the i386 folks would want
the new operand 5 to use "const_0_to_1_operand".

BR,
Kewen

>    ""
>  {
>    if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 2a1b5ecfaee..e66330f876e 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -10097,7 +10097,8 @@ (define_expand "cmpmemsi"
>                 (compare:SI (match_operand:BLK 1)
>                             (match_operand:BLK 2)))
>             (use (match_operand:SI 3))
> -           (use (match_operand:SI 4))])]
> +           (use (match_operand:SI 4))
> +           (use (match_operand:SI 5))])]
>    "TARGET_POPCNTD"
>  {
>    if (expand_block_compare (operands))
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 4bdb679daf2..506e79fb035 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -3790,7 +3790,8 @@ (define_expand "cmpmemsi"
>          (compare:SI (match_operand:BLK 1 "memory_operand" "")
>                      (match_operand:BLK 2 "memory_operand" "") ) )
>     (use (match_operand:SI 3 "general_operand" ""))
> -   (use (match_operand:SI 4 "" ""))]
> +   (use (match_operand:SI 4 "" ""))
> +   (use (match_operand:SI 5 "" ""))]
>    ""
>  {
>    if (s390_expand_cmpmem (operands[0], operands[1],
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index e01cdcbe22c..06955cd7e78 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6992,14 +6992,19 @@ result of the comparison.
> 
>  @cindex @code{cmpmem@var{m}} instruction pattern
>  @item @samp{cmpmem@var{m}}
> -Block compare instruction, with five operands like the operands
> -of @samp{cmpstr@var{m}}.  The two memory blocks specified are compared
> -byte by byte in lexicographic order starting at the beginning of each
> -block.  Unlike @samp{cmpstr@var{m}} the instruction can prefetch
> -any bytes in the two memory blocks.  Also unlike @samp{cmpstr@var{m}}
> -the comparison will not stop if both bytes are zero.  The effect of
> -the instruction is to store a value in operand 0 whose sign indicates
> -the result of the comparison.
> +Block compare instruction, with six operands.  The first five operands are
> +like the operands of @samp{cmpstr@var{m}}.  The last operand indicates
> +whether the comparison is equality or not.  Value one means it's an
> +equality only compare and zero means it's a non-equality compare.
> +
> +The two memory blocks specified are compared byte by byte in lexicographic
> +order starting at the beginning of each block.  Unlike @samp{cmpstr@var{m}}
> +the instruction can prefetch any bytes in the two memory blocks.  Also
> +unlike @samp{cmpstr@var{m}} the comparison will not stop if both bytes are
> +zero.  When last operand is zero, the effect of the instruction is to store
> +a value in operand 0 whose sign indicates the result of the comparison.
> +When last operand is one, zero in operand 0 indicates two blocks are equal.
> +All other values in operand 0 indicate two blocks are not equal.
> 
>  @cindex @code{strlen@var{m}} instruction pattern
>  @item @samp{strlen@var{m}}
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index 6dd9b8f2ce6..3cdc5181bd3 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -2381,14 +2381,13 @@ emit_block_op_via_libcall (enum built_in_function 
> fncode, rtx dst, rtx src,
>    return expand_call (call_expr, NULL_RTX, false);
>  }
> 
> -/* Try to expand cmpstrn or cmpmem operation ICODE with the given operands.
> +/* Try to expand cmpstrn operation ICODE with the given operands.
>     ARG3_TYPE is the type of ARG3_RTX.  Return the result rtx on success,
>     otherwise return null.  */
> 
>  rtx
> -expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
> -                       rtx arg2_rtx, tree arg3_type, rtx arg3_rtx,
> -                       HOST_WIDE_INT align)
> +expand_cmpstrn (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
> +             tree arg3_type, rtx arg3_rtx, HOST_WIDE_INT align)
>  {
>    machine_mode insn_mode = insn_data[icode].operand[0].mode;
> 
> @@ -2407,6 +2406,34 @@ expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, 
> rtx arg1_rtx,
>    return NULL_RTX;
>  }
> 
> +/* Similar as expand_cmpstrn, the last operand indicates whether it is a
> +   equality comparison or not.  */
> +rtx
> +expand_cmpmem (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
> +            tree arg3_type, rtx arg3_rtx, HOST_WIDE_INT align,
> +            bool equality_only)
> +{
> +  machine_mode insn_mode = insn_data[icode].operand[0].mode;
> +
> +  if (target && (!REG_P (target) || HARD_REGISTER_P (target)))
> +    target = NULL_RTX;
> +
> +  class expand_operand ops[6];
> +  create_output_operand (&ops[0], target, insn_mode);
> +  create_fixed_operand (&ops[1], arg1_rtx);
> +  create_fixed_operand (&ops[2], arg2_rtx);
> +  create_convert_operand_from (&ops[3], arg3_rtx, TYPE_MODE (arg3_type),
> +                            TYPE_UNSIGNED (arg3_type));
> +  create_integer_operand (&ops[4], align);
> +  if (equality_only)
> +    create_integer_operand (&ops[5], 1);
> +  else
> +    create_integer_operand (&ops[5], 0);
> +  if (maybe_expand_insn (icode, 6, ops))
> +    return ops[0].value;
> +  return NULL_RTX;
> +}
> +
>  /* Expand a block compare between X and Y with length LEN using the
>     cmpmem optab, placing the result in TARGET.  LEN_TYPE is the type
>     of the expression that was used to calculate the length.  ALIGN
> @@ -2414,7 +2441,7 @@ expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, 
> rtx arg1_rtx,
> 
>  static rtx
>  emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target,
> -                        unsigned align)
> +                        unsigned align, bool equality_only)
>  {
>    /* Note: The cmpstrnsi pattern, if it exists, is not suitable for
>       implementing memcmp because it will stop if it encounters two
> @@ -2424,7 +2451,8 @@ emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree 
> len_type, rtx target,
>    if (icode == CODE_FOR_nothing)
>      return NULL_RTX;
> 
> -  return expand_cmpstrn_or_cmpmem (icode, target, x, y, len_type, len, 
> align);
> +  return expand_cmpmem (icode, target, x, y, len_type, len, align,
> +                     equality_only);
>  }
> 
>  /* Emit code to compare a block Y to a block X.  This may be done with
> @@ -2469,7 +2497,8 @@ emit_block_cmp_hints (rtx x, rtx y, rtx len, tree 
> len_type, rtx target,
>      result = compare_by_pieces (x, y, INTVAL (len), target, align,
>                               y_cfn, y_cfndata);
>    else
> -    result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align);
> +    result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align,
> +                                     equality_only);
> 
>    return result;
>  }
> diff --git a/gcc/expr.h b/gcc/expr.h
> index 2a172867fdb..64dbbcfcaad 100644
> --- a/gcc/expr.h
> +++ b/gcc/expr.h
> @@ -199,8 +199,9 @@ extern void use_regs (rtx *, int, int);
>  extern void use_group_regs (rtx *, rtx);
> 
>  #ifdef GCC_INSN_CODES_H
> -extern rtx expand_cmpstrn_or_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx,
> -                                  HOST_WIDE_INT);
> +extern rtx expand_cmpstrn (insn_code, rtx, rtx, rtx, tree, rtx, 
> HOST_WIDE_INT);
> +extern rtx expand_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx, HOST_WIDE_INT,
> +                       bool);
>  #endif
> 
>  /* Write zeros through the storage of OBJECT.

Reply via email to