On Mon, Jul 1, 2024 at 8:17 AM Kewen.Lin <li...@linux.ibm.com> wrote:
>
> Hi,
>
> As PR115659 shows, assuming c = x CMP y, there are some
> folding chances for patterns r = c ? 0/z : z/-1:
>   - For r = c ? 0 : z, it can be folded into r = ~c & z.
>   - For r = c ? z : -1, it can be folded into r = ~c | z.
>
> But BIT_AND/BIT_IOR applied on one BIT_NOT operand is a
> compound operation, and I'm not sure whether each target with
> vector capability has a single vector instruction for it;
> if not, it's arguable whether it always beats vector
> selection (e.g. the vector constant gets hoisted or combined
> and selection has the same latency as a normal logical operation).
> So IMHO we probably need to query the target with new optabs.
> So this patch introduces new optabs andc and iorc and their
> corresponding internal functions BIT_{ANDC,IORC} (looking
> for suggestions on naming the optabs and ifns); if a target
> defines such optabs for vector modes, it means the target
> supports these hardware insns and they should be no worse than
> vector selection.  btw, the rs6000 changes are meant to
> give an example for a target supporting andc/iorc.
>
> Does this sound reasonable?

I think it's reasonable to have andc - there are quite a few CPUs
that have this op on GPRs as well, I think, called andn (but I don't
want to get into bike-shedding).  A corresponding iorc is then
a natural extension (likewise xorc).  AVX512 has a very powerful
vector ternlog (but no scalar andn).

I was surprised to not see an existing optab for andn.

So OK from my side in case there are no negative comments or
bikeshedding on the name.  I can't approve the rs6000 changes
though.

Thanks,
Richard.

> BR,
> Kewen
> -----
>
>         PR tree-optimization/115659
>
> gcc/ChangeLog:
>
>         * config/rs6000/rs6000-builtins.def: Update some bif expanders by
>         replacing orc<mode>3 with iorc<mode>3.
>         * config/rs6000/rs6000-string.cc (expand_cmp_vec_sequence): Update gen
>         function by replacing orc<mode>3 with iorc<mode>3.
>         * config/rs6000/rs6000.md (orc<mode>3): Rename to ...
>         (iorc<mode>3): ... this.
>         * doc/md.texi: Document andcm3 and iorcm3.
>         * gimple-isel.cc (gimple_expand_vec_cond_expr): Add more foldings for
>         patterns x CMP y ? 0 : z and x CMP y ? z : -1.
>         * internal-fn.def (BIT_ANDC): New internal function.
>         (BIT_IORC): Likewise.
>         * optabs.def (andc, iorc): New optab.
> ---
>  gcc/config/rs6000/rs6000-builtins.def | 24 ++++++++++++------------
>  gcc/config/rs6000/rs6000-string.cc    |  2 +-
>  gcc/config/rs6000/rs6000.md           |  2 +-
>  gcc/doc/md.texi                       | 10 ++++++++++
>  gcc/gimple-isel.cc                    | 24 ++++++++++++++++++++++++
>  gcc/internal-fn.def                   |  4 ++++
>  gcc/optabs.def                        |  2 ++
>  7 files changed, 54 insertions(+), 14 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index 3bc7fed6956..736890fe6cb 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2147,40 +2147,40 @@
>      NEG_V2DI negv2di2 {}
>
>    const vsc __builtin_altivec_orc_v16qi (vsc, vsc);
> -    ORC_V16QI orcv16qi3 {}
> +    ORC_V16QI iorcv16qi3 {}
>
>    const vuc __builtin_altivec_orc_v16qi_uns (vuc, vuc);
> -    ORC_V16QI_UNS orcv16qi3 {}
> +    ORC_V16QI_UNS iorcv16qi3 {}
>
>    const vsq __builtin_altivec_orc_v1ti (vsq, vsq);
> -    ORC_V1TI orcv1ti3 {}
> +    ORC_V1TI iorcv1ti3 {}
>
>    const vuq __builtin_altivec_orc_v1ti_uns (vuq, vuq);
> -    ORC_V1TI_UNS orcv1ti3 {}
> +    ORC_V1TI_UNS iorcv1ti3 {}
>
>    const vd __builtin_altivec_orc_v2df (vd, vd);
> -    ORC_V2DF orcv2df3 {}
> +    ORC_V2DF iorcv2df3 {}
>
>    const vsll __builtin_altivec_orc_v2di (vsll, vsll);
> -    ORC_V2DI orcv2di3 {}
> +    ORC_V2DI iorcv2di3 {}
>
>    const vull __builtin_altivec_orc_v2di_uns (vull, vull);
> -    ORC_V2DI_UNS orcv2di3 {}
> +    ORC_V2DI_UNS iorcv2di3 {}
>
>    const vf __builtin_altivec_orc_v4sf (vf, vf);
> -    ORC_V4SF orcv4sf3 {}
> +    ORC_V4SF iorcv4sf3 {}
>
>    const vsi __builtin_altivec_orc_v4si (vsi, vsi);
> -    ORC_V4SI orcv4si3 {}
> +    ORC_V4SI iorcv4si3 {}
>
>    const vui __builtin_altivec_orc_v4si_uns (vui, vui);
> -    ORC_V4SI_UNS orcv4si3 {}
> +    ORC_V4SI_UNS iorcv4si3 {}
>
>    const vss __builtin_altivec_orc_v8hi (vss, vss);
> -    ORC_V8HI orcv8hi3 {}
> +    ORC_V8HI iorcv8hi3 {}
>
>    const vus __builtin_altivec_orc_v8hi_uns (vus, vus);
> -    ORC_V8HI_UNS orcv8hi3 {}
> +    ORC_V8HI_UNS iorcv8hi3 {}
>
>    const vsc __builtin_altivec_vclzb (vsc);
>      VCLZB clzv16qi2 {}
> diff --git a/gcc/config/rs6000/rs6000-string.cc 
> b/gcc/config/rs6000/rs6000-string.cc
> index 917f5572a6d..c4c62e8e2f9 100644
> --- a/gcc/config/rs6000/rs6000-string.cc
> +++ b/gcc/config/rs6000/rs6000-string.cc
> @@ -743,7 +743,7 @@ expand_cmp_vec_sequence (unsigned HOST_WIDE_INT 
> bytes_to_compare,
>               rtx cmp_combined = gen_reg_rtx (load_mode);
>               emit_insn (gen_altivec_eqv16qi (cmp_res, s1data, s2data));
>               emit_insn (gen_altivec_eqv16qi (cmp_zero, s1data, zero_reg));
> -             emit_insn (gen_orcv16qi3 (vec_result, cmp_zero, cmp_res));
> +             emit_insn (gen_iorcv16qi3 (vec_result, cmp_zero, cmp_res));
>               emit_insn (gen_altivec_vcmpequb_p (cmp_combined, vec_result, 
> zero_reg));
>             }
>         }
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index a5d20594789..276a5c9cf2d 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -7324,7 +7324,7 @@ (define_expand "nand<mode>3"
>
>  ;; The canonical form is to have the negated element first, so we need to
>  ;; reverse arguments.
> -(define_expand "orc<mode>3"
> +(define_expand "iorc<mode>3"
>    [(set (match_operand:BOOL_128 0 "vlogical_operand")
>         (ior:BOOL_128
>          (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"))
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 5730bda80dc..fb448c946cd 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5543,6 +5543,16 @@ means of constraints requiring operands 1 and 0 to be 
> the same location.
>  @itemx @samp{and@var{m}3}, @samp{ior@var{m}3}, @samp{xor@var{m}3}
>  Similar, for other arithmetic operations.
>
> +@cindex @code{andc@var{m}3} instruction pattern
> +@item @samp{andc@var{m}3}
> +Like @code{and@var{m}3}, but it uses bitwise-complement of operand 2
> +rather than operand 2 itself.
> +
> +@cindex @code{iorc@var{m}3} instruction pattern
> +@item @samp{iorc@var{m}3}
> +Like @code{ior@var{m}3}, but it uses bitwise-complement of operand 2
> +rather than operand 2 itself.
> +
>  @cindex @code{addv@var{m}4} instruction pattern
>  @item @samp{addv@var{m}4}
>  Like @code{add@var{m}3} but takes a @code{code_label} as operand 3 and
> diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> index 71af1a8cd97..fd27bac41e2 100644
> --- a/gcc/gimple-isel.cc
> +++ b/gcc/gimple-isel.cc
> @@ -284,6 +284,30 @@ gimple_expand_vec_cond_expr (struct function *fun, 
> gimple_stmt_iterator *gsi,
>                   /* r = c ? z : c.  */
>                   op2 = new_op0;
>                 }
> +             bool op1_zerop = integer_zerop (op1);
> +             bool op2_minus_onep = integer_minus_onep (op2);
> +             if (op1_zerop
> +                 && (direct_internal_fn_supported_p (IFN_BIT_ANDC, vtype,
> +                                                     OPTIMIZE_FOR_BOTH)))
> +               {
> +                 tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
> +                 tree new_op0 = make_ssa_name (vtype);
> +                 gassign *new_stmt = gimple_build_assign (new_op0, conv_op);
> +                 gsi_insert_seq_before (gsi, new_stmt, GSI_SAME_STMT);
> +                 return gimple_build_call_internal (IFN_BIT_ANDC, 2, op2,
> +                                                    new_op0);
> +               }
> +             else if (op2_minus_onep
> +                      && (direct_internal_fn_supported_p (IFN_BIT_IORC, 
> vtype,
> +                                                          
> OPTIMIZE_FOR_BOTH)))
> +               {
> +                 tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
> +                 tree new_op0 = make_ssa_name (vtype);
> +                 gassign *new_stmt = gimple_build_assign (new_op0, conv_op);
> +                 gsi_insert_seq_before (gsi, new_stmt, GSI_SAME_STMT);
> +                 return gimple_build_call_internal (IFN_BIT_IORC, 2, op1,
> +                                                    new_op0);
> +               }
>             }
>
>           /* When the compare has EH we do not want to forward it when
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index a8c83437ada..994bbd9b4dd 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -593,6 +593,10 @@ DEF_INTERNAL_FN (DIVMODBITINT, ECF_LEAF, ". O . O . R . 
> R . ")
>  DEF_INTERNAL_FN (FLOATTOBITINT, ECF_LEAF | ECF_NOTHROW, ". O . . ")
>  DEF_INTERNAL_FN (BITINTTOFLOAT, ECF_PURE | ECF_LEAF, ". R . ")
>
> +/* Bitwise functions.  */
> +DEF_INTERNAL_OPTAB_FN (BIT_ANDC, ECF_CONST, andc, binary)
> +DEF_INTERNAL_OPTAB_FN (BIT_IORC, ECF_CONST, iorc, binary)
> +
>  #undef DEF_INTERNAL_WIDENING_OPTAB_FN
>  #undef DEF_INTERNAL_SIGNED_COND_FN
>  #undef DEF_INTERNAL_COND_FN
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index bc2611abdc2..bcf7cc0fa58 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -540,3 +540,5 @@ OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
>  OPTAB_D (len_load_optab, "len_load_$a")
>  OPTAB_D (len_store_optab, "len_store_$a")
>  OPTAB_D (select_vl_optab, "select_vl$a")
> +OPTAB_D (andc_optab, "andc$a3")
> +OPTAB_D (iorc_optab, "iorc$a3")
> --
> 2.43.0

Reply via email to