Hi,

As PR115659 shows, assuming c = x CMP y, there are some
folding opportunities for patterns of the form r = c ? 0/z : z/-1:
  - r = c ? 0 : z can be folded into r = ~c & z.
  - r = c ? z : -1 can be folded into r = ~c | z.

But BIT_AND/BIT_IOR applied to one BIT_NOT operand is a
compound operation, and I'm not sure that every target with
vector capability has a single vector instruction for it.
If it does not, it's arguable whether the folded form always
beats vector selection (for example, the vector constant may get
hoisted or combined, and selection may have the same latency as a
normal logical operation).
So IMHO we probably need to query the target with new optabs.
So this patch introduces new optabs andc and iorc and their
corresponding internal functions BIT_{ANDC,IORC} (suggestions
for naming the optabs and ifns are welcome).  If a target
defines such optabs for vector modes, it means the target
supports these hardware insns and they should be no worse than
vector selection.  BTW, the rs6000 changes are meant to
give an example of a target supporting andc/iorc.

Does this sound reasonable?

BR,
Kewen
-----

        PR tree-optimization/115659

gcc/ChangeLog:

        * config/rs6000/rs6000-builtins.def: Update some bif expanders by
        replacing orc<mode>3 with iorc<mode>3.
        * config/rs6000/rs6000-string.cc (expand_cmp_vec_sequence): Update gen
        function by replacing orc<mode>3 with iorc<mode>3.
        * config/rs6000/rs6000.md (orc<mode>3): Rename to ...
        (iorc<mode>3): ... this.
        * doc/md.texi: Document andcm3 and iorcm3.
        * gimple-isel.cc (gimple_expand_vec_cond_expr): Add more foldings for
        patterns x CMP y ? 0 : z and x CMP y ? z : -1.
        * internal-fn.def (BIT_ANDC): New internal function.
        (BIT_IORC): Likewise.
        * optabs.def (andc, iorc): New optab.
---
 gcc/config/rs6000/rs6000-builtins.def | 24 ++++++++++++------------
 gcc/config/rs6000/rs6000-string.cc    |  2 +-
 gcc/config/rs6000/rs6000.md           |  2 +-
 gcc/doc/md.texi                       | 10 ++++++++++
 gcc/gimple-isel.cc                    | 24 ++++++++++++++++++++++++
 gcc/internal-fn.def                   |  4 ++++
 gcc/optabs.def                        |  2 ++
 7 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 3bc7fed6956..736890fe6cb 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2147,40 +2147,40 @@
     NEG_V2DI negv2di2 {}

   const vsc __builtin_altivec_orc_v16qi (vsc, vsc);
-    ORC_V16QI orcv16qi3 {}
+    ORC_V16QI iorcv16qi3 {}

   const vuc __builtin_altivec_orc_v16qi_uns (vuc, vuc);
-    ORC_V16QI_UNS orcv16qi3 {}
+    ORC_V16QI_UNS iorcv16qi3 {}

   const vsq __builtin_altivec_orc_v1ti (vsq, vsq);
-    ORC_V1TI orcv1ti3 {}
+    ORC_V1TI iorcv1ti3 {}

   const vuq __builtin_altivec_orc_v1ti_uns (vuq, vuq);
-    ORC_V1TI_UNS orcv1ti3 {}
+    ORC_V1TI_UNS iorcv1ti3 {}

   const vd __builtin_altivec_orc_v2df (vd, vd);
-    ORC_V2DF orcv2df3 {}
+    ORC_V2DF iorcv2df3 {}

   const vsll __builtin_altivec_orc_v2di (vsll, vsll);
-    ORC_V2DI orcv2di3 {}
+    ORC_V2DI iorcv2di3 {}

   const vull __builtin_altivec_orc_v2di_uns (vull, vull);
-    ORC_V2DI_UNS orcv2di3 {}
+    ORC_V2DI_UNS iorcv2di3 {}

   const vf __builtin_altivec_orc_v4sf (vf, vf);
-    ORC_V4SF orcv4sf3 {}
+    ORC_V4SF iorcv4sf3 {}

   const vsi __builtin_altivec_orc_v4si (vsi, vsi);
-    ORC_V4SI orcv4si3 {}
+    ORC_V4SI iorcv4si3 {}

   const vui __builtin_altivec_orc_v4si_uns (vui, vui);
-    ORC_V4SI_UNS orcv4si3 {}
+    ORC_V4SI_UNS iorcv4si3 {}

   const vss __builtin_altivec_orc_v8hi (vss, vss);
-    ORC_V8HI orcv8hi3 {}
+    ORC_V8HI iorcv8hi3 {}

   const vus __builtin_altivec_orc_v8hi_uns (vus, vus);
-    ORC_V8HI_UNS orcv8hi3 {}
+    ORC_V8HI_UNS iorcv8hi3 {}

   const vsc __builtin_altivec_vclzb (vsc);
     VCLZB clzv16qi2 {}
diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 917f5572a6d..c4c62e8e2f9 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -743,7 +743,7 @@ expand_cmp_vec_sequence (unsigned HOST_WIDE_INT 
bytes_to_compare,
              rtx cmp_combined = gen_reg_rtx (load_mode);
              emit_insn (gen_altivec_eqv16qi (cmp_res, s1data, s2data));
              emit_insn (gen_altivec_eqv16qi (cmp_zero, s1data, zero_reg));
-             emit_insn (gen_orcv16qi3 (vec_result, cmp_zero, cmp_res));
+             emit_insn (gen_iorcv16qi3 (vec_result, cmp_zero, cmp_res));
              emit_insn (gen_altivec_vcmpequb_p (cmp_combined, vec_result, 
zero_reg));
            }
        }
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a5d20594789..276a5c9cf2d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7324,7 +7324,7 @@ (define_expand "nand<mode>3"

 ;; The canonical form is to have the negated element first, so we need to
 ;; reverse arguments.
-(define_expand "orc<mode>3"
+(define_expand "iorc<mode>3"
   [(set (match_operand:BOOL_128 0 "vlogical_operand")
        (ior:BOOL_128
         (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"))
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 5730bda80dc..fb448c946cd 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5543,6 +5543,16 @@ means of constraints requiring operands 1 and 0 to be 
the same location.
 @itemx @samp{and@var{m}3}, @samp{ior@var{m}3}, @samp{xor@var{m}3}
 Similar, for other arithmetic operations.

+@cindex @code{andc@var{m}3} instruction pattern
+@item @samp{andc@var{m}3}
+Like @code{and@var{m}3}, but it uses bitwise-complement of operand 2
+rather than operand 2 itself.
+
+@cindex @code{iorc@var{m}3} instruction pattern
+@item @samp{iorc@var{m}3}
+Like @code{ior@var{m}3}, but it uses bitwise-complement of operand 2
+rather than operand 2 itself.
+
 @cindex @code{addv@var{m}4} instruction pattern
 @item @samp{addv@var{m}4}
 Like @code{add@var{m}3} but takes a @code{code_label} as operand 3 and
diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index 71af1a8cd97..fd27bac41e2 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
@@ -284,6 +284,30 @@ gimple_expand_vec_cond_expr (struct function *fun, 
gimple_stmt_iterator *gsi,
                  /* r = c ? z : c.  */
                  op2 = new_op0;
                }
+             bool op1_zerop = integer_zerop (op1);
+             bool op2_minus_onep = integer_minus_onep (op2);
+             if (op1_zerop
+                 && (direct_internal_fn_supported_p (IFN_BIT_ANDC, vtype,
+                                                     OPTIMIZE_FOR_BOTH)))
+               {
+                 tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
+                 tree new_op0 = make_ssa_name (vtype);
+                 gassign *new_stmt = gimple_build_assign (new_op0, conv_op);
+                 gsi_insert_seq_before (gsi, new_stmt, GSI_SAME_STMT);
+                 return gimple_build_call_internal (IFN_BIT_ANDC, 2, op2,
+                                                    new_op0);
+               }
+             else if (op2_minus_onep
+                      && (direct_internal_fn_supported_p (IFN_BIT_IORC, vtype,
+                                                          OPTIMIZE_FOR_BOTH)))
+               {
+                 tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
+                 tree new_op0 = make_ssa_name (vtype);
+                 gassign *new_stmt = gimple_build_assign (new_op0, conv_op);
+                 gsi_insert_seq_before (gsi, new_stmt, GSI_SAME_STMT);
+                 return gimple_build_call_internal (IFN_BIT_IORC, 2, op1,
+                                                    new_op0);
+               }
            }

          /* When the compare has EH we do not want to forward it when
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index a8c83437ada..994bbd9b4dd 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -593,6 +593,10 @@ DEF_INTERNAL_FN (DIVMODBITINT, ECF_LEAF, ". O . O . R . R 
. ")
 DEF_INTERNAL_FN (FLOATTOBITINT, ECF_LEAF | ECF_NOTHROW, ". O . . ")
 DEF_INTERNAL_FN (BITINTTOFLOAT, ECF_PURE | ECF_LEAF, ". R . ")

+/* Bitwise functions.  */
+DEF_INTERNAL_OPTAB_FN (BIT_ANDC, ECF_CONST, andc, binary)
+DEF_INTERNAL_OPTAB_FN (BIT_IORC, ECF_CONST, iorc, binary)
+
 #undef DEF_INTERNAL_WIDENING_OPTAB_FN
 #undef DEF_INTERNAL_SIGNED_COND_FN
 #undef DEF_INTERNAL_COND_FN
diff --git a/gcc/optabs.def b/gcc/optabs.def
index bc2611abdc2..bcf7cc0fa58 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -540,3 +540,5 @@ OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
 OPTAB_D (len_load_optab, "len_load_$a")
 OPTAB_D (len_store_optab, "len_store_$a")
 OPTAB_D (select_vl_optab, "select_vl$a")
+OPTAB_D (andc_optab, "andc$a3")
+OPTAB_D (iorc_optab, "iorc$a3")
--
2.43.0

Reply via email to