On Thu, Jun 27, 2024 at 10:30 AM liuhongt <hongtao....@intel.com> wrote: > > gcc/ChangeLog
In PR115659 Kewen notes that ISEL (and possibly folding) could do a better job with these. In addition to the mentioned issues, we can also check whether the target can handle an alternate mask mode. So instead of gating with /* Try to fold x CMP y ? -1 : 0 to x CMP y. */ if (can_compute_op0 && integer_minus_onep (op1) && integer_zerop (op2) && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0))) we could, when TYPE_MODE (TREE_TYPE (lhs)) != TYPE_MODE (TREE_TYPE (op0)), build the type build_truth_vector_type_for_mode (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)), TYPE_MODE (TREE_TYPE (op0))); use that as the LHS type, check whether we can handle the resulting compare (can_compute_op0 with this mode), and if so rewrite the statement accordingly to make the transform. Richard. > PR target/115517 > * config/i386/sse.md > (*<avx512>_cvtmask2<ssemodesuffix><mode>_not): New pre_reload > splitter. > (*<avx512>_cvtmask2<ssemodesuffix><mode>_not): Ditto. > (*avx2_pcmp<mode>3_6): Ditto. > (*avx2_pcmp<mode>3_7): Ditto. > --- > gcc/config/i386/sse.md | 97 ++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 97 insertions(+) > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 1148ac84f3d..822159a869b 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -9986,6 +9986,24 @@ (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>" > [(set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > > +(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not" > + [(set (match_operand:VI12_AVX512VL 0 "register_operand") > + (vec_merge:VI12_AVX512VL > + (match_operand:VI12_AVX512VL 2 "const0_operand") > + (match_operand:VI12_AVX512VL 3 "vector_all_ones_operand") > + (match_operand:<avx512fmaskmode> 1 "register_operand")))] > + "TARGET_AVX512BW && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(set (match_dup 4) > + (not:<avx512fmaskmode> (match_dup 1))) > + (set (match_dup 0) > + (vec_merge:VI12_AVX512VL > + (match_dup 3) > + (match_dup 2) > + 
(match_dup 4)))] > + "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);") > + > (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>" > [(set (match_operand:VI48_AVX512VL 0 "register_operand") > (vec_merge:VI48_AVX512VL > @@ -10024,6 +10042,24 @@ (define_insn_and_split > "*<avx512>_cvtmask2<ssemodesuffix><mode>" > (set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > > +(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not" > + [(set (match_operand:VI48_AVX512VL 0 "register_operand") > + (vec_merge:VI48_AVX512VL > + (match_operand:VI48_AVX512VL 2 "const0_operand") > + (match_operand:VI48_AVX512VL 3 "vector_all_ones_operand") > + (match_operand:<avx512fmaskmode> 1 "register_operand")))] > + "TARGET_AVX512F && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(set (match_dup 4) > + (not:<avx512fmaskmode> (match_dup 1))) > + (set (match_dup 0) > + (vec_merge:VI48_AVX512VL > + (match_dup 3) > + (match_dup 2) > + (match_dup 4)))] > + "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);") > + > (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>_pternlog_false_dep" > [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") > (vec_merge:VI48_AVX512VL > @@ -17675,6 +17711,67 @@ (define_insn_and_split "*avx2_pcmp<mode>3_5" > std::swap (operands[1], operands[2]); > }) > > +(define_int_attr pcmp_usmin > + [(UNSPEC_PCMP "smin") (UNSPEC_UNSIGNED_PCMP "umin")]) > + > +(define_insn_and_split "*avx2_pcmp<mode>3_6" > + [(set (match_operand:VI_128_256 0 "register_operand") > + (vec_merge:VI_128_256 > + (match_operand:VI_128_256 1 "vector_all_ones_operand") > + (match_operand:VI_128_256 2 "const0_operand") > + (unspec:<avx512fmaskmode> > + [(match_operand:VI_128_256 3 "nonimmediate_operand") > + (match_operand:VI_128_256 4 "nonimmediate_operand") > + (match_operand:SI 5 "const_0_to_7_operand")] > + UNSPEC_PCMP_ITER)))] > + "TARGET_AVX512VL && ix86_pre_reload_split () > + && (INTVAL (operands[5]) == 2 || INTVAL (operands[5]) == 5)" > + "#" > + "&& 
1" > + [(const_int 0)] > +{ > + rtx dst_min = gen_reg_rtx (<MODE>mode); > + > + if (MEM_P (operands[3]) && MEM_P (operands[4])) > + operands[3] = force_reg (<MODE>mode, operands[3]); > + emit_insn (gen_<pcmp_usmin><mode>3 (dst_min, operands[3], operands[4])); > + rtx eq_op = INTVAL (operands[5]) == 2 ? operands[3] : operands[4]; > + emit_move_insn (operands[0], gen_rtx_EQ (<MODE>mode, eq_op, dst_min)); > + DONE; > +}) > + > +(define_insn_and_split "*avx2_pcmp<mode>3_7" > + [(set (match_operand:VI_128_256 0 "register_operand") > + (vec_merge:VI_128_256 > + (match_operand:VI_128_256 1 "const0_operand") > + (match_operand:VI_128_256 2 "vector_all_ones_operand") > + (unspec:<avx512fmaskmode> > + [(match_operand:VI_128_256 3 "nonimmediate_operand") > + (match_operand:VI_128_256 4 "nonimmediate_operand") > + (match_operand:SI 5 "const_0_to_7_operand")] > + UNSPEC_PCMP_ITER)))] > + "TARGET_AVX512VL && ix86_pre_reload_split () > + /* NE is commutative. */ > + && (INTVAL (operands[5]) == 4 > + /* LE, 3 must be register. */ > + || INTVAL (operands[5]) == 2 > + /* NLT aka GE, 4 must be register and we swap operands. */ > + || INTVAL (operands[5]) == 5)" > + "#" > + "&& 1" > + [(const_int 0)] > +{ > + if (INTVAL (operands[5]) == 5) > + std::swap (operands[3], operands[4]); > + > + if (MEM_P (operands[3])) > + operands[3] = force_reg (<MODE>mode, operands[3]); > + enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ; > + emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode, > + operands[3], operands[4])); > + DONE; > +}) > + > (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>" > [(set (match_operand:<avx512fmaskmode> 0 "register_operand") > (unspec:<avx512fmaskmode> > -- > 2.31.1 >