Hi Segher, on 2019/11/11 下午8:51, Segher Boessenkool wrote: > Hi! > >> pattern 1: >> lt(a,b) = gt(b,a) >> le(a,b) = ge(b,a) > > This is done by swap_condition normally.
Nice! Done. > >> pattern 2: >> unge(a,b) = ~gt(b,a) >> unle(a,b) = ~gt(a,b) >> ne(a,b) = ~eq(a,b) >> ungt(a,b) = ~ge(b,a) >> unlt(a,b) = ~ge(a,b) > > This is reverse_condition_maybe_unordered (and a swap, in two cases). > Nice! Done. > >> pattern 4: >> uneq: ~gt(a,b) & ~gt(b,a) >> unordered: ~ge(a,b) & ~ge(b,a) > > That is 3, reversed. > Yes, merge them. > >> +; 1. For lt and le: >> +; lt(a,b) = gt(b,a) >> +; le(a,b) = ge(b,a) >> +(define_insn_and_split "vector_<code><mode>" >> [(set (match_operand:VEC_F 0 "vfloat_operand") >> + (vec_fp_cmp1:VEC_F (match_operand:VEC_F 1 "vfloat_operand") >> + (match_operand:VEC_F 2 "vfloat_operand")))] >> "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" >> "#" >> "" >> + [(set (match_dup 0) >> + (<vec_fp_cmp1_attr>:VEC_F (match_dup 2) >> + (match_dup 1)))] >> { >> }) > > Empty preparation statements (the {}) can just be left out completely. > OK, thanks! > The split condition "" is incorrect, it should be "&& 1": if it starts > with "&&", the insn condition is included. > Got it, thanks! > > So maybe it is simplest to *do* use match_operator here, handle all of > lt gt le ge eq unge unle ungt unlt ne in one define_expand, which then > swaps the condition and the args, and expand the extra not for the five > where that is needed? > I still used define_insn_and_split to support RTL pattern recognition, so I didn't use match_operator there, but I put le, lt, ne, unge, ungt, unlt and unle together in one define_insn_and_split, as you suggested. > >> +;; 3. ltgt and ordered. >> +(define_code_iterator vec_fp_cmp3 [ltgt ordered]) >> +(define_code_attr vec_fp_cmp3_attr [(ltgt "gt") >> + (ordered "ge")]) >> + >> +;; 4. uneq and unordered. >> +(define_code_iterator vec_fp_cmp4 [uneq unordered]) >> +(define_code_attr vec_fp_cmp4_attr [(uneq "gt") >> + (unordered "ge")]) > > And then another one for ltgt uneq ordered unordered perhaps? > Done. > So you'll need to define two new predicates then. 
Something like > vector_fp_comparison_operator and, erm, vector_fp_extra_comparison_operator, > which kind of sucks as a name, but there is only one ;-) Thanks! I have used those names for the code iterators in the define_insn_and_split patterns now. > > Sorry for sending you first one way, and then back the other. > It really doesn't matter! The updated patch is attached. Since we check <CODE> and generate the related insns in the preparation statements of both define_insn_and_split patterns, one idea I had is to factor those two parts out and merge them into one common split function (moved to rs6000.c). Does that sound better? BR, Kewen ------------------------------- gcc/ChangeLog 2019-11-12 Kewen Lin <li...@gcc.gnu.org> * config/rs6000/vector.md (vector_fp_comparison_operator): New code iterator. (vector_fp_extra_comparison_operator): Likewise. (vector_<code><mode> for VEC_F and vector_fp_comparison_operator): New define_insn_and_split. (vector_<code><mode> for VEC_F and vector_fp_extra_comparison_operator): Likewise. (vector_lt<mode> for VEC_F): Refactor with vector_fp_comparison_operator. (vector_le<mode> for VEC_F): Likewise. (vector_unge<mode> for VEC_F): Likewise. (vector_unle<mode> for VEC_F): Likewise. (vector_ne<mode> for VEC_F): Likewise. (vector_ungt<mode> for VEC_F): Likewise. (vector_unlt<mode> for VEC_F): Likewise. (vector_ltgt<mode> for VEC_F): Refactor with vector_fp_extra_comparison_operator. (vector_ordered<mode> for VEC_F): Likewise. (vector_uneq<mode> for VEC_F): Likewise. (vector_unordered<mode> for VEC_F): Likewise.
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index b132037..9919fd4 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -107,6 +107,12 @@ (smin "smin") (smax "smax")]) +;; code iterators and attributes for vector FP comparison operators: +(define_code_iterator vector_fp_comparison_operator [lt le ne + ungt unge unlt unle]) +(define_code_iterator vector_fp_extra_comparison_operator [ltgt uneq + unordered ordered]) + ;; Vector move instructions. Little-endian VSX loads and stores require ;; special handling to circumvent "element endianness." @@ -665,88 +671,6 @@ DONE; }) -; lt(a,b) = gt(b,a) -(define_expand "vector_lt<mode>" - [(set (match_operand:VEC_F 0 "vfloat_operand") - (lt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" -{ - emit_insn (gen_vector_gt<mode> (operands[0], operands[2], operands[1])); - DONE; -}) - -; le(a,b) = ge(b,a) -(define_expand "vector_le<mode>" - [(set (match_operand:VEC_F 0 "vfloat_operand") - (le:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" -{ - emit_insn (gen_vector_ge<mode> (operands[0], operands[2], operands[1])); - DONE; -}) - -; ne(a,b) = ~eq(a,b) -(define_expand "vector_ne<mode>" - [(set (match_operand:VEC_F 0 "vfloat_operand") - (ne:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" -{ - emit_insn (gen_vector_eq<mode> (operands[0], operands[1], operands[2])); - emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); - DONE; -}) - -; unge(a,b) = ~gt(b,a) -(define_expand "vector_unge<mode>" - [(set (match_operand:VEC_F 0 "vfloat_operand") - (unge:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" -{ - emit_insn 
(gen_vector_gt<mode> (operands[0], operands[2], operands[1])); - emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); - DONE; -}) - -; ungt(a,b) = ~ge(b,a) -(define_expand "vector_ungt<mode>" - [(set (match_operand:VEC_F 0 "vfloat_operand") - (ungt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" -{ - emit_insn (gen_vector_ge<mode> (operands[0], operands[2], operands[1])); - emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); - DONE; -}) - -; unle(a,b) = ~gt(a,b) -(define_expand "vector_unle<mode>" - [(set (match_operand:VEC_F 0 "vfloat_operand") - (unle:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" -{ - emit_insn (gen_vector_gt<mode> (operands[0], operands[1], operands[2])); - emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); - DONE; -}) - -; unlt(a,b) = ~ge(a,b) -(define_expand "vector_unlt<mode>" - [(set (match_operand:VEC_F 0 "vfloat_operand") - (unlt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" -{ - emit_insn (gen_vector_ge<mode> (operands[0], operands[1], operands[2])); - emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); - DONE; -}) - (define_expand "vector_eq<mode>" [(set (match_operand:VEC_C 0 "vlogical_operand") (eq:VEC_C (match_operand:VEC_C 1 "vlogical_operand") @@ -761,13 +685,6 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") -(define_expand "vector_ge<mode>" - [(set (match_operand:VEC_F 0 "vlogical_operand") - (ge:VEC_F (match_operand:VEC_F 1 "vlogical_operand") - (match_operand:VEC_F 2 "vlogical_operand")))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") - ; >= for integer vectors: swap operands and apply not-greater-than (define_expand "vector_nlt<mode>" [(set (match_operand:VEC_I 3 "vlogical_operand") @@ -829,88 +746,114 @@ operands[3] 
= gen_reg_rtx_and_attrs (operands[0]); }) -(define_insn_and_split "vector_uneq<mode>" - [(set (match_operand:VEC_F 0 "vfloat_operand") - (uneq:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" - "#" - "" - [(set (match_dup 3) - (gt:VEC_F (match_dup 1) - (match_dup 2))) - (set (match_dup 4) - (gt:VEC_F (match_dup 2) - (match_dup 1))) - (set (match_dup 0) - (and:VEC_F (not:VEC_F (match_dup 3)) - (not:VEC_F (match_dup 4))))] -{ - operands[3] = gen_reg_rtx (<MODE>mode); - operands[4] = gen_reg_rtx (<MODE>mode); -}) +; There are 14 possible vector FP comparison operators, gt and eq of them have +; been expanded above, so just support 12 remaining operators here. -(define_insn_and_split "vector_ltgt<mode>" - [(set (match_operand:VEC_F 0 "vfloat_operand") - (ltgt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] +; 0. For ge: +(define_expand "vector_ge<mode>" + [(set (match_operand:VEC_F 0 "vlogical_operand") + (ge:VEC_F (match_operand:VEC_F 1 "vlogical_operand") + (match_operand:VEC_F 2 "vlogical_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" - "#" - "" - [(set (match_dup 3) - (gt:VEC_F (match_dup 1) - (match_dup 2))) - (set (match_dup 4) - (gt:VEC_F (match_dup 2) - (match_dup 1))) - (set (match_dup 0) - (ior:VEC_F (match_dup 3) - (match_dup 4)))] -{ - operands[3] = gen_reg_rtx (<MODE>mode); - operands[4] = gen_reg_rtx (<MODE>mode); -}) + "") -(define_insn_and_split "vector_ordered<mode>" +; 1. 
For lt/le/ne/ungt/unge/unlt/unle: +; lt(a,b) = gt(b,a) +; le(a,b) = ge(b,a) +; unge(a,b) = ~lt(a,b) +; unle(a,b) = ~gt(a,b) +; ne(a,b) = ~eq(a,b) +; ungt(a,b) = ~le(a,b) +; unlt(a,b) = ~ge(a,b) +(define_insn_and_split "vector_<code><mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") - (ordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] + (vector_fp_comparison_operator:VEC_F + (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "#" - "" - [(set (match_dup 3) - (ge:VEC_F (match_dup 1) - (match_dup 2))) - (set (match_dup 4) - (ge:VEC_F (match_dup 2) - (match_dup 1))) - (set (match_dup 0) - (ior:VEC_F (match_dup 3) - (match_dup 4)))] + "can_create_pseudo_p ()" + [(pc)] { - operands[3] = gen_reg_rtx (<MODE>mode); - operands[4] = gen_reg_rtx (<MODE>mode); + enum rtx_code cond = <CODE>; + rtx res = gen_reg_rtx (<MODE>mode); + bool need_invert = false; + switch (cond) + { + case LT: + case LE: + cond = swap_condition (cond); + std::swap (operands[1], operands[2]); + break; + case UNLE: + case UNLT: + case NE: + case UNGE: + case UNGT: + cond = reverse_condition_maybe_unordered (cond); + need_invert = true; + break; + default: + gcc_unreachable (); + } + + rtx comp = gen_rtx_fmt_ee (cond, <MODE>mode, operands[1], operands[2]); + emit_insn (gen_rtx_SET (res, comp)); + + if (need_invert) + emit_insn (gen_one_cmpl<mode>2 (res, res)); + + emit_insn (gen_rtx_SET (operands[0], res)); }) -(define_insn_and_split "vector_unordered<mode>" +; 2. 
For ltgt/uneq/ordered/unordered: +; ltgt: gt(a,b) | gt(b,a) +; uneq: ~gt(a,b) & ~gt(b,a) +; ordered: ge(a,b) | ge(b,a) +; unordered: ~ge(a,b) & ~ge(b,a) +(define_insn_and_split "vector_<code><mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") - (unordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand") - (match_operand:VEC_F 2 "vfloat_operand")))] + (vector_fp_extra_comparison_operator:VEC_F + (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "#" - "" - [(set (match_dup 3) - (ge:VEC_F (match_dup 1) - (match_dup 2))) - (set (match_dup 4) - (ge:VEC_F (match_dup 2) - (match_dup 1))) - (set (match_dup 0) - (and:VEC_F (not:VEC_F (match_dup 3)) - (not:VEC_F (match_dup 4))))] + "can_create_pseudo_p ()" + [(pc)] { - operands[3] = gen_reg_rtx (<MODE>mode); - operands[4] = gen_reg_rtx (<MODE>mode); + enum rtx_code cond = <CODE>; + rtx res1 = gen_reg_rtx (<MODE>mode); + rtx res2 = gen_reg_rtx (<MODE>mode); + bool need_invert = false; + switch (cond) + { + case UNEQ: + need_invert = true; + /* Fall through. */ + case LTGT: + cond = GT; + break; + case UNORDERED: + need_invert = true; + /* Fall through. */ + case ORDERED: + cond = GE; + break; + default: + gcc_unreachable (); + } + + rtx comp1 = gen_rtx_fmt_ee (cond, <MODE>mode, operands[1], operands[2]); + emit_insn (gen_rtx_SET (res1, comp1)); + rtx comp2 = gen_rtx_fmt_ee (cond, <MODE>mode, operands[2], operands[1]); + emit_insn (gen_rtx_SET (res2, comp2)); + + emit_insn (gen_ior<mode>3 (res1, res1, res2)); + + if (need_invert) + emit_insn (gen_one_cmpl<mode>2 (res1, res1)); + + emit_insn (gen_rtx_SET (operands[0], res1)); }) ;; Note the arguments for __builtin_altivec_vsel are op2, op1, mask