On Mon, Aug 5, 2019 at 3:09 PM Uros Bizjak <ubiz...@gmail.com> wrote:
> > > > > > (define_mode_iterator MAXMIN_IMODE [SI "TARGET_SSE4_1"] [DI > > > > > > "TARGET_AVX512F"]) > > > > > > > > > > > > and then we need to split DImode for 32bits, too. > > > > > > > > > > For now, please add "TARGET_64BIT && TARGET_AVX512F" for DImode > > > > > condition, I'll provide _doubleword splitter later. > > > > > > > > Shouldn't that be TARGET_AVX512VL instead? Or does the insn use %g0 > > > > etc. > > > > to force use of %zmmN? > > > > > > It generates V4SI mode, so - yes, AVX512VL. > > > > case SMAX: > > case SMIN: > > case UMAX: > > case UMIN: > > if ((mode == DImode && (!TARGET_64BIT || !TARGET_AVX512VL)) > > || (mode == SImode && !TARGET_SSE4_1)) > > return false; > > > > so there's no way to use AVX512VL for 32bit? > > There is a way, but on 32bit targets, we need to split DImode > operation to a sequence of SImode operations for unconverted pattern. > This is of course doable, but somehow more complex than simply > emitting a DImode compare + DImode cmove, which is what current > splitter does. So, a follow-up task. Please find attached the complete .md part that enables SImode for TARGET_SSE4_1 and DImode for TARGET_AVX512VL for both 32bit and 64bit targets. The patterns also allow for a memory operand 2, so STV has a chance to create the vector pattern with an implicit load. In case STV fails, memory operand 2 is loaded into a register first; operand 2 is used in both the compare and the cmove instruction, so pre-loading of the operand should be beneficial. Also note that splitting should happen rarely. Due to the cost function, STV should effectively always convert minmax to a vector insn. Uros.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 274210)
+++ config/i386/i386.md (working copy)
@@ -17719,6 +17719,110 @@
    (match_operand:SWI 3 "const_int_operand")]
   ""
   "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
+
+;; min/max patterns
+
+(define_mode_iterator MAXMIN_IMODE
+  [(SI "TARGET_SSE4_1") (DI "TARGET_AVX512VL")])
+(define_code_attr maxmin_rel
+  [(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")])
+
+(define_expand "<code><mode>3"
+  [(parallel
+    [(set (match_operand:MAXMIN_IMODE 0 "register_operand")
+          (maxmin:MAXMIN_IMODE
+            (match_operand:MAXMIN_IMODE 1 "register_operand")
+            (match_operand:MAXMIN_IMODE 2 "nonimmediate_operand")))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_STV")
+
+(define_insn_and_split "*<code><mode>3_1"
+  [(set (match_operand:MAXMIN_IMODE 0 "register_operand")
+        (maxmin:MAXMIN_IMODE
+          (match_operand:MAXMIN_IMODE 1 "register_operand")
+          (match_operand:MAXMIN_IMODE 2 "nonimmediate_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_64BIT || <MODE>mode != DImode) && TARGET_STV
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+        (if_then_else:MAXMIN_IMODE (match_dup 3)
+          (match_dup 1)
+          (match_dup 2)))]
+{
+  machine_mode mode = <MODE>mode;
+  /* Operand 2 feeds the compare and the if_then_else; use a register.  */
+  if (!register_operand (operands[2], mode))
+    operands[2] = force_reg (mode, operands[2]);
+  /* <maxmin_rel> is the condition under which operand 1 is selected.  */
+  enum rtx_code code = <maxmin_rel>;
+  machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], operands[2]);
+  rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG);
+
+  rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], operands[2]);
+  emit_insn (gen_rtx_SET (flags, tmp));
+  /* Operand 3 is the condition tested by the split template.  */
+  operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
+})
+
+(define_insn_and_split "*<code>di3_doubleword"
+  [(set (match_operand:DI 0 "register_operand")
+        (maxmin:DI (match_operand:DI 1 "register_operand")
+                   (match_operand:DI 2 "nonimmediate_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+        (if_then_else:SI (match_dup 6)
+          (match_dup 1)
+          (match_dup 2)))
+   (set (match_dup 3)
+        (if_then_else:SI (match_dup 6)
+          (match_dup 4)
+          (match_dup 5)))]
+{
+  if (!register_operand (operands[2], DImode))
+    operands[2] = force_reg (DImode, operands[2]);
+  /* Low halves go to operands[0..2], high halves to operands[3..5].  */
+  split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);
+
+  rtx cmplo[2] = { operands[1], operands[2] };
+  rtx cmphi[2] = { operands[4], operands[5] };
+
+  enum rtx_code code = <maxmin_rel>;
+  /* LE/LEU become GE/GEU with the compare operands swapped.  */
+  switch (code)
+    {
+    case LE: case LEU:
+      std::swap (cmplo[0], cmplo[1]);
+      std::swap (cmphi[0], cmphi[1]);
+      code = swap_condition (code);
+      /* FALLTHRU */
+
+    case GE: case GEU:
+      {
+        bool uns = (code == GEU);
+        rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx)
+          = uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz;
+        /* Compare the low halves, then sbb the high halves into a scratch.  */
+        emit_insn (gen_cmp_1 (SImode, cmplo[0], cmplo[1]));
+
+        rtx tmp = gen_rtx_SCRATCH (SImode);
+        emit_insn (sbb_insn (SImode, tmp, cmphi[0], cmphi[1]));
+        /* The flags mode pairs with the sbb variant selected above.  */
+        rtx flags = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
+        operands[6] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
+
+        break;
+      }
+
+    default:
+      gcc_unreachable ();
+    }
+})
 
 ;; Misc patterns (?)
 