This patch adds an else operand to the i386 masked-load patterns, in particular to the masked gather load builtins that are used for gather vectorization. The else operand must be zero.
gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_special_args_builtin): Add else-operand handling. (ix86_expand_builtin): Ditto. * config/i386/predicates.md (vcvtne2ps2bf_parallel): New predicate. (maskload_else_operand): Ditto. * config/i386/sse.md: Use predicate. --- gcc/config/i386/i386-expand.cc | 59 +++++++++++++--- gcc/config/i386/predicates.md | 15 ++++ gcc/config/i386/sse.md | 124 ++++++++++++++++++++------------- 3 files changed, 142 insertions(+), 56 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index d9ad06264aa..b8505fe2c38 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -12462,10 +12462,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, { tree arg; rtx pat, op; - unsigned int i, nargs, arg_adjust, memory; + unsigned int i, nargs, arg_adjust, memory = -1; unsigned int constant = 100; bool aligned_mem = false; - rtx xops[4]; + rtx xops[4] = {}; + bool add_els = false; enum insn_code icode = d->icode; const struct insn_data_d *insn_p = &insn_data[icode]; machine_mode tmode = insn_p->operand[0].mode; @@ -12592,6 +12593,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case V4DI_FTYPE_PCV4DI_V4DI: case V4SI_FTYPE_PCV4SI_V4SI: case V2DI_FTYPE_PCV2DI_V2DI: + /* Two actual args but an additional else operand. */ + add_els = true; + /* Fallthru. 
*/ case VOID_FTYPE_INT_INT64: nargs = 2; klass = load; @@ -12864,6 +12868,12 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, xops[i]= op; } + if (add_els) + { + xops[i] = CONST0_RTX (GET_MODE (xops[0])); + nargs++; + } + switch (nargs) { case 0: @@ -13113,10 +13123,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, size_t i; enum insn_code icode, icode2; tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - tree arg0, arg1, arg2, arg3, arg4; - rtx op0, op1, op2, op3, op4, pat, pat2, insn; - machine_mode mode0, mode1, mode2, mode3, mode4; + tree arg0, arg1, arg2, arg3, arg4, arg5; + rtx op0, op1, op2, op3, op4, op5, opels, pat, pat2, insn; + machine_mode mode0, mode1, mode2, mode3, mode4, mode5; unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); + bool has_else_op; HOST_WIDE_INT bisa, bisa2; /* For CPU builtins that can be folded, fold first and expand the fold. */ @@ -14919,6 +14930,7 @@ rdseed_step: arg2 = CALL_EXPR_ARG (exp, 2); arg3 = CALL_EXPR_ARG (exp, 3); arg4 = CALL_EXPR_ARG (exp, 4); + has_else_op = call_expr_nargs (exp) == 6; op0 = expand_normal (arg0); op1 = expand_normal (arg1); op2 = expand_normal (arg2); @@ -15021,10 +15033,38 @@ rdseed_step: op3 = copy_to_reg (op3); op3 = lowpart_subreg (mode3, op3, GET_MODE (op3)); } - if (!insn_data[icode].operand[5].predicate (op4, mode4)) + /* The vectorizer only adds an else operand for real masks. */ + if (has_else_op) + { + if (op4 != CONST0_RTX (GET_MODE (subtarget))) + { + error ("the else operand must be 0"); + return const0_rtx; + } + else + { + arg5 = CALL_EXPR_ARG (exp, 5); + op5 = expand_normal (arg5); + /* Note the arg order is different from the operand order. 
*/ + mode5 = insn_data[icode].operand[5].mode; + if (!insn_data[icode].operand[5].predicate (op5, mode5)) + { + error ("the last argument must be scale 1, 2, 4, 8"); + return const0_rtx; + } + } + opels = op4; + op4 = op5; + mode4 = mode5; + } + else { - error ("the last argument must be scale 1, 2, 4, 8"); - return const0_rtx; + if (!insn_data[icode].operand[5].predicate (op4, mode4)) + { + error ("the last argument must be scale 1, 2, 4, 8"); + return const0_rtx; + } + opels = CONST0_RTX (GET_MODE (subtarget)); } /* Optimize. If mask is known to have all high bits set, @@ -15095,7 +15135,8 @@ rdseed_step: } } - pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); + pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4, opels); + if (! pat) return const0_rtx; emit_insn (pat); diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 680594871de..aac7341aeab 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -2332,3 +2332,18 @@ (define_predicate "apx_ndd_add_memory_operand" return true; }) + +;; Check that each element is odd and incrementally increasing from 1 +(define_predicate "vcvtne2ps2bf_parallel" + (and (match_code "const_vector") + (match_code "const_int" "a")) +{ + for (int i = 0; i < XVECLEN (op, 0); ++i) + if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1)) + return false; + return true; +}) + +(define_predicate "maskload_else_operand" + (and (match_code "const_int,const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index baaec689749..d1e64152000 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1487,7 +1487,8 @@ (define_expand "<avx512>_load<mode>_mask" } else if (MEM_P (operands[1])) operands[1] = gen_rtx_UNSPEC (<MODE>mode, - gen_rtvec(1, operands[1]), + gen_rtvec(2, operands[1], + CONST0_RTX (<MODE>mode)), UNSPEC_MASKLOAD); }) @@ -1495,7 +1496,8 @@ (define_insn "*<avx512>_load<mode>_mask" [(set 
(match_operand:V48_AVX512VL 0 "register_operand" "=v") (vec_merge:V48_AVX512VL (unspec:V48_AVX512VL - [(match_operand:V48_AVX512VL 1 "memory_operand" "m")] + [(match_operand:V48_AVX512VL 1 "memory_operand" "m") + (match_operand:V48_AVX512VL 4 "maskload_else_operand")] UNSPEC_MASKLOAD) (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] @@ -1523,7 +1525,8 @@ (define_insn "*<avx512>_load<mode>_mask" (define_insn_and_split "*<avx512>_load<mode>" [(set (match_operand:V48_AVX512VL 0 "register_operand") (unspec:V48_AVX512VL - [(match_operand:V48_AVX512VL 1 "memory_operand")] + [(match_operand:V48_AVX512VL 1 "memory_operand") + (match_operand:V48_AVX512VL 2 "maskload_else_operand")] UNSPEC_MASKLOAD))] "TARGET_AVX512F" "#" @@ -1545,7 +1548,8 @@ (define_expand "<avx512>_load<mode>_mask" } else if (MEM_P (operands[1])) operands[1] = gen_rtx_UNSPEC (<MODE>mode, - gen_rtvec(1, operands[1]), + gen_rtvec(2, operands[1], + CONST0_RTX (<MODE>mode)), UNSPEC_MASKLOAD); }) @@ -1554,7 +1558,8 @@ (define_insn "*<avx512>_load<mode>_mask" [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v") (vec_merge:VI12HFBF_AVX512VL (unspec:VI12HFBF_AVX512VL - [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")] + [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m") + (match_operand:VI12HFBF_AVX512VL 4 "maskload_else_operand")] UNSPEC_MASKLOAD) (match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand" "0C") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] @@ -1567,7 +1572,8 @@ (define_insn "*<avx512>_load<mode>_mask" (define_insn_and_split "*<avx512>_load<mode>" [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v") (unspec:VI12HFBF_AVX512VL - [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")] + [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m") + (match_operand:VI12HFBF_AVX512VL 2 "maskload_else_operand")] UNSPEC_MASKLOAD))] "TARGET_AVX512BW" "#" @@ -28440,7 +28446,8 @@ 
(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>" [(set (match_operand:V48_128_256 0 "register_operand" "=x") (unspec:V48_128_256 [(match_operand:<sseintvecmode> 2 "register_operand" "x") - (match_operand:V48_128_256 1 "memory_operand" "jm")] + (match_operand:V48_128_256 1 "memory_operand" "jm") + (match_operand:V48_128_256 3 "maskload_else_operand")] UNSPEC_MASKMOV))] "TARGET_AVX" { @@ -28481,7 +28488,8 @@ (define_expand "maskload<mode><sseintvecmodelower>" [(set (match_operand:V48_128_256 0 "register_operand") (unspec:V48_128_256 [(match_operand:<sseintvecmode> 2 "register_operand") - (match_operand:V48_128_256 1 "memory_operand")] + (match_operand:V48_128_256 1 "memory_operand") + (match_operand:V48_128_256 3 "maskload_else_operand")] UNSPEC_MASKMOV))] "TARGET_AVX") @@ -28489,20 +28497,24 @@ (define_expand "maskload<mode><avx512fmaskmodelower>" [(set (match_operand:V48_AVX512VL 0 "register_operand") (vec_merge:V48_AVX512VL (unspec:V48_AVX512VL - [(match_operand:V48_AVX512VL 1 "memory_operand")] + [(match_operand:V48_AVX512VL 1 "memory_operand") + (match_operand:V48_AVX512VL 3 "maskload_else_operand")] UNSPEC_MASKLOAD) (match_dup 0) - (match_operand:<avx512fmaskmode> 2 "register_operand")))] + (match_operand:<avx512fmaskmode> 2 "register_operand"))) + ] "TARGET_AVX512F") (define_expand "maskload<mode><avx512fmaskmodelower>" [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand") (vec_merge:VI12HFBF_AVX512VL (unspec:VI12HFBF_AVX512VL - [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")] + [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand") + (match_operand:VI12HFBF_AVX512VL 3 "maskload_else_operand")] UNSPEC_MASKLOAD) (match_dup 0) - (match_operand:<avx512fmaskmode> 2 "register_operand")))] + (match_operand:<avx512fmaskmode> 2 "register_operand"))) + ] "TARGET_AVX512BW") (define_expand "maskstore<mode><sseintvecmodelower>" @@ -29067,20 +29079,22 @@ (define_expand "avx2_gathersi<mode>" (unspec:VEC_GATHER_MODE [(match_operand:VEC_GATHER_MODE 
1 "register_operand") (mem:<ssescalarmode> - (match_par_dup 6 + (match_par_dup 7 [(match_operand 2 "vsib_address_operand") (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand") - (match_operand:SI 5 "const1248_operand ")])) + (match_operand:SI 5 "const1248_operand ") + (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")])) (mem:BLK (scratch)) (match_operand:VEC_GATHER_MODE 4 "register_operand")] UNSPEC_GATHER)) - (clobber (match_scratch:VEC_GATHER_MODE 7))])] + (clobber (match_scratch:VEC_GATHER_MODE 8))])] "TARGET_AVX2" { - operands[6] - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], - operands[5]), UNSPEC_VSIBADDR); + operands[7] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], + operands[5], operands[6]), + UNSPEC_VSIBADDR); }) (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>" @@ -29091,7 +29105,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>" [(unspec:P [(match_operand:P 3 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x") - (match_operand:SI 6 "const1248_operand")] + (match_operand:SI 6 "const1248_operand") + (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")] @@ -29112,7 +29127,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2" [(unspec:P [(match_operand:P 2 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x") - (match_operand:SI 5 "const1248_operand")] + (match_operand:SI 5 "const1248_operand") + (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")] @@ -29130,20 +29146,22 @@ (define_expand "avx2_gatherdi<mode>" (unspec:VEC_GATHER_MODE [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") (mem:<ssescalarmode> - (match_par_dup 6 + (match_par_dup 7 [(match_operand 2 "vsib_address_operand") (match_operand:<VEC_GATHER_IDXDI> 
3 "register_operand") - (match_operand:SI 5 "const1248_operand ")])) + (match_operand:SI 5 "const1248_operand ") + (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")])) (mem:BLK (scratch)) (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")] UNSPEC_GATHER)) - (clobber (match_scratch:VEC_GATHER_MODE 7))])] + (clobber (match_scratch:VEC_GATHER_MODE 8))])] "TARGET_AVX2" { - operands[6] - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], - operands[5]), UNSPEC_VSIBADDR); + operands[7] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], + operands[5], operands[6]), + UNSPEC_VSIBADDR); }) (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>" @@ -29154,7 +29172,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>" [(unspec:P [(match_operand:P 3 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") - (match_operand:SI 6 "const1248_operand")] + (match_operand:SI 6 "const1248_operand") + (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")] @@ -29175,7 +29194,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2" [(unspec:P [(match_operand:P 2 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") - (match_operand:SI 5 "const1248_operand")] + (match_operand:SI 5 "const1248_operand") + (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] @@ -29201,7 +29221,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_3" [(unspec:P [(match_operand:P 3 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") - (match_operand:SI 6 "const1248_operand")] + (match_operand:SI 6 "const1248_operand") + (match_operand:VI4F_256 8 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" 
"1")] @@ -29225,7 +29246,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_4" [(unspec:P [(match_operand:P 2 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") - (match_operand:SI 5 "const1248_operand")] + (match_operand:SI 5 "const1248_operand") + (match_operand:VI4F_256 7 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] @@ -29246,17 +29268,19 @@ (define_expand "<avx512>_gathersi<mode>" [(match_operand:VI48F 1 "register_operand") (match_operand:<avx512fmaskmode> 4 "register_operand") (mem:<ssescalarmode> - (match_par_dup 6 + (match_par_dup 7 [(match_operand 2 "vsib_address_operand") (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand") - (match_operand:SI 5 "const1248_operand")]))] + (match_operand:SI 5 "const1248_operand") + (match_operand:VI48F 6 "maskload_else_operand")]))] UNSPEC_GATHER)) - (clobber (match_scratch:<avx512fmaskmode> 7))])] + (clobber (match_scratch:<avx512fmaskmode> 8))])] "TARGET_AVX512F" { - operands[6] - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], - operands[5]), UNSPEC_VSIBADDR); + operands[7] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], + operands[5], operands[6]), + UNSPEC_VSIBADDR); }) (define_insn "*avx512f_gathersi<VI48F:mode>" @@ -29268,7 +29292,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>" [(unspec:P [(match_operand:P 4 "vsib_address_operand" "Tv") (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v") - (match_operand:SI 5 "const1248_operand")] + (match_operand:SI 5 "const1248_operand") + (match_operand:VI48F 8 "maskload_else_operand")] UNSPEC_VSIBADDR)])] UNSPEC_GATHER)) (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))] @@ -29289,7 +29314,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>_2" [(unspec:P [(match_operand:P 3 "vsib_address_operand" "Tv") (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v") - (match_operand:SI 4 "const1248_operand")] + 
(match_operand:SI 4 "const1248_operand") + (match_operand:VI48F 7 "maskload_else_operand")] UNSPEC_VSIBADDR)])] UNSPEC_GATHER)) (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))] @@ -29308,17 +29334,19 @@ (define_expand "<avx512>_gatherdi<mode>" [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") (match_operand:QI 4 "register_operand") (mem:<ssescalarmode> - (match_par_dup 6 + (match_par_dup 7 [(match_operand 2 "vsib_address_operand") (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand") - (match_operand:SI 5 "const1248_operand")]))] + (match_operand:SI 5 "const1248_operand") + (match_operand:VI48F 6 "maskload_else_operand")]))] UNSPEC_GATHER)) - (clobber (match_scratch:QI 7))])] + (clobber (match_scratch:QI 8))])] "TARGET_AVX512F" { - operands[6] - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], - operands[5]), UNSPEC_VSIBADDR); + operands[7] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], + operands[5], operands[6]), + UNSPEC_VSIBADDR); }) (define_insn "*avx512f_gatherdi<VI48F:mode>" @@ -29330,7 +29358,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>" [(unspec:P [(match_operand:P 4 "vsib_address_operand" "Tv") (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v") - (match_operand:SI 5 "const1248_operand")] + (match_operand:SI 5 "const1248_operand") + (match_operand:VI48F 8 "maskload_else_operand")] UNSPEC_VSIBADDR)])] UNSPEC_GATHER)) (clobber (match_scratch:QI 2 "=&Yk"))] @@ -29351,7 +29380,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>_2" [(unspec:P [(match_operand:P 3 "vsib_address_operand" "Tv") (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v") - (match_operand:SI 4 "const1248_operand")] + (match_operand:SI 4 "const1248_operand") + (match_operand:VI48F 7 "maskload_else_operand")] UNSPEC_VSIBADDR)])] UNSPEC_GATHER)) (clobber (match_scratch:QI 1 "=&Yk"))] @@ -29388,7 +29418,7 @@ (define_expand "<avx512>_scattersi<mode>" operands[5] = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2], 
operands[4], operands[1]), - UNSPEC_VSIBADDR); + UNSPEC_VSIBADDR); }) (define_insn "*avx512f_scattersi<VI48F:mode>" -- 2.45.2