Hi,

Add support for folding of vec_msum in GIMPLE.

This uses the DOT_PROD_EXPR gimple op, which is sensitive to type
mismatches:

    error: type mismatch in dot product reduction
    __vector signed int
    __vector signed char
    __vector unsigned char
    D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;

For those cases with a signed/unsigned mismatch in the multiply
arguments, the fold converts both of those arguments to their signed
type.
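For illustration, a minimal sketch of the mixed-signedness case (my
example, not part of the patch or the testsuite):

    #include <altivec.h>

    vector signed int
    test_vmsummbm (vector signed char vsc, vector unsigned char vuc,
                   vector signed int vsi)
    {
      /* This overload maps to ALTIVEC_BUILTIN_VMSUMMBM.  */
      return vec_msum (vsc, vuc, vsi);
    }

After the fold, the GIMPLE looks roughly like (SSA names illustrative;
the conversion of the already-signed argument is useless and so emits
no statement):

    _1 = (vector(16) signed char) vuc;
    _2 = DOT_PROD_EXPR <vsc, _1, vsi>;

i.e. the unsigned multiply argument is converted to the signed type so
that DOT_PROD_EXPR type-checks.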
This also adds a define_expand for sdot_prodv16qi, based on a similar
existing entry.  (An illustrative loop for that pattern follows the
patch below.)

Testing coverage is handled by the existing
gcc.target/powerpc/fold-vec-msum*.c tests.

Sniff-tests have passed on P8.  Full regtests are currently running on
other assorted POWER systems.

OK for trunk with successful results?

Thanks,
-Will

[gcc]

2017-12-01  Will Schmidt  <will_schm...@vnet.ibm.com>

	* config/rs6000/altivec.md (sdot_prodv16qi): New.
	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add
	support for gimple-folding of vec_msum.
	(builtin_function_type): Add entries for VMSUMU[BH]M and
	VMSUMMBM.

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 7122f99..fa9e121 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -3349,11 +3349,26 @@
 		     (match_operand:V8HI 2 "register_operand" "v")]
 		     UNSPEC_VMSUMSHM)))]
   "TARGET_ALTIVEC"
   "
 {
-  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1], operands[2], operands[3]));
+  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
+				   operands[2], operands[3]));
+  DONE;
+}")
+
+(define_expand "sdot_prodv16qi"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+	(plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
+		   (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")
+				 (match_operand:V16QI 2 "register_operand" "v")]
+				UNSPEC_VMSUMM)))]
+  "TARGET_ALTIVEC"
+  "
+{
+  emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
+				   operands[2], operands[3]));
   DONE;
 }")
 
 (define_expand "widen_usum<mode>3"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 551d9c4..552fcdd 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case VSX_BUILTIN_CMPLE_2DI:
     case VSX_BUILTIN_CMPLE_U2DI:
       fold_compare_helper (gsi, LE_EXPR, stmt);
       return true;
 
+    /* vec_msum.  */
+    case ALTIVEC_BUILTIN_VMSUMUHM:
+    case ALTIVEC_BUILTIN_VMSUMSHM:
+    case ALTIVEC_BUILTIN_VMSUMUBM:
+    case ALTIVEC_BUILTIN_VMSUMMBM:
+      {
+	arg0 = gimple_call_arg (stmt, 0);
+	arg1 = gimple_call_arg (stmt, 1);
+	tree arg2 = gimple_call_arg (stmt, 2);
+	lhs = gimple_call_lhs (stmt);
+	if (TREE_TYPE (arg0) == TREE_TYPE (arg1))
+	  g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
+	else
+	  {
+	    /* For the case where we have a mix of signed/unsigned
+	       arguments, convert both multiply args to their signed type.  */
+	    gimple_seq stmts = NULL;
+	    location_t loc = gimple_location (stmt);
+	    tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
+	    tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type, arg0);
+	    tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type, arg1);
+	    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	    g = gimple_build_assign (lhs, DOT_PROD_EXPR,
+				     signed_arg0, signed_arg1, arg2);
+	  }
+	gimple_set_location (g, gimple_location (stmt));
+	gsi_replace (gsi, g, true);
+	return true;
+      }
+
     default:
       if (TARGET_DEBUG_BUILTIN)
	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
		 fn_code, fn_name1, fn_name2);
       break;
@@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
     case CRYPTO_BUILTIN_VPERMXOR_V8HI:
     case CRYPTO_BUILTIN_VPERMXOR_V16QI:
     case CRYPTO_BUILTIN_VSHASIGMAW:
     case CRYPTO_BUILTIN_VSHASIGMAD:
     case CRYPTO_BUILTIN_VSHASIGMA:
+    case ALTIVEC_BUILTIN_VMSUMUHM:
+    case ALTIVEC_BUILTIN_VMSUMUBM:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       h.uns_p[2] = 1;
       h.uns_p[3] = 1;
       break;
 
+    /* The second parm to this vec_msum variant is unsigned.  */
+    case ALTIVEC_BUILTIN_VMSUMMBM:
+      h.uns_p[2] = 1;
+      break;
+
     /* signed permute functions with unsigned char mask.  */
     case ALTIVEC_BUILTIN_VPERM_16QI:
     case ALTIVEC_BUILTIN_VPERM_8HI:
     case ALTIVEC_BUILTIN_VPERM_4SI:
     case ALTIVEC_BUILTIN_VPERM_4SF:
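For reference, the sort of loop the sdot_prodv16qi standard pattern is
meant to catch (again my illustration, not one of the existing tests):

    int
    dotprod (signed char *a, signed char *b, int n)
    {
      int sum = 0;
      for (int i = 0; i < n; i++)
        /* A widening multiply-accumulate reduction; the vectorizer
           can recognize this as a DOT_PROD_EXPR reduction and expand
           it through sdot_prodv16qi (i.e. vmsummbm).  */
        sum += a[i] * b[i];
      return sum;
    }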