On December 1, 2017 6:22:21 PM GMT+01:00, Will Schmidt <will_schm...@vnet.ibm.com> wrote:
>Hi,
>Add support for folding of vec_msum in GIMPLE.
>
>This uses the DOT_PROD_EXPR gimple op, which is sensitive to type
>mismatches:
>  error: type mismatch in dot product reduction
>  __vector signed int
>  __vector signed char
>  __vector unsigned char
>  D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;
>So for the cases with a signed/unsigned mismatch in the arguments,
>this converts those arguments to their signed type.
>
>This also adds a define_expand for sdot_prodv16qi, based on a
>similar existing entry.
>
>Test coverage is handled by the existing
>gcc.target/powerpc/fold-vec-msum*.c tests.
>
>Sniff-tests have passed on P8.  Full regtests are currently running
>on other assorted power systems.
>OK for trunk with successful results?
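For concreteness, the mixed-sign case the fold targets looks like this
at the source level.  This is only a sketch in the spirit of the
existing fold-vec-msum*.c tests (the function name is made up): with a
signed char and an unsigned char multiply operand, vec_msum maps to the
vmsummbm form.

  #include <altivec.h>

  vector signed int
  test_msummbm (vector signed char vsc, vector unsigned char vuc,
                vector signed int vsi)
  {
    /* Multiply corresponding char elements, sum each group of four
       products, and add the corresponding word of the accumulator.  */
    return vec_msum (vsc, vuc, vsi);
  }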
Note DOT_PROD_EXPR is only useful when the result is later reduced to a
scalar and the reduction order is irrelevant.  This is because GIMPLE
doesn't specify whether the reduction reduces odd/even or high/low lanes
of the argument vectors.  Does vec_msum specify that?

That said, DOT_PROD_EXPR exists as a 'hack' for the vectorizer and isn't
otherwise useful for GIMPLE.

Richard.

>Thanks
>-Will
>
>[gcc]
>
>2017-12-01  Will Schmidt  <will_schm...@vnet.ibm.com>
>
>    * config/rs6000/altivec.md (sdot_prodv16qi): New.
>    * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
>    gimple-folding of vec_msum.
>    (builtin_function_type): Add entries for VMSUMU[BH]M and VMSUMMBM.
>
>diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
>index 7122f99..fa9e121 100644
>--- a/gcc/config/rs6000/altivec.md
>+++ b/gcc/config/rs6000/altivec.md
>@@ -3349,11 +3349,26 @@
>                     (match_operand:V8HI 2 "register_operand" "v")]
>                    UNSPEC_VMSUMSHM)))]
>   "TARGET_ALTIVEC"
>   "
>{
>-  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1], operands[2], operands[3]));
>+  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
>+                                   operands[2], operands[3]));
>+  DONE;
>+}")
>+
>+(define_expand "sdot_prodv16qi"
>+  [(set (match_operand:V4SI 0 "register_operand" "=v")
>+        (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
>+                   (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")
>+                                 (match_operand:V16QI 2 "register_operand" "v")]
>+                                UNSPEC_VMSUMM)))]
>+  "TARGET_ALTIVEC"
>+  "
>+{
>+  emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
>+                                   operands[2], operands[3]));
>   DONE;
>}")
>
>(define_expand "widen_usum<mode>3"
>  [(set (match_operand:V4SI 0 "register_operand" "=v")
>diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>index 551d9c4..552fcdd 100644
>--- a/gcc/config/rs6000/rs6000.c
>+++ b/gcc/config/rs6000/rs6000.c
>@@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>     case VSX_BUILTIN_CMPLE_2DI:
>     case VSX_BUILTIN_CMPLE_U2DI:
>       fold_compare_helper (gsi, LE_EXPR, stmt);
>       return true;
>
>+    /* vec_msum.  */
>+    case ALTIVEC_BUILTIN_VMSUMUHM:
>+    case ALTIVEC_BUILTIN_VMSUMSHM:
>+    case ALTIVEC_BUILTIN_VMSUMUBM:
>+    case ALTIVEC_BUILTIN_VMSUMMBM:
>+      {
>+        arg0 = gimple_call_arg (stmt, 0);
>+        arg1 = gimple_call_arg (stmt, 1);
>+        tree arg2 = gimple_call_arg (stmt, 2);
>+        lhs = gimple_call_lhs (stmt);
>+        if (TREE_TYPE (arg0) == TREE_TYPE (arg1))
>+          g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
>+        else
>+          {
>+            /* For the case where we have a mix of signed/unsigned
>+               arguments, convert both multiply args to their signed
>+               type.  */
>+            gimple_seq stmts = NULL;
>+            location_t loc = gimple_location (stmt);
>+            tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
>+            tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type, arg0);
>+            tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type, arg1);
>+            gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>+            g = gimple_build_assign (lhs, DOT_PROD_EXPR,
>+                                     signed_arg0, signed_arg1, arg2);
>+          }
>+        gimple_set_location (g, gimple_location (stmt));
>+        gsi_replace (gsi, g, true);
>+        return true;
>+      }
>+
>     default:
>       if (TARGET_DEBUG_BUILTIN)
>         fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
>                  fn_code, fn_name1, fn_name2);
>       break;
>@@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
>     case CRYPTO_BUILTIN_VPERMXOR_V8HI:
>     case CRYPTO_BUILTIN_VPERMXOR_V16QI:
>     case CRYPTO_BUILTIN_VSHASIGMAW:
>     case CRYPTO_BUILTIN_VSHASIGMAD:
>     case CRYPTO_BUILTIN_VSHASIGMA:
>+    case ALTIVEC_BUILTIN_VMSUMUHM:
>+    case ALTIVEC_BUILTIN_VMSUMUBM:
>       h.uns_p[0] = 1;
>       h.uns_p[1] = 1;
>       h.uns_p[2] = 1;
>       h.uns_p[3] = 1;
>       break;
>
>+    /* The second parm to this vec_msum variant is unsigned.  */
>+    case ALTIVEC_BUILTIN_VMSUMMBM:
>+      h.uns_p[2] = 1;
>+      break;
>+
>     /* signed permute functions with unsigned char mask.  */
>     case ALTIVEC_BUILTIN_VPERM_16QI:
>     case ALTIVEC_BUILTIN_VPERM_8HI:
>     case ALTIVEC_BUILTIN_VPERM_4SI:
>     case ALTIVEC_BUILTIN_VPERM_4SF:
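For completeness, here is roughly what the mixed-sign path above
produces in GIMPLE.  This is a hand-written sketch, not an actual dump;
the SSA temporary _1 is illustrative, and the variable names are taken
from the error message quoted at the top:

  /* Before folding (the vmsummbm form, mixed-sign multiply args):  */
  D.2798 = __builtin_altivec_vmsummbm (vsc2, vuc3, vsi2);

  /* After folding: the unsigned operand is converted to the signed
     vector type, then the call is replaced by a dot product.  */
  _1 = (__vector signed char) vuc3;
  D.2798 = DOT_PROD_EXPR <vsc2, _1, vsi2>;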