Hi,
Add support for folding of vec_msum in GIMPLE.
    
This uses the DOT_PROD_EXPR gimple op, which is sensitive to type mismatches:
        error: type mismatch in dot product reduction
        __vector signed int
        __vector signed char
        __vector unsigned char
        D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;
So for those cases with a signed/unsigned mismatch in the arguments, this
converts those arguments to their signed type.
    
This also adds a define_expand for sdot_prodv16qi. This is based on a similar
existing entry.
    
Testing coverage is handled by the existing gcc.target/powerpc/fold-vec-msum*.c tests.
    
Sniff-tests have passed on P8.  Full regtests are currently running on other
assorted Power systems.
OK for trunk with successful results?
    
Thanks
-Will
    
[gcc]

2017-12-01  Will Schmidt  <will_schm...@vnet.ibm.com>

        * config/rs6000/altivec.md (sdot_prodv16qi): New.
        * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
        gimple-folding of vec_msum.
        (builtin_function_type): Add entries for VMSUMU[BH]M and VMSUMMBM.

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 7122f99..fa9e121 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -3349,11 +3349,26 @@
                                  (match_operand:V8HI 2 "register_operand" "v")]
                                 UNSPEC_VMSUMSHM)))]
   "TARGET_ALTIVEC"
   "
 {
-  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1], operands[2], operands[3]));
+  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
+                                  operands[2], operands[3]));
+  DONE;
+}")
+
+(define_expand "sdot_prodv16qi"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+        (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
+                   (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")
+                                 (match_operand:V16QI 2 "register_operand" "v")]
+                                UNSPEC_VMSUMM)))]
+  "TARGET_ALTIVEC"
+  "
+{
+  emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
+                                  operands[2], operands[3]));
   DONE;
 }")
 
 (define_expand "widen_usum<mode>3"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 551d9c4..552fcdd 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case VSX_BUILTIN_CMPLE_2DI:
     case VSX_BUILTIN_CMPLE_U2DI:
       fold_compare_helper (gsi, LE_EXPR, stmt);
       return true;
 
+    /* vec_msum.  */
+    case ALTIVEC_BUILTIN_VMSUMUHM:
+    case ALTIVEC_BUILTIN_VMSUMSHM:
+    case ALTIVEC_BUILTIN_VMSUMUBM:
+    case ALTIVEC_BUILTIN_VMSUMMBM:
+      {
+       arg0 = gimple_call_arg (stmt, 0);
+       arg1 = gimple_call_arg (stmt, 1);
+       tree arg2 = gimple_call_arg (stmt, 2);
+       lhs = gimple_call_lhs (stmt);
+       if ( TREE_TYPE (arg0) == TREE_TYPE (arg1))
+         g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
+       else
+         {
+           // For the case where we have a mix of signed/unsigned
+           // arguments, convert both multiply args to their signed type.
+           gimple_seq stmts = NULL;
+           location_t loc = gimple_location (stmt);
+           tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
+           tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type, arg0);
+           tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type, arg1);
+           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+           g = gimple_build_assign (lhs, DOT_PROD_EXPR,
+                                    signed_arg0, signed_arg1, arg2);
+         }
+       gimple_set_location (g, gimple_location (stmt));
+       gsi_replace (gsi, g, true);
+       return true;
+      }
+
     default:
       if (TARGET_DEBUG_BUILTIN)
        fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
                 fn_code, fn_name1, fn_name2);
       break;
@@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
     case CRYPTO_BUILTIN_VPERMXOR_V8HI:
     case CRYPTO_BUILTIN_VPERMXOR_V16QI:
     case CRYPTO_BUILTIN_VSHASIGMAW:
     case CRYPTO_BUILTIN_VSHASIGMAD:
     case CRYPTO_BUILTIN_VSHASIGMA:
+    case ALTIVEC_BUILTIN_VMSUMUHM:
+    case ALTIVEC_BUILTIN_VMSUMUBM:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       h.uns_p[2] = 1;
       h.uns_p[3] = 1;
       break;
 
+    /* The second parm to this vec_msum variant is unsigned.  */
+    case ALTIVEC_BUILTIN_VMSUMMBM:
+      h.uns_p[2] = 1;
+      break;
+
     /* signed permute functions with unsigned char mask.  */
     case ALTIVEC_BUILTIN_VPERM_16QI:
     case ALTIVEC_BUILTIN_VPERM_8HI:
     case ALTIVEC_BUILTIN_VPERM_4SI:
     case ALTIVEC_BUILTIN_VPERM_4SF:


Reply via email to