Hi,
Add support for early gimple folding of the vec_mergee() and vec_mergeo()
intrinsics.
Testcases posted separately.
Tested across assorted Power Linux platforms.
OK for trunk?
Thanks,
-Will
[gcc]
2018-10-09 Will Schmidt <[email protected]>
* config/rs6000/rs6000.c (fold_mergeeo_helper): New helper function.
(rs6000_gimple_fold_builtin): Add hooks for vec_mergee and vec_mergeo
intrinsics.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5c7ab2b..a77049e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15161,10 +15161,57 @@ fold_mergehl_helper (gimple_stmt_iterator *gsi,
gimple *stmt, int use_high)
gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
gimple_set_location (g, gimple_location (stmt));
gsi_replace (gsi, g, true);
}
+/* Helper function to handle the vector merge[eo] built-ins.  Replace STMT at
+ GSI with lhs = VEC_PERM_EXPR <arg0, arg1, sel>, keeping STMT's location.
+ With N elements in lhs, sel is { SHIFT, SHIFT + N, SHIFT + 2, SHIFT + 2 + N,
+ ... }; callers pass SHIFT = 0 for vec_mergee and SHIFT = 1 for vec_mergeo.  */
+static void
+fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int shift)
+{
+ tree arg0 = gimple_call_arg (stmt, 0);
+ tree arg1 = gimple_call_arg (stmt, 1);
+ tree lhs = gimple_call_lhs (stmt);
+ tree lhs_type = TREE_TYPE (lhs);
+ int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
+
+ /* The selector needs integer elements.  Reuse lhs_type when its elements
+ are already integral; otherwise use V2DI for double elements and V4SI
+ for float elements.  Any other element type is not expected here.  */
+ tree permute_type;
+ if (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)))
+ permute_type = lhs_type;
+ else
+ {
+ if (types_compatible_p (TREE_TYPE (lhs_type),
+ TREE_TYPE (V2DF_type_node)))
+ permute_type = V2DI_type_node;
+ else if (types_compatible_p (TREE_TYPE (lhs_type),
+ TREE_TYPE (V4SF_type_node)))
+ permute_type = V4SI_type_node;
+ else
+ gcc_unreachable ();
+ }
+ tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
+
+ /* Build the selector: pair I is (2*I + shift, 2*I + shift + n_elts).  */
+ for (int i = 0; i < n_elts / 2; i++)
+ {
+ elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
+ 2*i + shift));
+ elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
+ 2*i + shift + n_elts));
+ }
+
+ tree permute = elts.build ();
+
+ gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi, g, true);
+}
/* Fold a machine-dependent built-in in GIMPLE. (For folding into
a constant, use rs6000_fold_builtin.) */
bool
@@ -15862,10 +15909,25 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
case VSX_BUILTIN_XXMRGHW_4SF:
case VSX_BUILTIN_VEC_MERGEH_V2DF:
fold_mergehl_helper (gsi, stmt, 0);
return true;
+ /* Flavors of vec_mergee. */
+ case P8V_BUILTIN_VMRGEW_V4SI:
+ case P8V_BUILTIN_VMRGEW_V2DI:
+ case P8V_BUILTIN_VMRGEW_V4SF:
+ case P8V_BUILTIN_VMRGEW_V2DF:
+ fold_mergeeo_helper (gsi, stmt, 0);
+ return true;
+ /* Flavors of vec_mergeo. */
+ case P8V_BUILTIN_VMRGOW_V4SI:
+ case P8V_BUILTIN_VMRGOW_V2DI:
+ case P8V_BUILTIN_VMRGOW_V4SF:
+ case P8V_BUILTIN_VMRGOW_V2DF:
+ fold_mergeeo_helper (gsi, stmt, 1);
+ return true;
+
/* d = vec_pack (a, b) */
case P8V_BUILTIN_VPKUDUM:
case ALTIVEC_BUILTIN_VPKUHUM:
case ALTIVEC_BUILTIN_VPKUWUM:
{