Hi
Enable GIMPLE folding of the vec_splat() intrinsic. (v3).
    
This uses the tree_vec_extract() function out of tree-vect-generic.c
to retrieve the splat value, which is a BIT_FIELD_REF.   That function is
made non-static as part of this change.
    
Testcases are already in-tree.
    
V2 updates, per feedback previously received.
Forced arg1 into range (modulo #elements) before attempting to extract
the splat value.
Removed the (now unnecessary) code that did bounds-checking before calling
the tree_vec_extract helper.
Used arg0_type rather than lhs_type for calculating the tree size.
    
V3 updates, inspired by additional offline discussion with Segher.
Noting that for vec_splat(arg1,arg2), the ABI describes ARG2 as an element
number less than the number of elements supported by the respective ARG1
type, so we do not attempt to gimple-fold the intrinsic if we determine our
value is out of range. Thus, the initial check ensures that ARG1 is both
constant and has a valid index into ARG0.
The subsequent modulo operation is no longer necessary, and has been removed.
Also eliminated a few now-unused variables.
    
Sniff-tests pass.  full regtest against powerpc platforms is in the queue.
OK for trunk with passing test results?
    
Thanks,
-Will
    
[gcc]
    
2018-08-20  Will Schmidt  <will_schm...@vnet.ibm.com>
    
        * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
          early gimple folding of vec_splat().
        * tree-vect-generic.c: Remove static from tree_vec_extract() definition.
        * gimple-fold.h:  Add an extern define for tree_vec_extract().

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5f77afd..4c3257f 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15768,10 +15768,52 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
         tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
         g = gimple_build_assign (lhs, splat_tree);
         gimple_set_location (g, gimple_location (stmt));
         gsi_replace (gsi, g, true);
         return true;
+       }
+
+    /* Flavors of vec_splat.  */
+    /* a = vec_splat (b, 0x3) becomes a = { b[3],b[3],b[3],...};  */
+    case ALTIVEC_BUILTIN_VSPLTB:
+    case ALTIVEC_BUILTIN_VSPLTH:
+    case ALTIVEC_BUILTIN_VSPLTW:
+    case VSX_BUILTIN_XXSPLTD_V2DI:
+    case VSX_BUILTIN_XXSPLTD_V2DF:
+      {
+       arg0 = gimple_call_arg (stmt, 0); /* input vector.  */
+       arg1 = gimple_call_arg (stmt, 1); /* index into arg0.  */
+       /* Only fold the vec_splat_*() if arg1 is both a constant value, and a 
valid
+        index into the arg0 vector.  */
+       unsigned int n_elts = VECTOR_CST_NELTS (arg0);
+       if (TREE_CODE (arg1) != INTEGER_CST
+           || TREE_INT_CST_LOW (arg1) > (n_elts -1))
+         return false;
+       lhs = gimple_call_lhs (stmt);
+       tree lhs_type = TREE_TYPE (lhs);
+       tree arg0_type = TREE_TYPE (arg0);
+       tree splat;
+       if (TREE_CODE (arg0) == VECTOR_CST)
+         splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1));
+       else
+         {
+           /* Determine (in bits) the length and start location of the
+              splat value for a call to the tree_vec_extract helper.  */
+           int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type))
+                                   * BITS_PER_UNIT / n_elts;
+           int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size;
+           tree len = build_int_cst (bitsizetype, splat_elem_size);
+           tree start = build_int_cst (bitsizetype, splat_start_bit);
+           splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0,
+                                     len, start);
+         }
+       /* And finally, build the new vector.  */
+       tree splat_tree = build_vector_from_val (lhs_type, splat);
+       g = gimple_build_assign (lhs, splat_tree);
+       gimple_set_location (g, gimple_location (stmt));
+       gsi_replace (gsi, g, true);
+       return true;
       }
 
     /* vec_mergel (integrals).  */
     case ALTIVEC_BUILTIN_VMRGLH:
     case ALTIVEC_BUILTIN_VMRGLW:
diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
index 04e9bfa..e634180 100644
--- a/gcc/gimple-fold.h
+++ b/gcc/gimple-fold.h
@@ -59,10 +59,11 @@ extern tree gimple_fold_indirect_ref (tree);
 extern bool gimple_fold_builtin_sprintf (gimple_stmt_iterator *);
 extern bool gimple_fold_builtin_snprintf (gimple_stmt_iterator *);
 extern bool arith_code_with_undefined_signed_overflow (tree_code);
 extern gimple_seq rewrite_to_defined_overflow (gimple *);
 extern void replace_call_with_value (gimple_stmt_iterator *, tree);
+extern tree tree_vec_extract (gimple_stmt_iterator *, tree, tree, tree, tree);
 
 /* gimple_build, functionally matching fold_buildN, outputs stmts
    int the provided sequence, matching and simplifying them on-the-fly.
    Supposed to replace force_gimple_operand (fold_buildN (...), ...).  */
 extern tree gimple_build (gimple_seq *, location_t,
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 909f790..1c9701d 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -118,11 +118,11 @@ build_word_mode_vector_type (int nunits)
 
 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
                              tree, tree, tree, tree, tree, enum tree_code,
                              tree);
 
-static inline tree
+tree
 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
                  tree t, tree bitsize, tree bitpos)
 {
   if (TREE_CODE (t) == SSA_NAME)
     {


Reply via email to