Hi,

Folding of vector stores in GIMPLE:

- Add code to handle gimple folding for the vec_st (vector store)
  builtins.
- Remove the now-obsolete folding code for vec_st from rs6000-c.c.
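For reference, a minimal example of the kind of call this folding
targets (the function and variable names here are illustrative, not
taken from the posted testcases):

  #include <altivec.h>

  void
  store_example (vector signed int vsi1, long long ll1,
                 vector signed int *vsip)
  {
    /* Stores vsi1 to the address (vsip + ll1) masked to a 16-byte
       boundary; with this patch the store is expanded during gimple
       folding rather than later at RTL expansion time.  */
    vec_st (vsi1, ll1, vsip);
  }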
There are two spots where I could use some feedback.

First - An early exit remains in place to prevent folding of statements
that do not have a LHS.  To allow folding of the stores to get past that
check, I have added a helper function (rs6000_builtin_valid_without_lhs)
that allows those store intrinsics to proceed.  I'm not sure the approach
(or the name I chose) is the best choice, so I'll defer to
recommendations on how to improve that. :-)

Second - This code (as-is) is subject to a TBAA-related issue, similar
to what was noticed in the gimple folding code for loads.  As-is, a
testcase such as

  void
  testst_struct1b (vector double vd1, long long ll1, struct S *p)
  {
    vec_st (vd1, ll1, (vector double *)p);
  }

will generate gimple that looks like:

  MEM[(struct S *)D.3218] = vd1;

If I rework the code, changing arg2_type from

  tree arg2_type = TREE_TYPE (arg2);

to

  tree arg2_type = ptr_type_node;

the generated gimple then looks like:

  MEM[(void *)D.3218] = vd1;

which is probably OK, but I cannot say for certain.  The generated .s
content is at least equivalent.

The resulting code is verified by testcases powerpc/fold-vec-st-*.c,
which have been posted separately (a sketch of what such a test might
look like appears after the diff).

Regtest looks clean on power6 and newer.

Pending feedback, OK for trunk?

Thanks,
-Will

[gcc]

2017-09-21  Will Schmidt  <will_schm...@vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add
	handling for early folding of vector stores (ALTIVEC_BUILTIN_ST_*).
	(rs6000_builtin_valid_without_lhs): New helper function.
	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
	Remove obsolete code for handling ALTIVEC_BUILTIN_VEC_ST.

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index a49db97..4a363a1 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -6470,82 +6470,10 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 			     convert (TREE_TYPE (stmt), arg0));
       stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
       return stmt;
     }
 
-  /* Expand vec_st into an expression that masks the address and
-     performs the store.  We need to expand this early to allow
-     the best aliasing, as by the time we get into RTL we no longer
-     are able to honor __restrict__, for example.  We may want to
-     consider this for all memory access built-ins.
-
-     When -maltivec=be is specified, or the wrong number of arguments
-     is provided, simply punt to existing built-in processing.  */
-
-  if (fcode == ALTIVEC_BUILTIN_VEC_ST
-      && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
-      && nargs == 3)
-    {
-      tree arg0 = (*arglist)[0];
-      tree arg1 = (*arglist)[1];
-      tree arg2 = (*arglist)[2];
-
-      /* Construct the masked address.  Let existing error handling take
-	 over if we don't have a constant offset.  */
-      arg1 = fold (arg1);
-
-      if (TREE_CODE (arg1) == INTEGER_CST)
-	{
-	  if (!ptrofftype_p (TREE_TYPE (arg1)))
-	    arg1 = build1 (NOP_EXPR, sizetype, arg1);
-
-	  tree arg2_type = TREE_TYPE (arg2);
-	  if (TREE_CODE (arg2_type) == ARRAY_TYPE && c_dialect_cxx ())
-	    {
-	      /* Force array-to-pointer decay for C++.  */
-	      arg2 = default_conversion (arg2);
-	      arg2_type = TREE_TYPE (arg2);
-	    }
-
-	  /* Find the built-in to make sure a compatible one exists; if not
-	     we fall back to default handling to get the error message.  */
-	  for (desc = altivec_overloaded_builtins;
-	       desc->code && desc->code != fcode; desc++)
-	    continue;
-
-	  for (; desc->code == fcode; desc++)
-	    if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
-		&& rs6000_builtin_type_compatible (TREE_TYPE (arg1), desc->op2)
-		&& rs6000_builtin_type_compatible (TREE_TYPE (arg2),
-						   desc->op3))
-	      {
-		tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type,
-					     arg2, arg1);
-		tree aligned
-		  = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type,
-				     addr, build_int_cst (arg2_type, -16));
-
-		tree arg0_type = TREE_TYPE (arg0);
-		if (TYPE_MODE (arg0_type) == V2DImode)
-		  /* Type-based aliasing analysis thinks vector long
-		     and vector long long are different and will put them
-		     in distinct alias classes.  Force our address type
-		     to be a may-alias type to avoid this.  */
-		  arg0_type
-		    = build_pointer_type_for_mode (arg0_type, Pmode,
-						   true/*can_alias_all*/);
-		else
-		  arg0_type = build_pointer_type (arg0_type);
-		aligned = build1 (NOP_EXPR, arg0_type, aligned);
-		tree stg = build_indirect_ref (loc, aligned, RO_NULL);
-		tree retval = build2 (MODIFY_EXPR, TREE_TYPE (stg), stg,
-				      convert (TREE_TYPE (stg), arg0));
-		return retval;
-	      }
-	}
-    }
-
   for (n = 0; !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
        fnargs = TREE_CHAIN (fnargs), n++)
     {
       tree decl_type = TREE_VALUE (fnargs);

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1978634..ef41534 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16155,10 +16155,29 @@ rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
 #else
   return NULL_TREE;
 #endif
 }
 
+/* Helper function to sort out which built-ins may be valid without having
+   a LHS.  */
+bool
+rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
+{
+  switch (fn_code)
+    {
+    case ALTIVEC_BUILTIN_STVX_V16QI:
+    case ALTIVEC_BUILTIN_STVX_V8HI:
+    case ALTIVEC_BUILTIN_STVX_V4SI:
+    case ALTIVEC_BUILTIN_STVX_V4SF:
+    case ALTIVEC_BUILTIN_STVX_V2DI:
+    case ALTIVEC_BUILTIN_STVX_V2DF:
+      return true;
+    default:
+      return false;
+    }
+}
+
 /* Fold a machine-dependent built-in in GIMPLE.  (For folding into
    a constant, use rs6000_fold_builtin.)  */
 
 bool
 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
@@ -16182,12 +16201,13 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	     fn_code, fn_name1, fn_name2);
 
   if (!rs6000_fold_gimple)
     return false;
 
-  /* Generic solution to prevent gimple folding of code without a LHS.  */
-  if (!gimple_call_lhs (stmt))
+  /* Prevent gimple folding for code that does not have a LHS, unless it is
+     allowed per the rs6000_builtin_valid_without_lhs helper function.  */
+  if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
     return false;
 
   switch (fn_code)
     {
     /* Flavors of vec_add.  We deliberately don't expand
@@ -16585,11 +16605,48 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 					   build_int_cst (arg1_type, 0)));
 	gimple_set_location (g, loc);
 	gsi_replace (gsi, g, true);
 	return true;
       }
-
+    /* Vector stores.  */
+    case ALTIVEC_BUILTIN_STVX_V16QI:
+    case ALTIVEC_BUILTIN_STVX_V8HI:
+    case ALTIVEC_BUILTIN_STVX_V4SI:
+    case ALTIVEC_BUILTIN_STVX_V4SF:
+    case ALTIVEC_BUILTIN_STVX_V2DI:
+    case ALTIVEC_BUILTIN_STVX_V2DF:
+      {
+	/* Do not fold for -maltivec=be on LE targets.  */
+	if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
+	  return false;
+	arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
+	arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
+	tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
+	location_t loc = gimple_location (stmt);
+	tree arg0_type = TREE_TYPE (arg0);
+	tree arg2_type = TREE_TYPE (arg2);
+	/* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+	   the tree using the value from arg1.  The resulting type will match
+	   the type of arg2.  */
+	gimple_seq stmts = NULL;
+	tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
+	tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+				       arg2_type, arg2, temp_offset);
+	/* Mask off any lower bits from the address.  */
+	tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
+					  arg2_type, temp_addr,
+					  build_int_cst (arg2_type, -16));
+	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	/* The desired gimple result should be similar to:
+	   MEM[(__vector floatD.1407 *)_1] = vf1D.2697;  */
+	gimple *g;
+	g = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
+					 build_int_cst (arg2_type, 0)), arg0);
+	gimple_set_location (g, loc);
+	gsi_replace (gsi, g, true);
+	return true;
+      }
     default:
       if (TARGET_DEBUG_BUILTIN)
 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
 		 fn_code, fn_name1, fn_name2);
       break;
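For context, a sketch of what one of the fold-vec-st-*.c tests might
look like; this is illustrative only, and the actual posted testcases
may differ in names, types covered, and directives:

  /* { dg-do compile } */
  /* { dg-require-effective-target powerpc_altivec_ok } */
  /* { dg-options "-maltivec -O2" } */

  #include <altivec.h>

  void
  testst_1 (vector signed int vsi1, long long ll1, vector signed int *vsip)
  {
    /* Expect this to fold in GIMPLE and still emit a single stvx.  */
    vec_st (vsi1, ll1, vsip);
  }

  /* { dg-final { scan-assembler-times "stvx" 1 } } */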