[PATCH, rs6000] [v2] Folding of vector loads in GIMPLE

Hi,

Add code to handle gimple folding for the vec_ld builtins.  Remove the
now-obsolete folding code for vec_ld from rs6000-c.c.  Surrounding
comments have been adjusted slightly so they continue to read OK for
the existing vec_st code.

The resulting code is specifically verified by the
powerpc/fold-vec-ld-*.c tests, which have been posted separately.
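To illustrate what gets folded, here is a minimal example (hypothetical,
but modeled on the shape of the fold-vec-ld-*.c tests mentioned above)
of a vec_ld call that is now replaced early in GIMPLE instead of
surviving until RTL expansion:

  #include <altivec.h>

  vector signed int
  test_ld (int offset, vector signed int *p)
  {
    /* With this patch, the builtin call is folded to an aligned
       MEM_REF in rs6000_gimple_fold_builtin rather than being
       expanded in altivec_resolve_overloaded_builtin.  */
    return vec_ld (offset, p);
  }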
For V2 of this patch, I've removed the chunk of code that prohibited
the gimple fold from occurring in BE environments.  This had fixed an
issue for me earlier during development, but it turns out it was not
necessary.  I've sniff-tested after removing that check and it looks OK.

> +      /* Limit folding of loads to LE targets.  */
> +      if (BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)
> +        return false;

I've restarted a regression test on this updated version.

OK for trunk (assuming successful regression test completion)?

Thanks,
-Will

[gcc]

2017-09-12  Will Schmidt  <will_schm...@vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add
	handling for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
	Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index fbab0a2..bb8a77d 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -6470,92 +6470,19 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 			  convert (TREE_TYPE (stmt), arg0));
       stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
       return stmt;
     }
 
-  /* Expand vec_ld into an expression that masks the address and
-     performs the load.  We need to expand this early to allow
+  /* Expand vec_st into an expression that masks the address and
+     performs the store.  We need to expand this early to allow
      the best aliasing, as by the time we get into RTL we no longer
      are able to honor __restrict__, for example.  We may want to
      consider this for all memory access built-ins.
 
      When -maltivec=be is specified, or the wrong number of arguments
      is provided, simply punt to existing built-in processing.  */
-  if (fcode == ALTIVEC_BUILTIN_VEC_LD
-      && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
-      && nargs == 2)
-    {
-      tree arg0 = (*arglist)[0];
-      tree arg1 = (*arglist)[1];
-
-      /* Strip qualifiers like "const" from the pointer arg.  */
-      tree arg1_type = TREE_TYPE (arg1);
-      if (!POINTER_TYPE_P (arg1_type) && TREE_CODE (arg1_type) != ARRAY_TYPE)
-        goto bad;
-
-      tree inner_type = TREE_TYPE (arg1_type);
-      if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
-        {
-          arg1_type = build_pointer_type (build_qualified_type (inner_type,
-                                                                0));
-          arg1 = fold_convert (arg1_type, arg1);
-        }
-
-      /* Construct the masked address.  Let existing error handling take
-         over if we don't have a constant offset.  */
-      arg0 = fold (arg0);
-
-      if (TREE_CODE (arg0) == INTEGER_CST)
-        {
-          if (!ptrofftype_p (TREE_TYPE (arg0)))
-            arg0 = build1 (NOP_EXPR, sizetype, arg0);
-
-          tree arg1_type = TREE_TYPE (arg1);
-          if (TREE_CODE (arg1_type) == ARRAY_TYPE)
-            {
-              arg1_type = TYPE_POINTER_TO (TREE_TYPE (arg1_type));
-              tree const0 = build_int_cstu (sizetype, 0);
-              tree arg1_elt0 = build_array_ref (loc, arg1, const0);
-              arg1 = build1 (ADDR_EXPR, arg1_type, arg1_elt0);
-            }
-
-          tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type,
-                                       arg1, arg0);
-          tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr,
-                                          build_int_cst (arg1_type, -16));
-
-          /* Find the built-in to get the return type so we can convert
-             the result properly (or fall back to default handling if the
-             arguments aren't compatible).  */
-          for (desc = altivec_overloaded_builtins;
-               desc->code && desc->code != fcode; desc++)
-            continue;
-
-          for (; desc->code == fcode; desc++)
-            if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
-                && (rs6000_builtin_type_compatible (TREE_TYPE (arg1),
-                                                    desc->op2)))
-              {
-                tree ret_type = rs6000_builtin_type (desc->ret_type);
-                if (TYPE_MODE (ret_type) == V2DImode)
-                  /* Type-based aliasing analysis thinks vector long
-                     and vector long long are different and will put them
-                     in distinct alias classes.  Force our return type
-                     to be a may-alias type to avoid this.  */
-                  ret_type
-                    = build_pointer_type_for_mode (ret_type, Pmode,
-                                                   true/*can_alias_all*/);
-                else
-                  ret_type = build_pointer_type (ret_type);
-                aligned = build1 (NOP_EXPR, ret_type, aligned);
-                tree ret_val = build_indirect_ref (loc, aligned, RO_NULL);
-                return ret_val;
-              }
-        }
-    }
-
-  /* Similarly for stvx.  */
   if (fcode == ALTIVEC_BUILTIN_VEC_ST
       && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
       && nargs == 3)
     {
       tree arg0 = (*arglist)[0];
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1338371..1fb5f44 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16547,10 +16547,61 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
       gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
       update_call_from_tree (gsi, res);
       return true;
     }
 
+    /* Vector loads.  */
+    case ALTIVEC_BUILTIN_LVX_V16QI:
+    case ALTIVEC_BUILTIN_LVX_V8HI:
+    case ALTIVEC_BUILTIN_LVX_V4SI:
+    case ALTIVEC_BUILTIN_LVX_V4SF:
+    case ALTIVEC_BUILTIN_LVX_V2DI:
+    case ALTIVEC_BUILTIN_LVX_V2DF:
+      {
+        gimple *g;
+        arg0 = gimple_call_arg (stmt, 0);  // offset
+        arg1 = gimple_call_arg (stmt, 1);  // address
+
+        lhs = gimple_call_lhs (stmt);
+        location_t loc = gimple_location (stmt);
+
+        tree arg1_type = TREE_TYPE (arg1);
+        tree lhs_type = TREE_TYPE (lhs);
+
+        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.
+           Create the tree using the value from arg0.  The resulting type
+           will match the type of arg1.  */
+        tree temp_offset = create_tmp_reg_or_ssa_name (sizetype);
+        g = gimple_build_assign (temp_offset, NOP_EXPR, arg0);
+        gimple_set_location (g, loc);
+        gsi_insert_before (gsi, g, GSI_SAME_STMT);
+        tree temp_addr = create_tmp_reg_or_ssa_name (arg1_type);
+        g = gimple_build_assign (temp_addr, POINTER_PLUS_EXPR, arg1,
+                                 temp_offset);
+        gimple_set_location (g, loc);
+        gsi_insert_before (gsi, g, GSI_SAME_STMT);
+
+        /* Mask off any lower bits from the address.  */
+        tree alignment_mask = build_int_cst (arg1_type, -16);
+        tree aligned_addr = create_tmp_reg_or_ssa_name (arg1_type);
+        g = gimple_build_assign (aligned_addr, BIT_AND_EXPR,
+                                 temp_addr, alignment_mask);
+        gimple_set_location (g, loc);
+        gsi_insert_before (gsi, g, GSI_SAME_STMT);
+
+        /* Use the build2 helper to set up the mem_ref.  The MEM_REF could
+           also take an offset, but since we've already incorporated the
+           offset above, here we just pass in a zero.  */
+        g = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
+                                              build_int_cst (arg1_type, 0)));
+        gimple_set_location (g, loc);
+        gsi_replace (gsi, g, true);
+
+        return true;
+      }
+
     default:
       if (TARGET_DEBUG_BUILTIN)
         fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
                  fn_code, fn_name1, fn_name2);
       break;
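For reference, here is a rough C-level sketch of the semantics the new
GIMPLE sequence implements (illustrative only, not the generated code):
the offset is added to the base pointer, the sum is masked down to a
16-byte boundary (the BIT_AND_EXPR with -16 above), and the load is
performed through the aligned pointer:

  #include <altivec.h>
  #include <stdint.h>

  vector signed int
  lvx_semantics (int offset, const vector signed int *p)
  {
    /* POINTER_PLUS_EXPR: base plus byte offset.  */
    uintptr_t addr = (uintptr_t) p + (uintptr_t) offset;
    /* BIT_AND_EXPR with -16: clear the low four address bits.  */
    addr &= (uintptr_t) -16;
    /* MEM_REF with zero offset through the aligned pointer.  */
    return *(const vector signed int *) addr;
  }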