> On Sep 12, 2017, at 4:08 PM, Will Schmidt <will_schm...@vnet.ibm.com> wrote:
> 
> Hi,
> 
> [PATCH, rs6000] [v2] Folding of vector loads in GIMPLE
> 
> Folding of vector loads in GIMPLE.
> 
> Add code to handle gimple folding for the vec_ld builtins.
> Remove the now-obsolete folding code for vec_ld from rs6000-c.c.  Surrounding
> comments have been adjusted slightly so they continue to read OK for the
> existing vec_st code.
> 
> The resulting code is specifically verified by the powerpc/fold-vec-ld-*.c
> tests, which have been posted separately.
> 
> For V2 of this patch, I've removed the chunk of code that prohibited the
> gimple fold from occurring in BE environments.  That check had fixed an issue
> for me earlier during development, but it turns out it was not necessary.
> I've sniff-tested after removing it and everything looks OK.
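For context while reading the patch, the fold fires on source like the
following.  This is only an illustrative snippet (the function name is made
up), not one of the posted fold-vec-ld-*.c tests:

  #include <altivec.h>

  /* On a powerpc64le target, a vec_ld call of this shape resolves to one of
     the ALTIVEC_BUILTIN_LVX_* builtins handled by the new gimple folding.  */
  vector signed int
  load_vsi (int offset, const vector signed int *p)
  {
    return vec_ld (offset, p);
  }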
Thanks!

> 
>> +  /* Limit folding of loads to LE targets.  */
>> +  if (BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)
>> +    return false;
> 
> I've restarted a regression test on this updated version.
> 
> OK for trunk (assuming successful regression test completion)?

Looks good to me otherwise, but Richard may have streamlining improvements,
so please wait for his review.  And of course Segher's.

Thanks,
Bill

> 
> Thanks,
> -Will
> 
> [gcc]
> 
> 2017-09-12  Will Schmidt  <will_schm...@vnet.ibm.com>
> 
>         * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
>         for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
>         * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>         Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.
> 
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index fbab0a2..bb8a77d 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -6470,92 +6470,19 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>                                                    convert (TREE_TYPE (stmt), arg0));
>        stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>        return stmt;
>      }
>  
> -  /* Expand vec_ld into an expression that masks the address and
> -     performs the load.  We need to expand this early to allow
> +  /* Expand vec_st into an expression that masks the address and
> +     performs the store.  We need to expand this early to allow
>       the best aliasing, as by the time we get into RTL we no longer
>       are able to honor __restrict__, for example.  We may want to
>       consider this for all memory access built-ins.
>  
>       When -maltivec=be is specified, or the wrong number of arguments
>       is provided, simply punt to existing built-in processing.  */
> -  if (fcode == ALTIVEC_BUILTIN_VEC_LD
> -      && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
> -      && nargs == 2)
> -    {
> -      tree arg0 = (*arglist)[0];
> -      tree arg1 = (*arglist)[1];
> -
> -      /* Strip qualifiers like "const" from the pointer arg.  */
> -      tree arg1_type = TREE_TYPE (arg1);
> -      if (!POINTER_TYPE_P (arg1_type) && TREE_CODE (arg1_type) != ARRAY_TYPE)
> -        goto bad;
> -
> -      tree inner_type = TREE_TYPE (arg1_type);
> -      if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
> -        {
> -          arg1_type = build_pointer_type (build_qualified_type (inner_type,
> -                                                                0));
> -          arg1 = fold_convert (arg1_type, arg1);
> -        }
> -
> -      /* Construct the masked address.  Let existing error handling take
> -         over if we don't have a constant offset.  */
> -      arg0 = fold (arg0);
> -
> -      if (TREE_CODE (arg0) == INTEGER_CST)
> -        {
> -          if (!ptrofftype_p (TREE_TYPE (arg0)))
> -            arg0 = build1 (NOP_EXPR, sizetype, arg0);
> -
> -          tree arg1_type = TREE_TYPE (arg1);
> -          if (TREE_CODE (arg1_type) == ARRAY_TYPE)
> -            {
> -              arg1_type = TYPE_POINTER_TO (TREE_TYPE (arg1_type));
> -              tree const0 = build_int_cstu (sizetype, 0);
> -              tree arg1_elt0 = build_array_ref (loc, arg1, const0);
> -              arg1 = build1 (ADDR_EXPR, arg1_type, arg1_elt0);
> -            }
> -
> -          tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type,
> -                                       arg1, arg0);
> -          tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr,
> -                                          build_int_cst (arg1_type, -16));
> -
> -          /* Find the built-in to get the return type so we can convert
> -             the result properly (or fall back to default handling if the
> -             arguments aren't compatible).  */
> -          for (desc = altivec_overloaded_builtins;
> -               desc->code && desc->code != fcode; desc++)
> -            continue;
> -
> -          for (; desc->code == fcode; desc++)
> -            if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
> -                && (rs6000_builtin_type_compatible (TREE_TYPE (arg1),
> -                                                    desc->op2)))
> -              {
> -                tree ret_type = rs6000_builtin_type (desc->ret_type);
> -                if (TYPE_MODE (ret_type) == V2DImode)
> -                  /* Type-based aliasing analysis thinks vector long
> -                     and vector long long are different and will put them
> -                     in distinct alias classes.  Force our return type
> -                     to be a may-alias type to avoid this.  */
> -                  ret_type
> -                    = build_pointer_type_for_mode (ret_type, Pmode,
> -                                                   true/*can_alias_all*/);
> -                else
> -                  ret_type = build_pointer_type (ret_type);
> -                aligned = build1 (NOP_EXPR, ret_type, aligned);
> -                tree ret_val = build_indirect_ref (loc, aligned, RO_NULL);
> -                return ret_val;
> -              }
> -        }
> -    }
> 
> -  /* Similarly for stvx.  */
>    if (fcode == ALTIVEC_BUILTIN_VEC_ST
>        && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
>        && nargs == 3)
>      {
>        tree arg0 = (*arglist)[0];
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 1338371..1fb5f44 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -16547,10 +16547,61 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
>        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>        update_call_from_tree (gsi, res);
>        return true;
>      }
> +    /* Vector loads.  */
> +    case ALTIVEC_BUILTIN_LVX_V16QI:
> +    case ALTIVEC_BUILTIN_LVX_V8HI:
> +    case ALTIVEC_BUILTIN_LVX_V4SI:
> +    case ALTIVEC_BUILTIN_LVX_V4SF:
> +    case ALTIVEC_BUILTIN_LVX_V2DI:
> +    case ALTIVEC_BUILTIN_LVX_V2DF:
> +      {
> +        gimple *g;
> +        arg0 = gimple_call_arg (stmt, 0);  // offset
> +        arg1 = gimple_call_arg (stmt, 1);  // address
> +
> +        lhs = gimple_call_lhs (stmt);
> +        location_t loc = gimple_location (stmt);
> +
> +        tree arg1_type = TREE_TYPE (arg1);
> +        tree lhs_type = TREE_TYPE (lhs);
> +
> +        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> +           the tree using the value from arg0.  The resulting type will match
> +           the type of arg1.  */
> +        tree temp_offset = create_tmp_reg_or_ssa_name (sizetype);
> +        g = gimple_build_assign (temp_offset, NOP_EXPR, arg0);
> +        gimple_set_location (g, loc);
> +        gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +        tree temp_addr = create_tmp_reg_or_ssa_name (arg1_type);
> +        g = gimple_build_assign (temp_addr, POINTER_PLUS_EXPR, arg1,
> +                                 temp_offset);
> +        gimple_set_location (g, loc);
> +        gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +
> +        /* Mask off any lower bits from the address.  */
> +        tree alignment_mask = build_int_cst (arg1_type, -16);
> +        tree aligned_addr = create_tmp_reg_or_ssa_name (arg1_type);
> +        g = gimple_build_assign (aligned_addr, BIT_AND_EXPR,
> +                                 temp_addr, alignment_mask);
> +        gimple_set_location (g, loc);
> +        gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +
> +        /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
> +           take an offset, but since we've already incorporated the offset
> +           above, here we just pass in a zero.  */
> +        g = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
> +                                              build_int_cst (arg1_type, 0)));
> +        gimple_set_location (g, loc);
> +        gsi_replace (gsi, g, true);
> +
> +        return true;
> +
> +      }
> +
>      default:
>        if (TARGET_DEBUG_BUILTIN)
>          fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
>                   fn_code, fn_name1, fn_name2);
>        break;
> 
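For anyone mapping the new GIMPLE sequence back to vec_ld/lvx semantics, here
is a rough C-level sketch of what the folded code computes.  This is just an
illustration (the function name is made up), not part of the patch:

  #include <altivec.h>
  #include <stdint.h>

  /* Rough equivalent of the folded GIMPLE: add the byte offset to the base
     address (POINTER_PLUS_EXPR), clear the low four bits (BIT_AND_EXPR with
     -16), then load the 16-byte vector through a MEM_REF.  */
  vector signed int
  lvx_equivalent (int offset, const vector signed int *base)
  {
    uintptr_t ea = (uintptr_t) base + (uintptr_t) offset;
    ea &= (uintptr_t) -16;               /* force 16-byte alignment */
    return *(const vector signed int *) ea;
  }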