> On Jul 10, 2018, at 2:10 AM, Richard Biener <richard.guent...@gmail.com> wrote:
> 
> On Mon, Jul 9, 2018 at 9:08 PM Will Schmidt <will_schm...@vnet.ibm.com> wrote:
>> 
>> Hi,
>> Re-posting.  Richard provided feedback on a previous version of this
>> patch; I wanted to make sure he was/is OK with the latest. :-)
>> 
>> Add support for Gimple folding for unaligned vector loads and stores.
>> 
>> Regtest completed across a variety of systems: P6, P7, P8, P9.
>> 
>> [v2] Added the type for the MEM_REF, per feedback.
>> Testcases for gimple-folding of the same are currently in-tree
>> as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
>> Re-tested, still looks good. :-)
>> 
>> [v3] Updated the alignment for the MEM_REF to be 4 bytes.
>> Updated/added/removed comments in the code for clarity.
>> 
>> OK for trunk?
>> 
>> Thanks,
>> -Will
>> 
>> [gcc]
>> 
>> 2018-07-09  Will Schmidt  <will_schm...@vnet.ibm.com>
>> 
>>         * config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
>>         vec_xst variants to the list.
>>         (rs6000_gimple_fold_builtin): Add support for folding unaligned
>>         vector loads and stores.
>> 
>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>> index 8bc4109..774c60a 100644
>> --- a/gcc/config/rs6000/rs6000.c
>> +++ b/gcc/config/rs6000/rs6000.c
>> @@ -15401,10 +15401,16 @@ rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
>>      case ALTIVEC_BUILTIN_STVX_V8HI:
>>      case ALTIVEC_BUILTIN_STVX_V4SI:
>>      case ALTIVEC_BUILTIN_STVX_V4SF:
>>      case ALTIVEC_BUILTIN_STVX_V2DI:
>>      case ALTIVEC_BUILTIN_STVX_V2DF:
>> +    case VSX_BUILTIN_STXVW4X_V16QI:
>> +    case VSX_BUILTIN_STXVW4X_V8HI:
>> +    case VSX_BUILTIN_STXVW4X_V4SF:
>> +    case VSX_BUILTIN_STXVW4X_V4SI:
>> +    case VSX_BUILTIN_STXVD2X_V2DF:
>> +    case VSX_BUILTIN_STXVD2X_V2DI:
>>        return true;
>>      default:
>>        return false;
>>      }
>> }
>> @@ -15910,10 +15916,79 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>        gimple_set_location (g, loc);
>>        gsi_replace (gsi, g, true);
>>        return true;
>>      }
>> 
>> +    /* unaligned Vector loads.  */
>> +    case VSX_BUILTIN_LXVW4X_V16QI:
>> +    case VSX_BUILTIN_LXVW4X_V8HI:
>> +    case VSX_BUILTIN_LXVW4X_V4SF:
>> +    case VSX_BUILTIN_LXVW4X_V4SI:
>> +    case VSX_BUILTIN_LXVD2X_V2DF:
>> +    case VSX_BUILTIN_LXVD2X_V2DI:
>> +      {
>> +       arg0 = gimple_call_arg (stmt, 0);  // offset
>> +       arg1 = gimple_call_arg (stmt, 1);  // address
>> +       lhs = gimple_call_lhs (stmt);
>> +       location_t loc = gimple_location (stmt);
>> +       /* Since arg1 may be cast to a different type, just use ptr_type_node
>> +          here instead of trying to enforce TBAA on pointer types.  */
>> +       tree arg1_type = ptr_type_node;
>> +       tree lhs_type = TREE_TYPE (lhs);
>> +       /* In GIMPLE the type of the MEM_REF specifies the alignment.  The
>> +          required alignment (power) is 4 bytes regardless of data type.  */
>> +       tree align_ltype = build_aligned_type (lhs_type, 4);
>> +       /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
>> +          the tree using the value from arg0.  The resulting type will match
>> +          the type of arg1.  */
>> +       gimple_seq stmts = NULL;
>> +       tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
>> +       tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
>> +                                      arg1_type, arg1, temp_offset);
>> +       gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>> +       /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
>> +          take an offset, but since we've already incorporated the offset
>> +          above, here we just pass in a zero.  */
>> +       gimple *g;
>> +       g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
>> +                                             build_int_cst (arg1_type, 0)));
>> +       gimple_set_location (g, loc);
>> +       gsi_replace (gsi, g, true);
>> +       return true;
>> +      }
>> 
>> +    /* unaligned Vector stores.  */
>> +    case VSX_BUILTIN_STXVW4X_V16QI:
>> +    case VSX_BUILTIN_STXVW4X_V8HI:
>> +    case VSX_BUILTIN_STXVW4X_V4SF:
>> +    case VSX_BUILTIN_STXVW4X_V4SI:
>> +    case VSX_BUILTIN_STXVD2X_V2DF:
>> +    case VSX_BUILTIN_STXVD2X_V2DI:
>> +      {
>> +       arg0 = gimple_call_arg (stmt, 0);  /* Value to be stored.  */
>> +       arg1 = gimple_call_arg (stmt, 1);  /* Offset.  */
>> +       tree arg2 = gimple_call_arg (stmt, 2);  /* Store-to address.  */
>> +       location_t loc = gimple_location (stmt);
>> +       tree arg0_type = TREE_TYPE (arg0);
>> +       /* Use ptr_type_node (no TBAA) for the arg2_type.  */
>> +       tree arg2_type = ptr_type_node;
>> +       /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
>> +          the tree using the value from arg1.  The resulting type will match
>> +          the type of arg2.  */
>> +       gimple_seq stmts = NULL;
>> +       tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
>> +       tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
>> +                                      arg2_type, arg2, temp_offset);
>> +       gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>> +       gimple *g;
>> +       g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
>> +                                        build_int_cst (arg2_type, 0)), arg0);
> 
> Here you are using the alignment of a "value", that is, you originally
> have sth like
> 
>   __builtin_store (val_1, offset, ptr);
> 
> where val_1 is an SSA name of say, 'vector<int>' type (or vector<int>
> __attribute__((aligned(1))) type!).
> 
> I wonder if, given you don't play similar games as with the load, the
> HW requirement is sth like vector element alignment?
Right, same alignment rules for stores as for loads.  Minimum alignment
is word alignment for all data types.

Bill

> 
> GIMPLE generally doesn't care about the alignment of registers (why
> should it) but you can't be sure they do not "leak" there.
> 
> Richard.
> 
>> +       gimple_set_location (g, loc);
>> +       gsi_replace (gsi, g, true);
>> +       return true;
>> +      }
>> 
>>      /* Vector Fused multiply-add (fma).  */
>>      case ALTIVEC_BUILTIN_VMADDFP:
>>      case VSX_BUILTIN_XVMADDDP:
>>      case ALTIVEC_BUILTIN_VMLADDUHM:
>>      {
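
For readers following along: below is a minimal self-contained sketch
(not part of the patch) of the kind of source these new cases fold,
in the spirit of the in-tree fold-vec-load-*.c and fold-vec-store-*.c
tests mentioned above.  The function names load_vsi and store_vsi are
made up for illustration, and the notes about the folded GIMPLE form
are approximate; compile with -mvsx and inspect -fdump-tree-gimple to
see the effect.

  #include <altivec.h>

  /* vec_vsx_ld on vector signed int maps to VSX_BUILTIN_LXVW4X_V4SI,
     one of the load cases handled above.  After folding, the dump
     should show a plain MEM_REF through a 4-byte-aligned vector type
     instead of a call to the builtin.  */
  vector signed int
  load_vsi (vector signed int *vsiptr, int offset)
  {
    return vec_vsx_ld (offset, vsiptr);
  }

  /* vec_vsx_st on vector signed int maps to VSX_BUILTIN_STXVW4X_V4SI,
     one of the store cases.  The folded form is an assignment to a
     MEM_REF whose type is that of the stored value.  */
  void
  store_vsi (vector signed int val, vector signed int *vsiptr, int offset)
  {
    vec_vsx_st (val, offset, vsiptr);
  }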