> +/* Returns true if the vector load/store is unaligned and if
> +   unaligned vector load/stores are slow.  */

Please document STMT here as well.
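Something like this would do (suggested wording only, inferred from what
the function actually checks):

  /* Return true if STMT is a vectorizable load or store whose data
     reference is not known to be aligned, and unaligned vector
     loads/stores are slow on this target.  Return false if STMT has
     no vectorization info or no data reference.  */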
> +static bool
> +is_slow_vect_unaligned_load_store (gimple stmt)
> +{
> +  stmt_vec_info stmt_info;
> +  struct data_reference *dr = NULL;
> +
> +  /* Are unaligned load/stores slow for this target? */
> +  if (!targetm.slow_unaligned_vector_memop
> +      || !targetm.slow_unaligned_vector_memop ())
> +    return false;
> +
> +  /* Harmful only if it is in a hot region of code when profiles are
> +     available. */
> +  if (profile_status == PROFILE_READ
> +      && !maybe_hot_bb_p (gimple_bb (stmt)))
> +    return false;

Is this check necessary?

> +
> +  stmt_info = vinfo_for_stmt (stmt);
> +  if (!stmt_info)
> +    return false;
> +
> +  /* Check if access is aligned?. */
> +  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
> +    {
> +      gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> +      if (first_stmt
> +          && vinfo_for_stmt (first_stmt))
> +        dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
> +    }
> +  else
> +    {
> +      dr = STMT_VINFO_DATA_REF (stmt_info);
> +    }

Remove {}

> +
> +  if (!dr)
> +    return false;
> +
> +  if (!aligned_access_p (dr))
> +    {
> +      return true;
> +    }

Remove {}

> +
> +  return false;
> +}
> +
>  /* Make sure the statement is vectorizable. */
>
>  bool
> @@ -5065,27 +5112,43 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vect
>    if (!bb_vinfo
>        && (STMT_VINFO_RELEVANT_P (stmt_info)
>            || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
> +    {
>        ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
>              || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
>              || vectorizable_conversion (stmt, NULL, NULL, NULL)
>              || vectorizable_shift (stmt, NULL, NULL, NULL)
>              || vectorizable_operation (stmt, NULL, NULL, NULL)
>              || vectorizable_assignment (stmt, NULL, NULL, NULL)
> -            || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
>              || vectorizable_call (stmt, NULL, NULL)
> -            || vectorizable_store (stmt, NULL, NULL, NULL)
> -            || vectorizable_reduction (stmt, NULL, NULL, NULL)
> +            || vectorizable_reduction (stmt, NULL, NULL, NULL)
>              || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
> +
> +      if (!ok)
> +        {
> +          ok = (vectorizable_load (stmt, NULL, NULL, NULL, NULL)
> +                || vectorizable_store (stmt, NULL, NULL, NULL));
> +
> +          if (ok && is_slow_vect_unaligned_load_store (stmt))
> +            ok = false;
> +        }
> +    }
>    else
>      {
>        if (bb_vinfo)
> -        ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
> -              || vectorizable_type_demotion (stmt, NULL, NULL, node)
> -              || vectorizable_shift (stmt, NULL, NULL, node)
> -              || vectorizable_operation (stmt, NULL, NULL, node)
> -              || vectorizable_assignment (stmt, NULL, NULL, node)
> -              || vectorizable_load (stmt, NULL, NULL, node, NULL)
> -              || vectorizable_store (stmt, NULL, NULL, node));
> +        {
> +          ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
> +                || vectorizable_type_demotion (stmt, NULL, NULL, node)
> +                || vectorizable_shift (stmt, NULL, NULL, node)
> +                || vectorizable_operation (stmt, NULL, NULL, node)
> +                || vectorizable_assignment (stmt, NULL, NULL, node));
> +          if (!ok)
> +            {
> +              ok = (vectorizable_load (stmt, NULL, NULL, node, NULL)
> +                    || vectorizable_store (stmt, NULL, NULL, node));
> +              if (ok && is_slow_vect_unaligned_load_store (stmt))
> +                ok = false;
> +            }
> +        }
>      }
>

Same question as Ira asked -- why not do the check in
vectorizable_load|store?
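I.e., something like the sketch below in vectorizable_load, and the
analogous change in vectorizable_store.  This is untested; the !vec_stmt
guard is my assumption, so that the statement is only rejected during
analysis and not again at transform time:

  static bool
  vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi,
                     gimple *vec_stmt, slp_tree slp_node,
                     slp_instance slp_node_instance)
  {
    /* ... existing checks that STMT really is a vectorizable load ... */

    /* Reject the load at analysis time if the target says the
       resulting unaligned vector load would be slow.  */
    if (!vec_stmt && is_slow_vect_unaligned_load_store (stmt))
      return false;

    /* ... rest of the existing body ... */
  }

That would also leave the vect_analyze_stmt hunks above untouched.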
David

>    if (!ok)

> Index: config/i386/i386.c
> ===================================================================
> --- config/i386/i386.c	(revision 182265)
> +++ config/i386/i386.c	(working copy)
> @@ -26464,6 +26464,24 @@ ix86_init_mmx_sse_builtins (void)
>      }
>  }
>
> +/* Detect if this unaligned vectorizable load/stores should be
> +   considered slow.  This is true for core2 where the movdqu insn
> +   is slow, ~5x slower than the movdqa. */
> +
> +static bool
> +ix86_slow_unaligned_vector_memop (void)
> +{
> +  /* This is known to be slow on core2. */
> +  if (ix86_tune == PROCESSOR_CORE2_64
> +      || ix86_tune == PROCESSOR_CORE2_32)
> +    return true;
> +
> +  return false;
> +}
> +
>  /* Internal method for ix86_init_builtins. */
>
>  static void
> @@ -36624,6 +36642,9 @@ ix86_loop_unroll_adjust (unsigned nunroll, struct
>  #undef TARGET_BUILD_BUILTIN_VA_LIST
>  #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
>
> +#undef TARGET_SLOW_UNALIGNED_VECTOR_MEMOP
> +#define TARGET_SLOW_UNALIGNED_VECTOR_MEMOP ix86_slow_unaligned_vector_memop
> +
>  #undef TARGET_ENUM_VA_LIST_P
>  #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
>
>
> --
> This patch is available for review at http://codereview.appspot.com/5488054
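P.S. I don't see the target.def/tm.texi part in this excerpt -- the new
hook also needs an entry there so that TARGET_SLOW_UNALIGNED_VECTOR_MEMOP
gets documented.  Presumably something along these lines (my guess at the
shape, modeled on the existing boolean hooks; defaulting to
hook_bool_void_false leaves all other targets unaffected):

  DEFHOOK
  (slow_unaligned_vector_memop,
   "True if unaligned vector loads/stores are slow and best avoided.",
   bool, (void),
   hook_bool_void_false)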