On Tue, Nov 12, 2013 at 2:26 AM, Jakub Jelinek <ja...@redhat.com> wrote: > On Tue, Nov 12, 2013 at 11:05:45AM +0100, Jan Hubicka wrote: >> > @@ -16576,7 +16576,7 @@ ix86_avx256_split_vector_move_misalign (rtx >> > op0, rtx op1) >> > >> > if (MEM_P (op1)) >> > { >> > - if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD) >> > + if (!TARGET_AVX2 && TARGET_AVX256_SPLIT_UNALIGNED_LOAD) >> > { >> > rtx r = gen_reg_rtx (mode); >> > m = adjust_address (op1, mode, 0); >> > @@ -16596,7 +16596,7 @@ ix86_avx256_split_vector_move_misalign (rtx >> > op0, rtx op1) >> > } >> > else if (MEM_P (op0)) >> > { >> > - if (TARGET_AVX256_SPLIT_UNALIGNED_STORE) >> > + if (!TARGET_AVX2 && TARGET_AVX256_SPLIT_UNALIGNED_STORE) >> >> I would add an explanatory comment on those two. > > Looking at http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01235.html > we are going to have some AMD CPUs with AVX2 support soon; the question is > whether they will prefer 256-bit vmovups/vmovupd/vmovdqu or split, but even > if they prefer split, the question is whether, like bdver{1,2,3}, they will > be X86_TUNE_AVX128_OPTIMAL, because if so, then how 256-bit unaligned > loads/stores are handled is much less important there. Ganesh?
I left those two out. This is what I checked in. H.J. --- Index: ChangeLog =================================================================== --- ChangeLog (revision 204699) +++ ChangeLog (working copy) @@ -1,3 +1,16 @@ +2013-11-12 H.J. Lu <hongjiu...@intel.com> + + PR target/59084 + * config/i386/i386.c (ix86_option_override_internal): Check + X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL and + X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL for + MASK_AVX256_SPLIT_UNALIGNED_LOAD and + MASK_AVX256_SPLIT_UNALIGNED_STORE. + + * config/i386/x86-tune.def (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL): + Clear m_COREI7_AVX and update comments. + (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL): Likewise. + 2013-11-12 Martin Jambor <mjam...@suse.cz> PR rtl-optimization/10474 Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 204699) +++ config/i386/i386.c (working copy) @@ -3974,10 +3974,10 @@ ix86_option_override_internal (bool main if (flag_expensive_optimizations && !(opts_set->x_target_flags & MASK_VZEROUPPER)) opts->x_target_flags |= MASK_VZEROUPPER; - if (!ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL] + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; - if (!ix86_tune_features[X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL] + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; /* Enable 128-bit AVX instruction generation Index: config/i386/x86-tune.def =================================================================== --- config/i386/x86-tune.def (revision 204699) +++ config/i386/x86-tune.def (working copy) @@ -376,15 +376,15 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, /* AVX instruction selection tuning (some of SSE flags affects AVX, too) 
*/ /*****************************************************************************/ -/* X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL: if true, unaligned loads are +/* X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL: if false, unaligned loads are split. */ DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL, "256_unaligned_load_optimal", - ~(m_COREI7 | m_GENERIC)) + ~(m_COREI7 | m_COREI7_AVX | m_GENERIC)) -/* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if true, unaligned loads are +/* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if false, unaligned stores are split. */ -DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_load_optimal", - ~(m_COREI7 | m_BDVER | m_GENERIC)) +DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_store_optimal", + ~(m_COREI7 | m_COREI7_AVX | m_BDVER | m_GENERIC)) /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for the auto-vectorizer. */ -- H.J.