On Thu, 14 Apr 2016, Bill Schmidt wrote: > On Thu, 2016-04-14 at 13:22 +0200, Richard Biener wrote: > > The following fixes PR70130 - improved SLP capabilities now run into > > the realignment code on ppc which doesn't properly verify that all > > vector loads emitted by vectorizable_load share the same alignment. > > > > Bootstrap / regtest pending on x86_64-unknown-linux-gnu. > > > > Bootstrapped / tested on ppc64le by Alan. > > > > Richard. > > > > 2016-04-14 Richard Biener <rguent...@suse.de> > > Alan Modra <amo...@gmail.com> > > > > PR tree-optimization/70130 > > * tree-vect-data-refs.c (vect_supportable_dr_alignment): Detect > > when alignment stays not the same and no not use the realign > > scheme then. > > > > * gcc.dg/vect/O3-pr70130.c: New testcase. > > > > Index: gcc/tree-vect-data-refs.c > > =================================================================== > > *** gcc/tree-vect-data-refs.c (revision 234970) > > --- gcc/tree-vect-data-refs.c (working copy) > > *************** vect_supportable_dr_alignment (struct da > > *** 5983,5992 **** > > || targetm.vectorize.builtin_mask_for_load ())) > > { > > tree vectype = STMT_VINFO_VECTYPE (stmt_info); > > ! if ((nested_in_vect_loop > > ! && (TREE_INT_CST_LOW (DR_STEP (dr)) > > ! != GET_MODE_SIZE (TYPE_MODE (vectype)))) > > ! || !loop_vinfo) > > return dr_explicit_realign; > > else > > return dr_explicit_realign_optimized; > > --- 5983,6001 ---- > > || targetm.vectorize.builtin_mask_for_load ())) > > { > > tree vectype = STMT_VINFO_VECTYPE (stmt_info); > > ! > > ! /* If we are doing SLP then the accesses need not have the > > ! same alignment, instead it depends on the SLP group size. */ > > ! if (loop_vinfo > > ! && STMT_SLP_TYPE (stmt_info) > > ! && (LOOP_VINFO_VECT_FACTOR (loop_vinfo) > > ! * GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT > > (stmt_info)))) > > ! % TYPE_VECTOR_SUBPARTS (vectype) != 0) > > Parentheses here look wrong. 
Should be one fewer ending paren on the "* GROUP_SIZE" line and one more on the following line, right?
Yeah, though it shouldn't matter in practice. I'll fix things up before committing. Richard. > Bill > > > ! ; > > ! else if (!loop_vinfo > > ! || (nested_in_vect_loop > > ! && (TREE_INT_CST_LOW (DR_STEP (dr)) > > ! != GET_MODE_SIZE (TYPE_MODE (vectype))))) > > return dr_explicit_realign; > > else > > return dr_explicit_realign_optimized; > > Index: gcc/testsuite/gcc.dg/vect/O3-pr70130.c > > =================================================================== > > *** gcc/testsuite/gcc.dg/vect/O3-pr70130.c (revision 0) > > --- gcc/testsuite/gcc.dg/vect/O3-pr70130.c (working copy) > > *************** > > *** 0 **** > > --- 1,94 ---- > > + /* { dg-do run } */ > > + /* { dg-require-effective-target vsx_hw { target powerpc*-*-* } } */ > > + /* { dg-additional-options "-mcpu=power7" { target powerpc*-*-* } } */ > > + > > + struct foo > > + { > > + short a[3][16][16]; > > + short pad; > > + } images[8]; > > + > > + void __attribute__ ((noinline, noclone)) > > + Loop_err (struct foo *img, const int s[16][2], int s0) > > + { > > + int i, j; > > + > > + for (j = 0; j < 16; j++) > > + { > > + for (i=0; i < 16; i++) > > + { > > + img->a[0][j][i] = s[i][0]; > > + img->a[1][j][i] = s[j][1]; > > + img->a[2][j][i] = s0; > > + } > > + } > > + } > > + > > + const int s[16][2] = { { 1, 16 }, { 2, 15 }, { 3, 14 }, { 4, 13 }, > > + { 5, 12 }, { 6, 11 }, { 7, 10 }, { 8, 9 }, > > + { 9, 8 }, { 10, 7 }, { 11, 6 }, { 12, 5 }, > > + { 13, 4 }, { 14, 3 }, { 15, 2 }, { 16, 1 } }; > > + const struct foo expected > > + = { { { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, > > + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 } }, > > + { { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 > > }, > > + { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 }, > > + { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 }, > > + { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 }, > > + { 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 }, > > + { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 }, > > + { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 }, > > + { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9 }, > > + { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, > > + { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 }, > > + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 }, > > + { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, > > + { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, > > + { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, > > + { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, > > + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } }, > > + { { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 > > }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, > > + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } } }, > > + 0 }; > > + > > + int > > + main (void) > > + { > > + int i; > > + > > + for (i = 0; i < 8; i++) > > + Loop_err (images + i, s, -1); > > + > > + for (i = 0; i < 8; i++) > > + if (__builtin_memcmp (&expected, images + i, sizeof (expected))) > > + __builtin_abort (); > > + return 0; > > + } > > > > > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)