On Thu, 2016-04-14 at 13:22 +0200, Richard Biener wrote:
> The following fixes PR70130 - improved SLP capabilities now run into
> the realignment code on ppc which doesn't properly verify that all
> vector loads emitted by vectorizable_load share the same alignment.
> 
> Bootstrap / regtest pending on x86_64-unknown-linux-gnu.
> 
> Bootstrapped / tested on ppc64le by Alan.
> 
> Richard.
> 
> 2016-04-14  Richard Biener  <rguent...@suse.de>
>       Alan Modra  <amo...@gmail.com>
> 
>       PR tree-optimization/70130
>       * tree-vect-data-refs.c (vect_supportable_dr_alignment): Detect
>       when alignment does not stay the same and do not use the realign
>       scheme then.
> 
>       * gcc.dg/vect/O3-pr70130.c: New testcase.
> 
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> *** gcc/tree-vect-data-refs.c (revision 234970)
> --- gcc/tree-vect-data-refs.c (working copy)
> *************** vect_supportable_dr_alignment (struct da
> *** 5983,5992 ****
>             || targetm.vectorize.builtin_mask_for_load ()))
>       {
>         tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> !       if ((nested_in_vect_loop
> !            && (TREE_INT_CST_LOW (DR_STEP (dr))
> !                != GET_MODE_SIZE (TYPE_MODE (vectype))))
> !               || !loop_vinfo)
>           return dr_explicit_realign;
>         else
>           return dr_explicit_realign_optimized;
> --- 5983,6001 ----
>             || targetm.vectorize.builtin_mask_for_load ()))
>       {
>         tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> ! 
> !       /* If we are doing SLP then the accesses need not have the
> !          same alignment, instead it depends on the SLP group size.  */
> !       if (loop_vinfo
> !           && STMT_SLP_TYPE (stmt_info)
> !           && (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
> !               * GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info))))
> !               % TYPE_VECTOR_SUBPARTS (vectype) != 0)

Parentheses here look wrong.  Should be one fewer ending paren on the "*
GROUP_SIZE" line and one more on the following line, right?

Bill

> !         ;
> !       else if (!loop_vinfo
> !                || (nested_in_vect_loop
> !                    && (TREE_INT_CST_LOW (DR_STEP (dr))
> !                        != GET_MODE_SIZE (TYPE_MODE (vectype)))))
>           return dr_explicit_realign;
>         else
>           return dr_explicit_realign_optimized;
> Index: gcc/testsuite/gcc.dg/vect/O3-pr70130.c
> ===================================================================
> *** gcc/testsuite/gcc.dg/vect/O3-pr70130.c    (revision 0)
> --- gcc/testsuite/gcc.dg/vect/O3-pr70130.c    (working copy)
> ***************
> *** 0 ****
> --- 1,94 ----
> + /* { dg-do run } */
> + /* { dg-require-effective-target vsx_hw { target powerpc*-*-* } } */
> + /* { dg-additional-options "-mcpu=power7" { target powerpc*-*-* } } */
> + 
> + struct foo
> + {
> +   short a[3][16][16];
> +   short pad;
> + } images[8];
> + 
> + void __attribute__ ((noinline, noclone))
> + Loop_err (struct foo *img, const int s[16][2], int s0)
> + {
> +   int i, j;
> + 
> +   for (j = 0; j < 16; j++)
> +     {
> +       for (i=0; i < 16; i++)
> +     {
> +       img->a[0][j][i] = s[i][0];
> +       img->a[1][j][i] = s[j][1];
> +       img->a[2][j][i] = s0;
> +     }
> +     }
> + }
> + 
> + const int s[16][2] = { { 1, 16 }, { 2, 15 }, { 3, 14 }, { 4, 13 },
> +                    { 5, 12 }, { 6, 11 }, { 7, 10 }, { 8, 9 },
> +                    { 9, 8 }, { 10, 7 }, { 11, 6 }, { 12, 5 },
> +                    { 13, 4 }, { 14, 3 }, { 15, 2 }, { 16, 1 } };
> + const struct foo expected
> + = { { { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> +     { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 } },
> +       { { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 },
> +     { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 },
> +     { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 },
> +     { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 },
> +     { 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 },
> +     { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 },
> +     { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 },
> +     { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9 },
> +     { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
> +     { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 },
> +     { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
> +     { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 },
> +     { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
> +     { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
> +     { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
> +     { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } },
> +       { { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> +     { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } } },
> +     0 };
> + 
> + int
> + main (void)
> + {
> +   int i;
> + 
> +   for (i = 0; i < 8; i++)
> +     Loop_err (images + i, s, -1);
> + 
> +   for (i = 0; i < 8; i++)
> +     if (__builtin_memcmp (&expected, images + i, sizeof (expected)))
> +       __builtin_abort ();
> +   return 0;
> + }
> 


Reply via email to