A new test case is added to testsuite/gcc.dg/vect, which will fail without this patch and pass with it. Bootstrap also get passed. No additional test failure is introduced.
The new test case includes a dot product on two arrays with short and int types. The loop will still be vectorized (using punpcklwd on array with short type), but should not be recognized as a dot-product pattern. thanks, Cong Index: gcc/tree-vect-patterns.c =================================================================== --- gcc/tree-vect-patterns.c (revision 202572) +++ gcc/tree-vect-patterns.c (working copy) @@ -397,7 +397,7 @@ vect_recog_dot_prod_pattern (vec<gimple> || !promotion) return NULL; oprnd00 = gimple_assign_rhs1 (def_stmt); - if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt, + if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt, &promotion) || !promotion) return NULL; Index: gcc/ChangeLog =================================================================== --- gcc/ChangeLog (revision 202572) +++ gcc/ChangeLog (working copy) @@ -1,3 +1,9 @@ +2013-09-13 Cong Hou <co...@google.com> + + * tree-vect-patterns.c (vect_recog_dot_prod_pattern): Fix a bug + when checking the dot production pattern. The type of rhs operand + of multiply is now checked correctly. + 2013-09-13 Jan Hubicka <j...@suse.cz> PR middle-end/58094 Index: gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16c.c =================================================================== --- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16c.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16c.c (revision 0) @@ -0,0 +1,73 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 64 +#define DOT 43680 + +signed short X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +signed int Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + +/* (short, int)->int->int dot product. + Not detected as a dot-product pattern. */ + +__attribute__ ((noinline)) int +foo (int len) +{ + int i; + int result = 0; + + for (i = 0; i < len; i++) + { + result += (X[i] * Y[i]); + } + return result; +} + + +/* (int, short)->int->int dot product. + Not detected as a dot-product pattern. */ + +__attribute__ ((noinline)) int +bar (int len) +{ + int i; + int result = 0; + + for (i = 0; i < len; i++) + { + result += (Y[i] * X[i]); + } + return result; +} + +int +main (void) +{ + int i; + int dot; + + check_vect (); + + for (i = 0; i < N; i++) + { + X[i] = i; + Y[i] = N - i; + __asm__ volatile (""); + } + + dot = foo (N); + if (dot != DOT) + abort (); + + dot = bar (N); + if (dot != DOT) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + Index: gcc/testsuite/ChangeLog =================================================================== --- gcc/testsuite/ChangeLog (revision 202572) +++ gcc/testsuite/ChangeLog (working copy) @@ -1,3 +1,9 @@ +2013-09-13 Cong Hou <co...@google.com> + + * gcc.dg/vect/vect-reduc-dot-s16c.c: Add a test case with dot product + on two arrays with short and int types. This should not be recognized + as a dot product pattern. + 2013-09-13 Kai Tietz <kti...@redhat.com> gcc.target/i386/pr57848.c: New file. On Wed, Sep 11, 2013 at 6:55 PM, Xinliang David Li <davi...@google.com> wrote: > Can you add a test case to the regression suite? > > When the type of arguments are unsigned short/unsigned int, GCC does > not vectorize the loop anymore -- this is worth a separate bug to > track. punpcklwd instruction can be used to do zero extension of the > short type. > > David > > On Wed, Sep 11, 2013 at 6:16 PM, Cong Hou <co...@google.com> wrote: >> Hi >> >> There is a bug in the function vect_recog_dot_prod_pattern() in >> tree-vect-patterns.c. This function checks if a loop is of dot >> production pattern. Specifically, according to the comment of this >> function: >> >> /* >> Try to find the following pattern: >> >> type x_t, y_t; >> TYPE1 prod; >> TYPE2 sum = init; >> loop: >> sum_0 = phi <init, sum_1> >> S1 x_t = ... >> S2 y_t = ... >> S3 x_T = (TYPE1) x_t; >> S4 y_T = (TYPE1) y_t; >> S5 prod = x_T * y_T; >> [S6 prod = (TYPE2) prod; #optional] >> S7 sum_1 = prod + sum_0; >> >> where 'TYPE1' is exactly double the size of type 'type', and >> 'TYPE2' is the same size of 'TYPE1' or bigger. This is a special case >> of a reduction computation. >> */ >> >> This function should check if x_t and y_t have the same type (type) >> which has the half size of TYPE1. The corresponding code is shown >> below: >> >> oprnd0 = gimple_assign_rhs1 (stmt); >> oprnd1 = gimple_assign_rhs2 (stmt); >> if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) || >> !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) >> return NULL; >> if (!type_conversion_p (oprnd0, stmt, true, &half_type0, >> &def_stmt, &promotion) || !promotion) >> return NULL; >> oprnd00 = gimple_assign_rhs1 (def_stmt); >> >> /*======================V see here! */ >> if (!type_conversion_p (oprnd0, stmt, true, &half_type1, >> &def_stmt, &promotion) || !promotion) >> return NULL; >> oprnd01 = gimple_assign_rhs1 (def_stmt); >> if (!types_compatible_p (half_type0, half_type1)) >> return NULL; >> if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2) >> return NULL; >> >> Here the function uses x_T (oprnd0) to check the type of y_t, which is >> incorrect. The fix is simple: just replace it by oprnd1. >> >> The failed test case for this bug is shown below: >> >> int foo(short *a, int *b, int n) { >> int sum = 0; >> for (int i = 0; i < n; ++i) >> sum += a[i] * b[i]; >> return sum; >> } >> >> >> thanks, >> Cong >> >> >> Index: gcc/tree-vect-patterns.c >> =================================================================== >> --- gcc/tree-vect-patterns.c (revision 200988) >> +++ gcc/tree-vect-patterns.c (working copy) >> @@ -397,7 +397,7 @@ vect_recog_dot_prod_pattern (vec<gimple> >> || !promotion) >> return NULL; >> oprnd00 = gimple_assign_rhs1 (def_stmt); >> - if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt, >> + if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt, >> &promotion) >> || !promotion) >> return NULL;