> Am 13.12.2022 um 10:28 schrieb Jakub Jelinek via Gcc-patches 
> <gcc-patches@gcc.gnu.org>:
> 
> Hi!
> 
> Since vect_recog_rotate_pattern was extended to also work on signed
> types in r13-1100, we miscompile the testcase below.
> vect_recog_rotate_pattern actually emits correct scalar code into
> the pattern def sequence (in particular: a cast to utype, the 2 shifts
> done in utype so that the right shift is logical and not arithmetic,
> an or of the two results, and then a cast back to the signed type), but
> it didn't supply a vectype for most of those pattern statements, which
> means that the generic handling fills it in later with the vectype
> provided by vect_recog_rotate_pattern.
> The problem is that this is the vectype of the result of the whole pattern,
> i.e. a vector of signed values in this case, while the conversion to utype,
> the 2 shifts and the or (everything with a utype lhs in the scalar code)
> should have uvectype as STMT_VINFO_VECTYPE.

What an interesting trap…

> Fixed with following patch, bootstrapped/regtested on x86_64-linux and
> i686-linux, ok for trunk?

Ok.

Thanks,
Richard 


> 2022-12-13  Jakub Jelinek  <ja...@redhat.com>
> 
>    PR tree-optimization/108064
>    * tree-vect-patterns.cc (vect_recog_rotate_pattern): Pass uvectype
>    as 4th argument to append_pattern_def_seq for statements with lhs
>    with utype type.
> 
>    * gcc.c-torture/execute/pr108064.c: New test.
> 
> --- gcc/tree-vect-patterns.cc.jj    2022-12-05 11:10:37.000000000 +0100
> +++ gcc/tree-vect-patterns.cc    2022-12-12 13:14:23.356628767 +0100
> @@ -3113,7 +3113,7 @@ vect_recog_rotate_pattern (vec_info *vin
>     {
>       def = vect_recog_temp_ssa_var (utype, NULL);
>       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
> -      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
>       oprnd0 = def;
>     }
> 
> @@ -3137,7 +3137,7 @@ vect_recog_rotate_pattern (vec_info *vin
>     {
>       def = vect_recog_temp_ssa_var (utype, NULL);
>       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
> -      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
>     }
>   stype = TREE_TYPE (def);
> 
> @@ -3185,13 +3185,13 @@ vect_recog_rotate_pattern (vec_info *vin
>   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
>                    ? LSHIFT_EXPR : RSHIFT_EXPR,
>                  oprnd0, def);
> -  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
> 
>   var2 = vect_recog_temp_ssa_var (utype, NULL);
>   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
>                    ? RSHIFT_EXPR : LSHIFT_EXPR,
>                  oprnd0, def2);
> -  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
> 
>   /* Pattern detected.  */
>   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
> @@ -3202,7 +3202,7 @@ vect_recog_rotate_pattern (vec_info *vin
> 
>   if (!useless_type_conversion_p (type, utype))
>     {
> -      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
>       tree result = vect_recog_temp_ssa_var (type, NULL);
>       pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
>     }
> --- gcc/testsuite/gcc.c-torture/execute/pr108064.c.jj    2022-12-12 13:22:29.875542508 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr108064.c    2022-12-12 13:21:32.516377957 +0100
> @@ -0,0 +1,28 @@
> +/* PR tree-optimization/108064 */
> +
> +static inline short
> +foo (short value)
> +{
> +  return ((value >> 8) & 0xff) | ((value & 0xff) << 8);
> +}
> +
> +__attribute__((noipa))
> +void
> +bar (short *d, const short *s)
> +{
> +  for (unsigned long i = 0; i < 4; i++)
> +    d[i] = foo (s[i]);
> +}
> +
> +int
> +main ()
> +{
> +  short a[4] __attribute__((aligned (16))) = { 0xff, 0, 0, 0 };
> +  short b[4] __attribute__((aligned (16)));
> +  short c[4] __attribute__((aligned (16)));
> +
> +  bar (b, a);
> +  bar (c, b);
> +  if (a[0] != c[0])
> +    __builtin_abort ();
> +}
> 
>    Jakub
> 

Reply via email to