On Fri, 18 Oct 2024, Tamar Christina wrote:

> Hi All,
> 
> When finding the gsi to use for code of the root statements we should use the
> one of the original statement rather than the gcond which may be inside a
> pattern.
> 
> Without this the emitted instructions may be discarded later.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       PR tree-optimization/117410
>       * tree-vect-slp.cc (vectorize_slp_instance_root_stmt): Use gsi from
>       original statement.
> 
> gcc/testsuite/ChangeLog:
> 
>       PR tree-optimization/117410
>       * gcc.dg/vect/vect-early-break_129-pr117410.c: New test.
> 
> ---
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..eec7f8db40c7101c0b9aa6a7b87909bf76cf89fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c
> @@ -0,0 +1,94 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +typedef signed char int8_t;
> +typedef short int int16_t;
> +typedef int int32_t;
> +typedef long long int int64_t;
> +typedef unsigned char uint8_t;
> +typedef short unsigned int uint16_t;
> +typedef unsigned int uint32_t;
> +typedef long long unsigned int uint64_t;
> +
> +void __attribute__ ((noinline, noclone))
> +test_1_TYPE1_uint32_t (uint16_t *__restrict f, uint32_t *__restrict d,
> +                       uint16_t x, uint16_t x2, uint32_t y, int n)
> +{
> +    for (int i = 0; i < n; ++i)
> +        {
> +            f[i * 2 + 0] = x;
> +            f[i * 2 + 1] = x2;
> +            d[i] = y;
> +        }
> +}
> +
> +void __attribute__ ((noinline, noclone))
> +test_1_TYPE1_int64_t (int32_t *__restrict f, int64_t *__restrict d, int32_t 
> x,
> +                      int32_t x2, int64_t y, int n)
> +{
> +    for (int i = 0; i < n; ++i)
> +        {
> +            f[i * 2 + 0] = x;
> +            f[i * 2 + 1] = x2;
> +            d[i] = y;
> +        }
> +}
> +
> +int
> +main (void)
> +{
> +        // This part is necessary for ice to appear though running it by 
> itself does not trigger an ICE
> +        int n_3_TYPE1_uint32_t = 32;
> +        uint16_t x_3_uint16_t = 233;
> +        uint16_t x2_3_uint16_t = 78;
> +        uint32_t y_3_uint32_t = 1234;
> +        uint16_t f_3_uint16_t[33 * 2 + 1] = { 0} ;
> +        uint32_t d_3_uint32_t[33] = { 0} ;
> +        test_1_TYPE1_uint32_t (f_3_uint16_t, d_3_uint32_t, x_3_uint16_t, 
> x2_3_uint16_t, y_3_uint32_t, n_3_TYPE1_uint32_t);
> +        for (int i = 0;
> +                        i < n_3_TYPE1_uint32_t;
> +                        ++i) {
> +                if (f_3_uint16_t[i * 2 + 0] != x_3_uint16_t) __builtin_abort 
> ();
> +                if (f_3_uint16_t[i * 2 + 1] != x2_3_uint16_t) 
> __builtin_abort ();
> +                if (d_3_uint32_t[i] != y_3_uint32_t) __builtin_abort ();
> +        }
> +        for (int i = n_3_TYPE1_uint32_t;
> +                        i < n_3_TYPE1_uint32_t + 1;
> +                        ++i) {
> +                if (f_3_uint16_t[i * 2 + 0] != 0) __builtin_abort ();
> +                if (f_3_uint16_t[i * 2 + 1] != 0) __builtin_abort ();
> +                if (d_3_uint32_t[i] != 0) __builtin_abort ();
> +        }
> +    // If ran without the above section, a different ice appears. see below
> +    int n_3_TYPE1_int64_t = 32;
> +    int32_t x_3_int32_t = 233;
> +    int32_t x2_3_int32_t = 78;
> +    int64_t y_3_int64_t = 1234;
> +    int32_t f_3_int32_t[33 * 2 + 1] = { 0 };
> +    int64_t d_3_int64_t[33] = { 0 };
> +    test_1_TYPE1_int64_t (f_3_int32_t, d_3_int64_t, x_3_int32_t, 
> x2_3_int32_t,
> +                          y_3_int64_t, n_3_TYPE1_int64_t);
> +    for (int i = 0; i < n_3_TYPE1_int64_t; ++i)
> +        {
> +            if (f_3_int32_t[i * 2 + 0] != x_3_int32_t)
> +                __builtin_abort ();
> +            if (f_3_int32_t[i * 2 + 1] != x2_3_int32_t)
> +                __builtin_abort ();
> +            if (d_3_int64_t[i] != y_3_int64_t)
> +                __builtin_abort ();
> +        }
> +
> +    for (int i = n_3_TYPE1_int64_t; i < n_3_TYPE1_int64_t + 1; ++i)
> +        {
> +            if (f_3_int32_t[i * 2 + 0] != 0)
> +                __builtin_abort ();
> +            if (f_3_int32_t[i * 2 + 1] != 0)
> +                __builtin_abort ();
> +            if (d_3_int64_t[i] != 0)
> +                __builtin_abort ();
> +        }
> +
> +    return 0;
> +}
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 
> d35c2ea02dcef1c295bf97adc6717a7294d3f61e..9276662fa0f1beaa6c90835ba0e34726bf55ae19
>  100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -11167,7 +11167,7 @@ vectorize_slp_instance_root_stmt (vec_info *vinfo, 
> slp_tree node, slp_instance i
>        can't support lane > 1 at this time.  */
>        gcc_assert (instance->root_stmts.length () == 1);
>        auto root_stmt_info = instance->root_stmts[0];
> -      auto last_stmt = STMT_VINFO_STMT (root_stmt_info);
> +      auto last_stmt = STMT_VINFO_STMT (vect_orig_stmt (root_stmt_info));
>        gimple_stmt_iterator rgsi = gsi_for_stmt (last_stmt);
>        gimple *vec_stmt = NULL;
>        gcc_assert (!SLP_TREE_VEC_DEFS (node).is_empty ());
> 
> 
> 
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to