On Mon, 18 Nov 2019 at 16:17, Kyrill Tkachov
<kyrylo.tkac...@foss.arm.com> wrote:
>
> Hi Prathamesh,
>
> On 11/14/19 6:47 PM, Prathamesh Kulkarni wrote:
> > Hi,
> > As suggested in PR, the attached patch falls back to distributing
> > rshift over plus_expr instead of fallback widening -> arithmetic ->
> > narrowing sequence, if target support is not available.
> > Bootstrap+tested on x86_64-unknown-linux-gnu and aarch64-linux-gnu.
> > OK to commit ?
> >
> > Thanks,
> > Prathamesh
>
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007.c
> new file mode 100644
> index 00000000000..b682f3f3b74
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +#define N 1024
> +unsigned char dst[N];
> +unsigned char in1[N];
> +unsigned char in2[N];
> +
> +void
> +foo ()
> +{
> +  for( int x = 0; x < N; x++ )
> +    dst[x] = (in1[x] + in2[x] + 1) >> 1;
> +}
> +
> +/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
> +/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
>
>
> I think you'll want to make the test a bit stronger to test the actual
> instructions expected here.
> You'll also want to test the IFN_AVG_FLOOR case, as your patch adds support 
> for it too.
Hi Kyrill,
Thanks for the suggestions, I have updated the tests in the attached patch.
Does it look OK?
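
For reference, the fallback relies on these identities for unsigned element
types (the helper names below are only for illustration, they are not part of
the patch):

  floor: (a + b)     >> 1 == (a >> 1) + (b >> 1) + ((a & b) & 1)
  ceil:  (a + b + 1) >> 1 == (a >> 1) + (b >> 1) + ((a | b) & 1)

A quick self-contained scalar check:

#include <assert.h>

static unsigned char
avg_floor (unsigned char a, unsigned char b)
{
  /* Distribute the shift over the addition; (a & b) & 1 adds back the
     carry that is lost when both low bits are set.  */
  return (a >> 1) + (b >> 1) + ((a & b) & 1);
}

static unsigned char
avg_ceil (unsigned char a, unsigned char b)
{
  /* With the extra +1, the rounding term fires when either low bit is set.  */
  return (a >> 1) + (b >> 1) + ((a | b) & 1);
}

int
main (void)
{
  for (unsigned a = 0; a < 256; a++)
    for (unsigned b = 0; b < 256; b++)
      {
	assert (avg_floor (a, b) == ((a + b) >> 1));
	assert (avg_ceil (a, b) == ((a + b + 1) >> 1));
      }
  return 0;
}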

Thanks,
Prathamesh
>
> Thanks,
> Kyrill
>
> diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> index 8ebbcd76b64..7025a3b4dc2 100644
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -2019,22 +2019,59 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
>
>     /* Check for target support.  */
>     tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
> -  if (!new_vectype
> -      || !direct_internal_fn_supported_p (ifn, new_vectype,
> -                                         OPTIMIZE_FOR_SPEED))
> +
> +  if (!new_vectype)
>       return NULL;
>
> +  bool ifn_supported
> +    = direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED);
> +
>     /* The IR requires a valid vector type for the cast result, even though
>        it's likely to be discarded.  */
>     *type_out = get_vectype_for_scalar_type (vinfo, type);
>     if (!*type_out)
>       return NULL;
>
> -  /* Generate the IFN_AVG* call.  */
>     tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
>     tree new_ops[2];
>     vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
>                        unprom, new_vectype);
> +
> +  if (!ifn_supported)
> +    {
> +      /* If there is no target support available, generate code
> +        to distribute rshift over plus and add one depending
> +        upon floor or ceil rounding.  */
> +
> +      tree one_cst = build_one_cst (new_type);
> +
> +      tree tmp1 = vect_recog_temp_ssa_var (new_type, NULL);
> +      gassign *g1 = gimple_build_assign (tmp1, RSHIFT_EXPR, new_ops[0], one_cst);
> +
> +      tree tmp2 = vect_recog_temp_ssa_var (new_type, NULL);
> +      gassign *g2 = gimple_build_assign (tmp2, RSHIFT_EXPR, new_ops[1], one_cst);
> +
> +      tree tmp3 = vect_recog_temp_ssa_var (new_type, NULL);
> +      gassign *g3 = gimple_build_assign (tmp3, PLUS_EXPR, tmp1, tmp2);
> +
> +      tree tmp4 = vect_recog_temp_ssa_var (new_type, NULL);
> +      tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
> +      gassign *g4 = gimple_build_assign (tmp4, c, new_ops[0], new_ops[1]);
> +
> +      tree tmp5 = vect_recog_temp_ssa_var (new_type, NULL);
> +      gassign *g5 = gimple_build_assign (tmp5, BIT_AND_EXPR, tmp4, one_cst);
> +
> +      gassign *g6 = gimple_build_assign (new_var, PLUS_EXPR, tmp3, tmp5);
> +
> +      append_pattern_def_seq (last_stmt_info, g1, new_vectype);
> +      append_pattern_def_seq (last_stmt_info, g2, new_vectype);
> +      append_pattern_def_seq (last_stmt_info, g3, new_vectype);
> +      append_pattern_def_seq (last_stmt_info, g4, new_vectype);
> +      append_pattern_def_seq (last_stmt_info, g5, new_vectype);
> +      return vect_convert_output (last_stmt_info, type, g6, new_vectype);
> +    }
> +
> +  /* Generate the IFN_AVG* call.  */
>     gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
>                                                     new_ops[1]);
>     gimple_call_set_lhs (average_stmt, new_var);
>
2019-11-19  Prathamesh Kulkarni  <prathamesh.kulka...@linaro.org>

        PR tree-optimization/89007
        * tree-vect-patterns.c (vect_recog_average_pattern): If there is no
        target support available, generate code to distribute rshift over plus
        and add one depending upon floor or ceil rounding.

testsuite/
        * gcc.target/aarch64/sve/pr89007-1.c: New test.
        * gcc.target/aarch64/sve/pr89007-2.c: Likewise.

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
new file mode 100644
index 00000000000..32095c63c61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1024
+unsigned char dst[N];
+unsigned char in1[N];
+unsigned char in2[N];
+
+/*
+**  foo: 
+**     ...
+**     lsr     (z[0-9]+\.b), z[0-9]+\.b, #1
+**     lsr     (z[0-9]+\.b), z[0-9]+\.b, #1
+**     add     (z[0-9]+\.b), \1, \2
+**     orr     (z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
+**     and     (z[0-9]+\.b), \4\.b, #0x1
+**     add     z0.b, \3, \5
+**     ...
+*/
+void
+foo ()
+{
+  for( int x = 0; x < N; x++ )
+    dst[x] = (in1[x] + in2[x] + 1) >> 1;
+}
+
+/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
+/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
new file mode 100644
index 00000000000..cc40f45046b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1024
+unsigned char dst[N];
+unsigned char in1[N];
+unsigned char in2[N];
+
+/*
+**  foo: 
+**     ...
+**     lsr     (z[0-9]+\.b), z[0-9]+\.b, #1
+**     lsr     (z[0-9]+\.b), z[0-9]+\.b, #1
+**     add     (z[0-9]+\.b), \1, \2
+**     and     (z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
+**     and     (z[0-9]+\.b), \4\.b, #0x1
+**     add     z0.b, \3, \5
+**     ...
+*/
+void
+foo ()
+{
+  for( int x = 0; x < N; x++ )
+    dst[x] = (in1[x] + in2[x]) >> 1;
+}
+
+/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
+/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 8ebbcd76b64..7025a3b4dc2 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -2019,22 +2019,59 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
 
   /* Check for target support.  */
   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
-  if (!new_vectype
-      || !direct_internal_fn_supported_p (ifn, new_vectype,
-                                         OPTIMIZE_FOR_SPEED))
+
+  if (!new_vectype)
     return NULL;
 
+  bool ifn_supported
+    = direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED);
+
   /* The IR requires a valid vector type for the cast result, even though
      it's likely to be discarded.  */
   *type_out = get_vectype_for_scalar_type (vinfo, type);
   if (!*type_out)
     return NULL;
 
-  /* Generate the IFN_AVG* call.  */
   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
   tree new_ops[2];
   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
                       unprom, new_vectype);
+
+  if (!ifn_supported)
+    {
+      /* If there is no target support available, generate code
+        to distribute rshift over plus and add one depending
+        upon floor or ceil rounding.  */
+
+      tree one_cst = build_one_cst (new_type);
+
+      tree tmp1 = vect_recog_temp_ssa_var (new_type, NULL);
+      gassign *g1 = gimple_build_assign (tmp1, RSHIFT_EXPR, new_ops[0], one_cst);
+
+      tree tmp2 = vect_recog_temp_ssa_var (new_type, NULL);
+      gassign *g2 = gimple_build_assign (tmp2, RSHIFT_EXPR, new_ops[1], one_cst);
+
+      tree tmp3 = vect_recog_temp_ssa_var (new_type, NULL);
+      gassign *g3 = gimple_build_assign (tmp3, PLUS_EXPR, tmp1, tmp2);
+      
+      tree tmp4 = vect_recog_temp_ssa_var (new_type, NULL);
+      tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
+      gassign *g4 = gimple_build_assign (tmp4, c, new_ops[0], new_ops[1]);
+ 
+      tree tmp5 = vect_recog_temp_ssa_var (new_type, NULL);
+      gassign *g5 = gimple_build_assign (tmp5, BIT_AND_EXPR, tmp4, one_cst);
+
+      gassign *g6 = gimple_build_assign (new_var, PLUS_EXPR, tmp3, tmp5);
+
+      append_pattern_def_seq (last_stmt_info, g1, new_vectype);
+      append_pattern_def_seq (last_stmt_info, g2, new_vectype);
+      append_pattern_def_seq (last_stmt_info, g3, new_vectype);
+      append_pattern_def_seq (last_stmt_info, g4, new_vectype);
+      append_pattern_def_seq (last_stmt_info, g5, new_vectype);
+      return vect_convert_output (last_stmt_info, type, g6, new_vectype);
+    }
+
+  /* Generate the IFN_AVG* call.  */
   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
                                                    new_ops[1]);
   gimple_call_set_lhs (average_stmt, new_var);
