On Mon, 18 Nov 2019 at 16:17, Kyrill Tkachov <kyrylo.tkac...@foss.arm.com> wrote:
>
> Hi Prathamesh,
>
> On 11/14/19 6:47 PM, Prathamesh Kulkarni wrote:
> > Hi,
> > As suggested in PR, the attached patch falls back to distributing
> > rshift over plus_expr instead of fallback widening -> arithmetic ->
> > narrowing sequence, if target support is not available.
> > Bootstrap+tested on x86_64-unknown-linux-gnu and aarch64-linux-gnu.
> > OK to commit ?
> >
> > Thanks,
> > Prathamesh
> >
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007.c
> new file mode 100644
> index 00000000000..b682f3f3b74
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +#define N 1024
> +unsigned char dst[N];
> +unsigned char in1[N];
> +unsigned char in2[N];
> +
> +void
> +foo ()
> +{
> +  for( int x = 0; x < N; x++ )
> +    dst[x] = (in1[x] + in2[x] + 1) >> 1;
> +}
> +
> +/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
> +/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
>
>
> I think you'll want to make the test a bit stronger to test the actual
> instructions expected here.
> You'll also want to test the IFN_AVG_FLOOR case, as your patch adds support
> for it too.

Hi Kyrill,
Thanks for the suggestions, I have updated the tests in the attached patch.
Does it look OK ?
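
For reference, the fallback relies on the scalar identities

  (a + b) >> 1      == (a >> 1) + (b >> 1) + ((a & b) & 1)
  (a + b + 1) >> 1  == (a >> 1) + (b >> 1) + ((a | b) & 1)

which stay within the narrow type, since (a >> 1) + (b >> 1) + 1 cannot
overflow it.  A minimal standalone sketch (illustration only, not part of
the patch) that brute-force checks both variants over unsigned char:

#include <assert.h>

int
main (void)
{
  for (int a = 0; a < 256; a++)
    for (int b = 0; b < 256; b++)
      {
        unsigned char x = a, y = b;
        /* Floor variant: BIT_AND_EXPR picks up the carry of the low bits.  */
        unsigned char floor_avg = (x >> 1) + (y >> 1) + ((x & y) & 1);
        /* Ceil variant: BIT_IOR_EXPR rounds up when either low bit is set.  */
        unsigned char ceil_avg = (x >> 1) + (y >> 1) + ((x | y) & 1);
        assert (floor_avg == ((a + b) >> 1));
        assert (ceil_avg == ((a + b + 1) >> 1));
      }
  return 0;
}
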
Thanks,
Prathamesh
>
> Thanks,
> Kyrill
>
> diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> index 8ebbcd76b64..7025a3b4dc2 100644
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -2019,22 +2019,59 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
>
>    /* Check for target support.  */
>    tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
> -  if (!new_vectype
> -      || !direct_internal_fn_supported_p (ifn, new_vectype,
> -                                          OPTIMIZE_FOR_SPEED))
> +
> +  if (!new_vectype)
>      return NULL;
>
> +  bool ifn_supported
> +    = direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED);
> +
>    /* The IR requires a valid vector type for the cast result, even though
>       it's likely to be discarded.  */
>    *type_out = get_vectype_for_scalar_type (vinfo, type);
>    if (!*type_out)
>      return NULL;
>
> -  /* Generate the IFN_AVG* call.  */
>    tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
>    tree new_ops[2];
>    vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
>                         unprom, new_vectype);
> +
> +  if (!ifn_supported)
> +    {
> +      /* If there is no target support available, generate code
> +         to distribute rshift over plus and add one depending
> +         upon floor or ceil rounding.  */
> +
> +      tree one_cst = build_one_cst (new_type);
> +
> +      tree tmp1 = vect_recog_temp_ssa_var (new_type, NULL);
> +      gassign *g1 = gimple_build_assign (tmp1, RSHIFT_EXPR, new_ops[0], one_cst);
> +
> +      tree tmp2 = vect_recog_temp_ssa_var (new_type, NULL);
> +      gassign *g2 = gimple_build_assign (tmp2, RSHIFT_EXPR, new_ops[1], one_cst);
> +
> +      tree tmp3 = vect_recog_temp_ssa_var (new_type, NULL);
> +      gassign *g3 = gimple_build_assign (tmp3, PLUS_EXPR, tmp1, tmp2);
> +
> +      tree tmp4 = vect_recog_temp_ssa_var (new_type, NULL);
> +      tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
> +      gassign *g4 = gimple_build_assign (tmp4, c, new_ops[0], new_ops[1]);
> +
> +      tree tmp5 = vect_recog_temp_ssa_var (new_type, NULL);
> +      gassign *g5 = gimple_build_assign (tmp5, BIT_AND_EXPR, tmp4, one_cst);
> +
> +      gassign *g6 = gimple_build_assign (new_var, PLUS_EXPR, tmp3, tmp5);
> +
> +      append_pattern_def_seq (last_stmt_info, g1, new_vectype);
> +      append_pattern_def_seq (last_stmt_info, g2, new_vectype);
> +      append_pattern_def_seq (last_stmt_info, g3, new_vectype);
> +      append_pattern_def_seq (last_stmt_info, g4, new_vectype);
> +      append_pattern_def_seq (last_stmt_info, g5, new_vectype);
> +      return vect_convert_output (last_stmt_info, type, g6, new_vectype);
> +    }
> +
> +  /* Generate the IFN_AVG* call.  */
>    gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
>                                                      new_ops[1]);
>    gimple_call_set_lhs (average_stmt, new_var);
>
2019-11-19  Prathamesh Kulkarni  <prathamesh.kulka...@linaro.org>

	PR tree-optimization/89007
	* tree-vect-patterns.c (vect_recog_average_pattern): If there is no
	target support available, generate code to distribute rshift over plus
	and add one depending upon floor or ceil rounding.

testsuite/
	* gcc.target/aarch64/sve/pr89007-1.c: New test.
	* gcc.target/aarch64/sve/pr89007-2.c: Likewise.

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
new file mode 100644
index 00000000000..32095c63c61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1024
+unsigned char dst[N];
+unsigned char in1[N];
+unsigned char in2[N];
+
+/*
+** foo:
+**	...
+**	lsr	(z[0-9]+\.b), z[0-9]+\.b, #1
+**	lsr	(z[0-9]+\.b), z[0-9]+\.b, #1
+**	add	(z[0-9]+\.b), \1, \2
+**	orr	(z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
+**	and	(z[0-9]+\.b), \4\.b, #0x1
+**	add	z0.b, \3, \5
+**	...
+*/
+void
+foo ()
+{
+  for( int x = 0; x < N; x++ )
+    dst[x] = (in1[x] + in2[x] + 1) >> 1;
+}
+
+/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
+/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
new file mode 100644
index 00000000000..cc40f45046b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1024
+unsigned char dst[N];
+unsigned char in1[N];
+unsigned char in2[N];
+
+/*
+** foo:
+**	...
+**	lsr	(z[0-9]+\.b), z[0-9]+\.b, #1
+**	lsr	(z[0-9]+\.b), z[0-9]+\.b, #1
+**	add	(z[0-9]+\.b), \1, \2
+**	and	(z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
+**	and	(z[0-9]+\.b), \4\.b, #0x1
+**	add	z0.b, \3, \5
+**	...
+*/
+void
+foo ()
+{
+  for( int x = 0; x < N; x++ )
+    dst[x] = (in1[x] + in2[x]) >> 1;
+}
+
+/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
+/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 8ebbcd76b64..7025a3b4dc2 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -2019,22 +2019,59 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
 
   /* Check for target support.  */
   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
-  if (!new_vectype
-      || !direct_internal_fn_supported_p (ifn, new_vectype,
-                                          OPTIMIZE_FOR_SPEED))
+
+  if (!new_vectype)
     return NULL;
 
+  bool ifn_supported
+    = direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED);
+
   /* The IR requires a valid vector type for the cast result, even though
      it's likely to be discarded.  */
   *type_out = get_vectype_for_scalar_type (vinfo, type);
   if (!*type_out)
     return NULL;
 
-  /* Generate the IFN_AVG* call.  */
   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
   tree new_ops[2];
   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
                        unprom, new_vectype);
+
+  if (!ifn_supported)
+    {
+      /* If there is no target support available, generate code
+         to distribute rshift over plus and add one depending
+         upon floor or ceil rounding.  */
+
+      tree one_cst = build_one_cst (new_type);
+
+      tree tmp1 = vect_recog_temp_ssa_var (new_type, NULL);
+      gassign *g1 = gimple_build_assign (tmp1, RSHIFT_EXPR, new_ops[0], one_cst);
+
+      tree tmp2 = vect_recog_temp_ssa_var (new_type, NULL);
+      gassign *g2 = gimple_build_assign (tmp2, RSHIFT_EXPR, new_ops[1], one_cst);
+
+      tree tmp3 = vect_recog_temp_ssa_var (new_type, NULL);
+      gassign *g3 = gimple_build_assign (tmp3, PLUS_EXPR, tmp1, tmp2);
+
+      tree tmp4 = vect_recog_temp_ssa_var (new_type, NULL);
+      tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
+      gassign *g4 = gimple_build_assign (tmp4, c, new_ops[0], new_ops[1]);
+
+      tree tmp5 = vect_recog_temp_ssa_var (new_type, NULL);
+      gassign *g5 = gimple_build_assign (tmp5, BIT_AND_EXPR, tmp4, one_cst);
+
+      gassign *g6 = gimple_build_assign (new_var, PLUS_EXPR, tmp3, tmp5);
+
+      append_pattern_def_seq (last_stmt_info, g1, new_vectype);
+      append_pattern_def_seq (last_stmt_info, g2, new_vectype);
+      append_pattern_def_seq (last_stmt_info, g3, new_vectype);
+      append_pattern_def_seq (last_stmt_info, g4, new_vectype);
+      append_pattern_def_seq (last_stmt_info, g5, new_vectype);
+      return vect_convert_output (last_stmt_info, type, g6, new_vectype);
+    }
+
+  /* Generate the IFN_AVG* call.  */
   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
                                                     new_ops[1]);
   gimple_call_set_lhs (average_stmt, new_var);
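
For the record, a short justification (not part of the patch) of why
BIT_AND_EXPR gives the floor rounding and BIT_IOR_EXPR the ceil rounding:
writing a0 = a & 1 and b0 = b & 1, we have
a + b = 2 * ((a >> 1) + (b >> 1)) + a0 + b0, so

  (a + b) >> 1      = (a >> 1) + (b >> 1) + ((a0 + b0) >> 1)
  (a + b + 1) >> 1  = (a >> 1) + (b >> 1) + ((a0 + b0 + 1) >> 1)

where (a0 + b0) >> 1 is 1 exactly when both low bits are set, i.e. (a & b) & 1,
and (a0 + b0 + 1) >> 1 is 1 exactly when at least one low bit is set,
i.e. (a | b) & 1.  These correspond to the tmp4/tmp5 statements in the
pattern above.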