On Wed, Jun 20, 2018 at 12:39 PM Richard Sandiford <richard.sandif...@arm.com> wrote: > > The main over-widening patch can introduce quite a few extra casts, > and in many cases those casts simply "tap into" an intermediate > point in an existing extension. E.g. if we have: > > unsigned char a; > int ax = (int) a; > > and a later operation using ax is shortened to "unsigned short", > we would need: > > unsigned short ax' = (unsigned short) a; > > The a->ax extension requires one set of unpacks to get to unsigned > short and another set of unpacks to get to int. The first set are > then duplicated for ax'. If both ax and ax' are needed, the a->ax' > extension would end up counting twice during cost calculations. > > This patch rewrites the original: > > int ax = (int) a; > > into a pattern: > > unsigned short ax' = (unsigned short) a; > int ax = (int) ax'; > > so that each extension only counts once. > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
OK. Richard. > Richard > > > 2018-06-20 Richard Sandiford <richard.sandif...@arm.com> > > gcc/ > * tree-vect-patterns.c (vect_split_statement): New function. > (vect_convert_input): Use it to try to split an existing cast. > > gcc/testsuite/ > * gcc.dg/vect/vect-over-widen-5.c: Test that the extensions > get split into two for use by the over-widening pattern. > * gcc.dg/vect/vect-over-widen-6.c: Likewise. > * gcc.dg/vect/vect-over-widen-7.c: Likewise. > * gcc.dg/vect/vect-over-widen-8.c: Likewise. > * gcc.dg/vect/vect-over-widen-9.c: Likewise. > * gcc.dg/vect/vect-over-widen-10.c: Likewise. > * gcc.dg/vect/vect-over-widen-11.c: Likewise. > * gcc.dg/vect/vect-over-widen-12.c: Likewise. > * gcc.dg/vect/vect-over-widen-13.c: Likewise. > * gcc.dg/vect/vect-over-widen-14.c: Likewise. > * gcc.dg/vect/vect-over-widen-15.c: Likewise. > * gcc.dg/vect/vect-over-widen-16.c: Likewise. > * gcc.dg/vect/vect-over-widen-22.c: New test. > > Index: gcc/tree-vect-patterns.c > =================================================================== > --- gcc/tree-vect-patterns.c 2018-06-20 11:26:19.557193074 +0100 > +++ gcc/tree-vect-patterns.c 2018-06-20 11:26:23.637157077 +0100 > @@ -565,6 +565,97 @@ vect_recog_temp_ssa_var (tree type, gimp > return make_temp_ssa_name (type, stmt, "patt"); > } > > +/* STMT2_INFO describes a type conversion that could be split into STMT1 > + followed by a version of STMT2_INFO that takes NEW_RHS as its first > + input. Try to do this using pattern statements, returning true on > + success. */ > + > +static bool > +vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs, > + gimple *stmt1, tree vectype) > +{ > + if (is_pattern_stmt_p (stmt2_info)) > + { > + /* STMT2_INFO is part of a pattern. Get the statement to which > + the pattern is attached. 
*/ > + stmt_vec_info orig_stmt2_info > + = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt2_info)); > + vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype); > + > + if (dump_enabled_p ()) > + { > + dump_printf_loc (MSG_NOTE, vect_location, > + "Splitting pattern statement: "); > + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0); > + } > + > + /* Since STMT2_INFO is a pattern statement, we can change it > + in-situ without worrying about changing the code for the > + containing block. */ > + gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs); > + > + if (dump_enabled_p ()) > + { > + dump_printf_loc (MSG_NOTE, vect_location, "into: "); > + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0); > + dump_printf_loc (MSG_NOTE, vect_location, "and: "); > + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0); > + } > + > + gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info); > + if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info->stmt) > + /* STMT2_INFO is the actual pattern statement. Add STMT1 > + to the end of the definition sequence. */ > + gimple_seq_add_stmt_without_update (def_seq, stmt1); > + else > + { > + /* STMT2_INFO belongs to the definition sequence. Insert STMT1 > + before it. */ > + gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq); > + gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT); > + } > + return true; > + } > + else > + { > + /* STMT2_INFO doesn't yet have a pattern. Try to create a > + two-statement pattern now. 
*/ > + gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info)); > + tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt)); > + tree lhs_vectype = get_vectype_for_scalar_type (lhs_type); > + if (!lhs_vectype) > + return false; > + > + if (dump_enabled_p ()) > + { > + dump_printf_loc (MSG_NOTE, vect_location, > + "Splitting statement: "); > + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0); > + } > + > + /* Add STMT1 as a singleton pattern definition sequence. */ > + gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info); > + vect_init_pattern_stmt (stmt1, stmt2_info, vectype); > + gimple_seq_add_stmt_without_update (def_seq, stmt1); > + > + /* Build the second of the two pattern statements. */ > + tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL); > + gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs); > + vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype); > + > + if (dump_enabled_p ()) > + { > + dump_printf_loc (MSG_NOTE, vect_location, > + "into pattern statements: "); > + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0); > + dump_printf_loc (MSG_NOTE, vect_location, "and: "); > + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt2, 0); > + } > + > + return true; > + } > +} > + > /* Convert UNPROM to TYPE and return the result, adding new statements > to STMT_INFO's pattern definition statements if no better way is > available. VECTYPE is the vector form of TYPE. */ > @@ -593,6 +684,18 @@ vect_convert_input (stmt_vec_info stmt_i > tree new_op = vect_recog_temp_ssa_var (type, NULL); > gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op); > > + /* If the operation is the input to a vectorizable cast, try splitting > + that cast into two, taking the required result as a mid-way point. 
*/ > + if (unprom->caster) > + { > + tree lhs = gimple_get_lhs (unprom->caster->stmt); > + if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type) > + && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type) > + && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type)) > + && vect_split_statement (unprom->caster, new_op, new_stmt, vectype)) > + return new_op; > + } > + > /* If OP is an external value, see if we can insert the new statement > on an incoming edge. */ > if (unprom->dt == vect_external_def) > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-5.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-5.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-5.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -44,6 +44,7 @@ main (void) > return 0; > } > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+ } "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 1} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: > detected:[^\n]* \(signed char\)} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-6.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-6.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-6.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -9,6 +9,7 @@ #define BASE_C 40 > > #include "vect-over-widen-5.c" > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+ } "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 1} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: > 
detected:[^\n]* \(unsigned char\)} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-7.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-7.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-7.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -46,6 +46,7 @@ main (void) > return 0; > } > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+ } "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 2} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: > detected:[^\n]* \(signed char\)} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-8.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-8.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-8.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -12,6 +12,7 @@ #define D 251 > > #include "vect-over-widen-7.c" > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+ } "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 2} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: > detected:[^\n]* \(unsigned char\)} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-9.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-9.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-9.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -50,6 +50,7 @@ main (void) > return 0; > } > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } 
*/ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+ } "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 1} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 2} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-10.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-10.c 2018-06-20 > 11:26:19.553193110 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-10.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -11,6 +11,7 @@ #define BASE_C 40 > > #include "vect-over-widen-9.c" > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+ } "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 1} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 2} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-11.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-11.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-11.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -55,6 +55,7 @@ main (void) > return 0; > } > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+ } "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 1} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 2} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-12.c > =================================================================== > --- 
gcc/testsuite/gcc.dg/vect/vect-over-widen-12.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-12.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -11,6 +11,7 @@ #define BASE_C 40 > > #include "vect-over-widen-11.c" > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+ } "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 1} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 2} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-13.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-13.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-13.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -43,6 +43,7 @@ main (void) > return 0; > } > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* / 2} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: > detected:[^\n]* = \(signed char\)} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-14.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-14.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-14.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -11,6 +11,7 @@ #define BASE_C 40 > > #include "vect-over-widen-13.c" > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+} "vect" } } */ > /* { dg-final { scan-tree-dump 
{vect_recog_over_widening_pattern: > detected:[^\n]* >> 1} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: > detected:[^\n]* = \(unsigned char\)} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-15.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-15.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-15.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -45,6 +45,7 @@ main (void) > return 0; > } > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* / 2} "vect" } } */ > /* { dg-final { scan-tree-dump-not {vect_recog_cast_forwprop_pattern: > detected} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-16.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-over-widen-16.c 2018-06-20 > 11:26:19.557193074 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-16.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -11,6 +11,7 @@ #define BASE_C 40 > > #include "vect-over-widen-15.c" > > +/* { dg-final { scan-tree-dump {Splitting statement} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \+} "vect" } } */ > /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* >> 1} "vect" } } */ > /* { dg-final { scan-tree-dump-not {vect_recog_cast_forwprop_pattern: > detected} "vect" } } */ > Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-22.c > =================================================================== > --- /dev/null 2018-06-13 14:36:57.192460992 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-22.c 2018-06-20 > 11:26:23.633157113 +0100 > @@ -0,0 +1,53 @@ > +/* { 
dg-require-effective-target vect_int } */ > +/* { dg-require-effective-target vect_shift } */ > +/* { dg-require-effective-target vect_pack_trunc } */ > +/* { dg-require-effective-target vect_unpack } */ > + > +#include "tree-vect.h" > + > +#define N 111 > + > +/* The XOR should be narrowed to char and the OR to short. */ > + > +void __attribute__ ((noipa)) > +f (unsigned int *restrict a, unsigned int *restrict b, > + unsigned short *restrict c, unsigned char *restrict d, unsigned int e) > +{ > + e &= 0xff; > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > + { > + unsigned int xor = d[i] ^ e; > + a[i] = c[i] | xor; > + b[i] = xor; > + } > +} > + > +int > +main (void) > +{ > + check_vect (); > + > + unsigned int a[N], b[N]; > + unsigned short c[N]; > + unsigned char d[N]; > + for (int i = 0; i < N; ++i) > + { > + c[i] = i * 11; > + d[i] = i * 2 + 3; > + asm volatile ("" ::: "memory"); > + } > + f (a, b, c, d, 0x73); > + for (int i = 0; i < N; ++i) > + if (b[i] != ((i * 2 + 3) ^ 0x73) > + || a[i] != ((i * 11) | b[i])) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump {Splitting pattern statement} "vect" } } */ > +/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \^} "vect" } } */ > +/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: > detected:[^\n]* \|} "vect" } } */ > +/* { dg-final { scan-tree-dump {demoting [^\n]* to [^\n]*char} "vect" } } */ > +/* { dg-final { scan-tree-dump {demoting [^\n]* to [^\n]*short} "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */