On Wed, Oct 29, 2014 at 2:30 PM, Thomas Preud'homme <thomas.preudho...@arm.com> wrote:
>> From: Richard Biener [mailto:richard.guent...@gmail.com]
>> Sent: Wednesday, October 08, 2014 8:27 AM
>>
>> I wouldn't worry about that too much.  Indeed the question would be
>> what should be canonical on GIMPLE (expanders should choose the
>> optimal variant from both).  I think a tree code should always be
>> preferred to a builtin function call - which means a rotate is more
>> canonical than a bswap16 call.
>
> Below is the updated patch. ChangeLog entries are as follows:
>
> *** gcc/ChangeLog ***
>
> 2014-10-29  Thomas Preud'homme  <thomas.preudho...@arm.com>
>
>         PR tree-optimization/63259
>         * tree-ssa-math-opts.c (bswap_replace): Replace expression by a
>         rotation left if it is a 16 bit byte swap.
>         (pass_optimize_bswap::execute): Also consider bswap in LROTATE_EXPR
>         and RROTATE_EXPR statements if it is a byte rotation.
>
> *** gcc/testsuite/ChangeLog ***
>
> 2014-10-29  Thomas Preud'homme  <thomas.preudho...@arm.com>
>
>         PR tree-optimization/63259
>         * optimize-bswapsi-1.c (swap32_f): New bswap pass test.
>         * optimize-bswaphi-1.c: Drop useless SItype definition and fix typo
>         in following comment.
>
> diff --git a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
> index 18aba28..692fceb 100644
> --- a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
> +++ b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
> @@ -42,11 +42,10 @@ uint32_t read_be16_3 (unsigned char *data)
>    return *(data + 1) | (*data << 8);
>  }
>
> -typedef int SItype __attribute__ ((mode (SI)));
>  typedef int HItype __attribute__ ((mode (HI)));
>
>  /* Test that detection of significant sign extension works correctly.  This
> -   checks that unknown byte marker are set correctly in cast of cast.  */
> +   checks that unknown byte markers are set correctly in cast of cast.  */
>
>  HItype
>  swap16 (HItype in)
> diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
> index cfde218..ad3ede4 100644
> --- a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
> +++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
> @@ -78,5 +78,16 @@ swap32_e (SItype in)
>      | (((in >> 24) & 0xFF) << 0);
>  }
>
> -/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 5 "bswap" } } */
> +/* This variant comes from PR63259.  It compiles to a gimple sequence that ends
> +   with a rotation instead of a bitwise OR.  */
> +
> +unsigned
> +swap32_f (unsigned in)
> +{
> +  in = ((in & 0xff00ff00) >> 8) | ((in & 0x00ff00ff) << 8);
> +  in = ((in & 0xffff0000) >> 16) | ((in & 0x0000ffff) << 16);
> +  return in;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 6 "bswap" } } */
>  /* { dg-final { cleanup-tree-dump "bswap" } } */
> diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
> index e0f2924..5b656e0 100644
> --- a/gcc/tree-ssa-math-opts.c
> +++ b/gcc/tree-ssa-math-opts.c
> @@ -2187,7 +2187,7 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, gimple src_stmt,
>                 struct symbolic_number *n, bool bswap)
>  {
>    tree src, tmp, tgt;
> -  gimple call;
> +  gimple bswap_stmt;
>
>    src = gimple_assign_rhs1 (src_stmt);
>    tgt = gimple_assign_lhs (cur_stmt);
> @@ -2293,16 +2293,28 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, gimple src_stmt,
>
>    tmp = src;
>
> -  /* Convert the src expression if necessary.  */
> -  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
> +  /* Canonical form for 16 bit bswap is a rotate expression.  */
> +  if (bswap && n->range == 16)
>      {
> -      gimple convert_stmt;
> -      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
> -      convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tmp, src, NULL);
> -      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
> +      tree count = build_int_cst (NULL, BITS_PER_UNIT);
> +      bswap_type = TREE_TYPE (src);
> +      src = fold_build2 (LROTATE_EXPR, bswap_type, src, count);
> +      bswap_stmt = gimple_build_assign (NULL, src);
>      }
> +  else
> +    {
> +      /* Convert the src expression if necessary.  */
> +      if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
> +        {
> +          gimple convert_stmt;
> +          tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
> +          convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tmp, src,
> +                                                       NULL);
> +          gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
> +        }
>
> -  call = gimple_build_call (fndecl, 1, tmp);
> +      bswap_stmt = gimple_build_call (fndecl, 1, tmp);
> +    }
>
>    tmp = tgt;
>
> @@ -2315,7 +2327,7 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, gimple src_stmt,
>        gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
>      }
>
> -  gimple_call_set_lhs (call, tmp);
> +  gimple_set_lhs (bswap_stmt, tmp);
>
>    if (dump_file)
>      {
> @@ -2324,7 +2336,7 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, gimple src_stmt,
>        print_gimple_stmt (dump_file, cur_stmt, 0, 0);
>      }
>
> -  gsi_insert_after (&gsi, call, GSI_SAME_STMT);
> +  gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
>    gsi_remove (&gsi, true);
>    return true;
>  }
> @@ -2388,13 +2400,29 @@ pass_optimize_bswap::execute (function *fun)
>      {
>        gimple src_stmt, cur_stmt = gsi_stmt (gsi);
>        tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
> +      enum tree_code code;
>        struct symbolic_number n;
>        bool bswap;
>
> -      if (!is_gimple_assign (cur_stmt)
> -          || gimple_assign_rhs_code (cur_stmt) != BIT_IOR_EXPR)
> +      if (!is_gimple_assign (cur_stmt))
>          continue;
>
> +      code = gimple_assign_rhs_code (cur_stmt);
> +      switch (code)
> +        {
> +        case LROTATE_EXPR:
> +        case RROTATE_EXPR:
> +          if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
> +              || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
> +                 % BITS_PER_UNIT)
> +            continue;
> +          /* Fall through.  */
> +        case BIT_IOR_EXPR:
> +          break;
> +        default:
> +          continue;
> +        }
> +
>        src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
>
>        if (!src_stmt)
>
> Ok for trunk?
Ok.

>> From the performance side the pass could be re-structured to populate
>> a lattice, thus work from def to use instead of the other way around.
>> Which means we visit each stmt exactly once, compute its value
>> symbolically and check it against a rotate.
>
> That sounds nice but is a heavy change. IMHO this should be done only if it
> can be shown that bswap can have a significant impact on compilation time.
> In any case, with the approaching end of stage1 this would be material for
> the next stage1.

Yes, of course.

Thanks,
Richard.

> Best regards,
>
> Thomas
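
[Editorial note, not part of the original thread: a minimal stand-alone C sketch
of the equivalence the new canonical form relies on. Rotating a 16 bit value
left by BITS_PER_UNIT (8) bits exchanges its two bytes, i.e. it computes the
same result as __builtin_bswap16, which is why the pass can emit a LROTATE_EXPR
instead of a builtin call. The rotl16_by8 helper name is made up for the
illustration.]

    /* Illustration only: a left rotation by 8 of a 16 bit value is a byte swap.  */

    #include <stdint.h>
    #include <assert.h>

    static uint16_t
    rotl16_by8 (uint16_t x)
    {
      /* Source-level equivalent of LROTATE_EXPR <x, 8> on a 16 bit operand.  */
      return (uint16_t) ((x << 8) | (x >> 8));
    }

    int
    main (void)
    {
      uint16_t x = 0x1234;
      assert (rotl16_by8 (x) == 0x3412);                /* bytes exchanged */
      assert (rotl16_by8 (x) == __builtin_bswap16 (x)); /* same as the builtin */
      return 0;
    }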