Hi,
In its current form, when the bswap optimization pass recognizes a load in a
specific endianness it assumes that all smaller loads in the original expression
are part of a linear chain of basic block (ie they are either in the same basic
block or there is no conditional branching in the blocks involved). The
assumptions is used when replacing the original set of loads by a new wider
load: that load is always placed just before the original load with the smallest
address. This can mean accessing passed the end of an object when the other
loads of the original expression are executed conditionally because the code
would be changed to a wider load unconditionally. Please see initial comment in
PR77673 for the problem in action.
This patch changes the pass to always select the load in the original expression
in the most postdominated basic block of all loads as the location where to
insert the new load. It also checks that this location postdominates the final
statement of the load computation in the original expression. These two checks
together with the fact that there is necessarily a flow path that includes all
the loads and the computing expression (otherwise the expression's value would
be undefined) ensure that (1) the new load is made if all original loads would
have been made and (ii) the load is always made when the computing expression
would be executed.
ChangeLog entry is as follows:
*** gcc/ChangeLog ***
2016-11-22 Thomas Preud'homme <thomas.preudho...@arm.com>
PR tree-optimization/77673
* tree-ssa-math-opts.c (struct symbolic_number): Add new src field.
(init_symbolic_number): Initialize src field from src parameter.
(perform_symbolic_merge): Select most dominated statement as the
source statement. Set src field of resulting n structure from the
input src with the lowest address.
(find_bswap_or_nop): Rename source_stmt into ins_stmt.
(bswap_replace): Rename src_stmt into ins_stmt. Initially get source
of load from src field rather than insertion statement. Cancel
optimization if statement analyzed is not dominated by the insertion
statement.
(pass_optimize_bswap::execute): Rename src_stmt to ins_stmt. Compute
dominance information.
*** gcc/testsuite/ChangeLog ***
2016-11-22 Thomas Preud'homme <thomas.preudho...@arm.com>
PR tree-optimization/77673
* gcc.dg/pr77673.c: New test.
Bootstrapped on arm-linux-gnueabihf and x86_64-linux-gnu with regression
testsuite coming back clean in both case.
Is this ok for trunk?
Best regards,
Thomas
diff --git a/gcc/testsuite/gcc.dg/pr77673.c b/gcc/testsuite/gcc.dg/pr77673.c
new file mode 100644
index 0000000000000000000000000000000000000000..e03bcb9284d1e5a0e496cfa547fdbab630bec04f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr77673.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-require-effective-target bswap32 } */
+/* { dg-options "-O2 -fPIC -fdump-tree-bswap" } */
+/* { dg-additional-options "-march=z900" { target s390*-*-* } } */
+
+void mach_parse_compressed(unsigned char* ptr, unsigned long int* val)
+{
+ if (ptr[0] < 0xC0U) {
+ *val = ptr[0] + ptr[1];
+ return;
+ }
+
+ *val = ((unsigned long int)(ptr[0]) << 24)
+ | ((unsigned long int)(ptr[1]) << 16)
+ | ((unsigned long int)(ptr[2]) << 8)
+ | ptr[3];
+}
+
+/* { dg-final { scan-tree-dump-not "load_dst_\\d+ =.* if \\(" "bswap" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index deccdf1ad14ece93ad56153ba0cfb8c555f9ceb0..4a47254d223e24caf1cd611f434a578729ba205d 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1964,6 +1964,7 @@ struct symbolic_number {
tree base_addr;
tree offset;
HOST_WIDE_INT bytepos;
+ tree src;
tree alias_set;
tree vuse;
unsigned HOST_WIDE_INT range;
@@ -2068,6 +2069,7 @@ init_symbolic_number (struct symbolic_number *n, tree src)
return false;
n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
+ n->src = src;
/* Set up the symbolic number N by setting each byte to a value between 1 and
the byte size of rhs1. The highest order byte is set to n->size and the
@@ -2192,6 +2194,7 @@ perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
uint64_t inc;
HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
struct symbolic_number *toinc_n_ptr, *n_end;
+ basic_block bb1, bb2;
if (!n1->base_addr || !n2->base_addr
|| !operand_equal_p (n1->base_addr, n2->base_addr, 0))
@@ -2205,15 +2208,20 @@ perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
{
n_start = n1;
start_sub = n2->bytepos - n1->bytepos;
- source_stmt = source_stmt1;
}
else
{
n_start = n2;
start_sub = n1->bytepos - n2->bytepos;
- source_stmt = source_stmt2;
}
+ bb1 = gimple_bb (source_stmt1);
+ bb2 = gimple_bb (source_stmt2);
+ if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
+ source_stmt = source_stmt1;
+ else
+ source_stmt = source_stmt2;
+
/* Find the highest address at which a load is performed and
compute related info. */
end1 = n1->bytepos + (n1->range - 1);
@@ -2270,6 +2278,7 @@ perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
n->vuse = n_start->vuse;
n->base_addr = n_start->base_addr;
n->offset = n_start->offset;
+ n->src = n_start->src;
n->bytepos = n_start->bytepos;
n->type = n_start->type;
size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
@@ -2519,7 +2528,7 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
uint64_t cmpxchg = CMPXCHG;
uint64_t cmpnop = CMPNOP;
- gimple *source_stmt;
+ gimple *ins_stmt;
int limit;
/* The last parameter determines the depth search limit. It usually
@@ -2529,9 +2538,9 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
in libgcc, and for initial shift/and operation of the src operand. */
limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
- source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
+ ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
- if (!source_stmt)
+ if (!ins_stmt)
return NULL;
/* Find real size of result (highest non-zero byte). */
@@ -2583,7 +2592,7 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
return NULL;
n->range *= BITS_PER_UNIT;
- return source_stmt;
+ return ins_stmt;
}
namespace {
@@ -2632,7 +2641,7 @@ public:
changing of basic block. */
static bool
-bswap_replace (gimple *cur_stmt, gimple *src_stmt, tree fndecl,
+bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
tree bswap_type, tree load_type, struct symbolic_number *n,
bool bswap)
{
@@ -2641,25 +2650,31 @@ bswap_replace (gimple *cur_stmt, gimple *src_stmt, tree fndecl,
gimple *bswap_stmt;
gsi = gsi_for_stmt (cur_stmt);
- src = gimple_assign_rhs1 (src_stmt);
+ src = n->src;
tgt = gimple_assign_lhs (cur_stmt);
/* Need to load the value from memory first. */
if (n->base_addr)
{
- gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
+ gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
tree addr_expr, addr_tmp, val_expr, val_tmp;
tree load_offset_ptr, aligned_load_type;
gimple *addr_stmt, *load_stmt;
unsigned align;
HOST_WIDE_INT load_offset = 0;
+ basic_block ins_bb, cur_bb;
+
+ ins_bb = gimple_bb (ins_stmt);
+ cur_bb = gimple_bb (cur_stmt);
+ if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
+ return false;
align = get_object_alignment (src);
/* Move cur_stmt just before one of the load of the original
to ensure it has the same VUSE. See PR61517 for what could
go wrong. */
- if (gimple_bb (cur_stmt) != gimple_bb (src_stmt))
+ if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
gsi_move_before (&gsi, &gsi_ins);
gsi = gsi_for_stmt (cur_stmt);
@@ -2834,6 +2849,7 @@ pass_optimize_bswap::execute (function *fun)
memset (&nop_stats, 0, sizeof (nop_stats));
memset (&bswap_stats, 0, sizeof (bswap_stats));
+ calculate_dominance_info (CDI_DOMINATORS);
FOR_EACH_BB_FN (bb, fun)
{
@@ -2845,7 +2861,7 @@ pass_optimize_bswap::execute (function *fun)
variant wouldn't be detected. */
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
{
- gimple *src_stmt, *cur_stmt = gsi_stmt (gsi);
+ gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
enum tree_code code;
struct symbolic_number n;
@@ -2878,9 +2894,9 @@ pass_optimize_bswap::execute (function *fun)
continue;
}
- src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+ ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
- if (!src_stmt)
+ if (!ins_stmt)
continue;
switch (n.range)
@@ -2914,7 +2930,7 @@ pass_optimize_bswap::execute (function *fun)
if (bswap && !fndecl && n.range != 16)
continue;
- if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
+ if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
&n, bswap))
changed = true;
}