Sorry, ignore the first attachment (2_combine_profile_multilib.patch). i always miss that Thunderbird selects the first file in the in-review folder upfront.

Best regards,

Thomas

On 13/12/16 11:43, Thomas Preudhomme wrote:
Hi,

Fix in r242869 for PR77673 (bswap loads after end of object) applies cleanly on
GCC 6 and with trivial fix for GCC 5 (gimple * in GCC 6 -> gimple in GCC 5). The
backports also bootstrap fine on x86_64-linux-gnu and testsuite shows no
regression.

ChangeLog entries are as follow:


*** gcc/ChangeLog ***

2016-12-12  Thomas Preud'homme  <thomas.preudho...@arm.com>

        Backport from mainline
        2016-11-25  Thomas Preud'homme  <thomas.preudho...@arm.com>

        PR tree-optimization/77673
        * tree-ssa-math-opts.c (struct symbolic_number): Add new src field.
        (init_symbolic_number): Initialize src field from src parameter.
        (perform_symbolic_merge): Select most dominated statement as the
        source statement.  Set src field of resulting n structure from the
        input src with the lowest address.
        (find_bswap_or_nop): Rename source_stmt into ins_stmt.
        (bswap_replace): Rename src_stmt into ins_stmt.  Initially get source
        of load from src field rather than insertion statement.  Cancel
        optimization if statement analyzed is not dominated by the insertion
        statement.
        (pass_optimize_bswap::execute): Rename src_stmt to ins_stmt.  Compute
        dominance information.


*** gcc/testsuite/ChangeLog ***

2016-12-12  Thomas Preud'homme  <thomas.preudho...@arm.com>

        Backport from mainline
        2016-11-25  Thomas Preud'homme  <thomas.preudho...@arm.com>

        PR tree-optimization/77673
        * gcc.dg/pr77673.c: New test.


Is this ok for gcc-5-branch and gcc-6-branch?

Best regards,

Thomas
diff --git a/gcc/testsuite/gcc.dg/pr77673.c b/gcc/testsuite/gcc.dg/pr77673.c
new file mode 100644
index 0000000000000000000000000000000000000000..e03bcb9284d1e5a0e496cfa547fdbab630bec04f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr77673.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-require-effective-target bswap32 } */
+/* { dg-options "-O2 -fPIC -fdump-tree-bswap" } */
+/* { dg-additional-options "-march=z900" { target s390*-*-* } } */
+
+void mach_parse_compressed(unsigned char* ptr, unsigned long int* val)
+{
+  if (ptr[0] < 0xC0U) {
+    *val = ptr[0] + ptr[1];
+    return;
+  }
+
+  *val = ((unsigned long int)(ptr[0]) << 24)
+    | ((unsigned long int)(ptr[1]) << 16)
+    | ((unsigned long int)(ptr[2]) << 8)
+    | ptr[3];
+}
+
+/* { dg-final { scan-tree-dump-not "load_dst_\\d+ =.* if \\(" "bswap" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 61b65bb824b1e90ab13e3cb3ac1b4fbbc34a3b70..3882652dcfc0e04642196243a034f1f7c1405936 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1645,6 +1645,7 @@ struct symbolic_number {
   tree base_addr;
   tree offset;
   HOST_WIDE_INT bytepos;
+  tree src;
   tree alias_set;
   tree vuse;
   unsigned HOST_WIDE_INT range;
@@ -1746,6 +1747,7 @@ init_symbolic_number (struct symbolic_number *n, tree src)
   int size;
 
   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
+  n->src = src;
 
   /* Set up the symbolic number N by setting each byte to a value between 1 and
      the byte size of rhs1.  The highest order byte is set to n->size and the
@@ -1859,6 +1861,7 @@ perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1,
       uint64_t inc;
       HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
       struct symbolic_number *toinc_n_ptr, *n_end;
+      basic_block bb1, bb2;
 
       if (!n1->base_addr || !n2->base_addr
 	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
@@ -1872,15 +1875,20 @@ perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1,
 	{
 	  n_start = n1;
 	  start_sub = n2->bytepos - n1->bytepos;
-	  source_stmt = source_stmt1;
 	}
       else
 	{
 	  n_start = n2;
 	  start_sub = n1->bytepos - n2->bytepos;
-	  source_stmt = source_stmt2;
 	}
 
+      bb1 = gimple_bb (source_stmt1);
+      bb2 = gimple_bb (source_stmt2);
+      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
+	source_stmt = source_stmt1;
+      else
+	source_stmt = source_stmt2;
+
       /* Find the highest address at which a load is performed and
 	 compute related info.  */
       end1 = n1->bytepos + (n1->range - 1);
@@ -1937,6 +1945,7 @@ perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1,
   n->vuse = n_start->vuse;
   n->base_addr = n_start->base_addr;
   n->offset = n_start->offset;
+  n->src = n_start->src;
   n->bytepos = n_start->bytepos;
   n->type = n_start->type;
   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
@@ -2147,7 +2156,7 @@ find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
   uint64_t cmpxchg = CMPXCHG;
   uint64_t cmpnop = CMPNOP;
 
-  gimple source_stmt;
+  gimple ins_stmt;
   int limit;
 
   /* The last parameter determines the depth search limit.  It usually
@@ -2157,9 +2166,9 @@ find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
      in libgcc, and for initial shift/and operation of the src operand.  */
   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
-  source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
+  ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
 
-  if (!source_stmt)
+  if (!ins_stmt)
     return NULL;
 
   /* Find real size of result (highest non-zero byte).  */
@@ -2201,7 +2210,7 @@ find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
     return NULL;
 
   n->range *= BITS_PER_UNIT;
-  return source_stmt;
+  return ins_stmt;
 }
 
 namespace {
@@ -2250,7 +2259,7 @@ public:
    changing of basic block.  */
 
 static bool
-bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
+bswap_replace (gimple cur_stmt, gimple ins_stmt, tree fndecl, tree bswap_type,
 	       tree load_type, struct symbolic_number *n, bool bswap)
 {
   gimple_stmt_iterator gsi;
@@ -2258,18 +2267,24 @@ bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
   gimple bswap_stmt;
 
   gsi = gsi_for_stmt (cur_stmt);
-  src = gimple_assign_rhs1 (src_stmt);
+  src = n->src;
   tgt = gimple_assign_lhs (cur_stmt);
 
   /* Need to load the value from memory first.  */
   if (n->base_addr)
     {
-      gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
+      gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
       tree addr_expr, addr_tmp, val_expr, val_tmp;
       tree load_offset_ptr, aligned_load_type;
       gimple addr_stmt, load_stmt;
       unsigned align;
       HOST_WIDE_INT load_offset = 0;
+      basic_block ins_bb, cur_bb;
+
+      ins_bb = gimple_bb (ins_stmt);
+      cur_bb = gimple_bb (cur_stmt);
+      if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
+	return false;
 
       align = get_object_alignment (src);
       /* If the new access is smaller than the original one, we need
@@ -2301,7 +2316,7 @@ bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
       /* Move cur_stmt just before  one of the load of the original
 	 to ensure it has the same VUSE.  See PR61517 for what could
 	 go wrong.  */
-      if (gimple_bb (cur_stmt) != gimple_bb (src_stmt))
+      if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
 	reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
       gsi_move_before (&gsi, &gsi_ins);
       gsi = gsi_for_stmt (cur_stmt);
@@ -2474,6 +2489,7 @@ pass_optimize_bswap::execute (function *fun)
 
   memset (&nop_stats, 0, sizeof (nop_stats));
   memset (&bswap_stats, 0, sizeof (bswap_stats));
+  calculate_dominance_info (CDI_DOMINATORS);
 
   FOR_EACH_BB_FN (bb, fun)
     {
@@ -2485,7 +2501,7 @@ pass_optimize_bswap::execute (function *fun)
 	 variant wouldn't be detected.  */
       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
         {
-	  gimple src_stmt, cur_stmt = gsi_stmt (gsi);
+	  gimple ins_stmt, cur_stmt = gsi_stmt (gsi);
 	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
 	  enum tree_code code;
 	  struct symbolic_number n;
@@ -2518,9 +2534,9 @@ pass_optimize_bswap::execute (function *fun)
 	      continue;
 	    }
 
-	  src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
 
-	  if (!src_stmt)
+	  if (!ins_stmt)
 	    continue;
 
 	  switch (n.range)
@@ -2554,7 +2570,7 @@ pass_optimize_bswap::execute (function *fun)
 	  if (bswap && !fndecl && n.range != 16)
 	    continue;
 
-	  if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
+	  if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
 			     &n, bswap))
 	    changed = true;
 	}
diff --git a/gcc/testsuite/gcc.dg/pr77673.c b/gcc/testsuite/gcc.dg/pr77673.c
new file mode 100644
index 0000000000000000000000000000000000000000..e03bcb9284d1e5a0e496cfa547fdbab630bec04f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr77673.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-require-effective-target bswap32 } */
+/* { dg-options "-O2 -fPIC -fdump-tree-bswap" } */
+/* { dg-additional-options "-march=z900" { target s390*-*-* } } */
+
+void mach_parse_compressed(unsigned char* ptr, unsigned long int* val)
+{
+  if (ptr[0] < 0xC0U) {
+    *val = ptr[0] + ptr[1];
+    return;
+  }
+
+  *val = ((unsigned long int)(ptr[0]) << 24)
+    | ((unsigned long int)(ptr[1]) << 16)
+    | ((unsigned long int)(ptr[2]) << 8)
+    | ptr[3];
+}
+
+/* { dg-final { scan-tree-dump-not "load_dst_\\d+ =.* if \\(" "bswap" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 735b7c67c31df0c8317544346396fb8b15315879..ac15e8179a3257d1e190086c8089bc85ed8552bf 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1951,6 +1951,7 @@ struct symbolic_number {
   tree base_addr;
   tree offset;
   HOST_WIDE_INT bytepos;
+  tree src;
   tree alias_set;
   tree vuse;
   unsigned HOST_WIDE_INT range;
@@ -2052,6 +2053,7 @@ init_symbolic_number (struct symbolic_number *n, tree src)
   int size;
 
   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
+  n->src = src;
 
   /* Set up the symbolic number N by setting each byte to a value between 1 and
      the byte size of rhs1.  The highest order byte is set to n->size and the
@@ -2167,6 +2169,7 @@ perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
       uint64_t inc;
       HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
       struct symbolic_number *toinc_n_ptr, *n_end;
+      basic_block bb1, bb2;
 
       if (!n1->base_addr || !n2->base_addr
 	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
@@ -2180,15 +2183,20 @@ perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
 	{
 	  n_start = n1;
 	  start_sub = n2->bytepos - n1->bytepos;
-	  source_stmt = source_stmt1;
 	}
       else
 	{
 	  n_start = n2;
 	  start_sub = n1->bytepos - n2->bytepos;
-	  source_stmt = source_stmt2;
 	}
 
+      bb1 = gimple_bb (source_stmt1);
+      bb2 = gimple_bb (source_stmt2);
+      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
+	source_stmt = source_stmt1;
+      else
+	source_stmt = source_stmt2;
+
       /* Find the highest address at which a load is performed and
 	 compute related info.  */
       end1 = n1->bytepos + (n1->range - 1);
@@ -2245,6 +2253,7 @@ perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
   n->vuse = n_start->vuse;
   n->base_addr = n_start->base_addr;
   n->offset = n_start->offset;
+  n->src = n_start->src;
   n->bytepos = n_start->bytepos;
   n->type = n_start->type;
   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
@@ -2455,7 +2464,7 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
   uint64_t cmpxchg = CMPXCHG;
   uint64_t cmpnop = CMPNOP;
 
-  gimple *source_stmt;
+  gimple *ins_stmt;
   int limit;
 
   /* The last parameter determines the depth search limit.  It usually
@@ -2465,9 +2474,9 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
      in libgcc, and for initial shift/and operation of the src operand.  */
   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
-  source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
+  ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
 
-  if (!source_stmt)
+  if (!ins_stmt)
     return NULL;
 
   /* Find real size of result (highest non-zero byte).  */
@@ -2509,7 +2518,7 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
     return NULL;
 
   n->range *= BITS_PER_UNIT;
-  return source_stmt;
+  return ins_stmt;
 }
 
 namespace {
@@ -2558,7 +2567,7 @@ public:
    changing of basic block.  */
 
 static bool
-bswap_replace (gimple *cur_stmt, gimple *src_stmt, tree fndecl,
+bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
 	       tree bswap_type, tree load_type, struct symbolic_number *n,
 	       bool bswap)
 {
@@ -2567,18 +2576,24 @@ bswap_replace (gimple *cur_stmt, gimple *src_stmt, tree fndecl,
   gimple *bswap_stmt;
 
   gsi = gsi_for_stmt (cur_stmt);
-  src = gimple_assign_rhs1 (src_stmt);
+  src = n->src;
   tgt = gimple_assign_lhs (cur_stmt);
 
   /* Need to load the value from memory first.  */
   if (n->base_addr)
     {
-      gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
+      gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
       tree addr_expr, addr_tmp, val_expr, val_tmp;
       tree load_offset_ptr, aligned_load_type;
       gimple *addr_stmt, *load_stmt;
       unsigned align;
       HOST_WIDE_INT load_offset = 0;
+      basic_block ins_bb, cur_bb;
+
+      ins_bb = gimple_bb (ins_stmt);
+      cur_bb = gimple_bb (cur_stmt);
+      if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
+	return false;
 
       align = get_object_alignment (src);
       /* If the new access is smaller than the original one, we need
@@ -2610,7 +2625,7 @@ bswap_replace (gimple *cur_stmt, gimple *src_stmt, tree fndecl,
       /* Move cur_stmt just before  one of the load of the original
 	 to ensure it has the same VUSE.  See PR61517 for what could
 	 go wrong.  */
-      if (gimple_bb (cur_stmt) != gimple_bb (src_stmt))
+      if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
 	reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
       gsi_move_before (&gsi, &gsi_ins);
       gsi = gsi_for_stmt (cur_stmt);
@@ -2783,6 +2798,7 @@ pass_optimize_bswap::execute (function *fun)
 
   memset (&nop_stats, 0, sizeof (nop_stats));
   memset (&bswap_stats, 0, sizeof (bswap_stats));
+  calculate_dominance_info (CDI_DOMINATORS);
 
   FOR_EACH_BB_FN (bb, fun)
     {
@@ -2794,7 +2810,7 @@ pass_optimize_bswap::execute (function *fun)
 	 variant wouldn't be detected.  */
       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
         {
-	  gimple *src_stmt, *cur_stmt = gsi_stmt (gsi);
+	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
 	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
 	  enum tree_code code;
 	  struct symbolic_number n;
@@ -2827,9 +2843,9 @@ pass_optimize_bswap::execute (function *fun)
 	      continue;
 	    }
 
-	  src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
 
-	  if (!src_stmt)
+	  if (!ins_stmt)
 	    continue;
 
 	  switch (n.range)
@@ -2863,7 +2879,7 @@ pass_optimize_bswap::execute (function *fun)
 	  if (bswap && !fndecl && n.range != 16)
 	    continue;
 
-	  if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
+	  if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
 			     &n, bswap))
 	    changed = true;
 	}

Reply via email to