Hi!

If, on a big-endian target, we cast a wider memory load to a narrower type
and then byteswap that, we can't load the narrower object from the
address of the larger original load, because that address holds the most
significant bytes rather than the least significant ones.

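To illustrate (just a sketch of the problem, not code from the patch;
assumes 8-bit chars, 4-byte unsigned int and 8-byte unsigned long long
on a big-endian target):

  unsigned long long v = 0x0011223344556677ULL;
  /* Memory at &v holds 00 11 22 33 44 55 66 77 (MS byte first).  */
  unsigned int u = (unsigned int) v;  /* 0x44556677 */
  /* Those four bytes live at (char *) &v + 4, not at &v itself, so a
     narrowed 4-byte load has to adjust the load address by
     8 - 4 == 4 bytes, which is the MEM_REF offset adjustment the
     patch performs.  */
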
Fixed thusly, bootstrapped/regtested on x86_64-linux, i686-linux,
ppc64-linux and ppc64le-linux.  Ok for trunk?

2015-02-26  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/65215
        * tree-ssa-math-opts.c (find_bswap_or_nop_load): Return false
        for PDP endian targets.
        (perform_symbolic_merge, find_bswap_or_nop_1, find_bswap_or_nop):
        Fix up formatting issues.
        (bswap_replace): Likewise.  For BYTES_BIG_ENDIAN, if the final access
        size is smaller than the original, adjust MEM_REF offset by the
        difference of sizes.  Use is_gimple_mem_ref_addr instead of
        is_gimple_min_invariant test to avoid adding address temporaries.

        * gcc.c-torture/execute/pr65215-1.c: New test.
        * gcc.c-torture/execute/pr65215-2.c: New test.
        * gcc.c-torture/execute/pr65215-3.c: New test.
        * gcc.c-torture/execute/pr65215-4.c: New test.
        * gcc.c-torture/execute/pr65215-5.c: New test.

--- gcc/tree-ssa-math-opts.c.jj 2015-01-28 21:24:56.000000000 +0100
+++ gcc/tree-ssa-math-opts.c    2015-02-26 11:16:01.062024749 +0100
@@ -1780,6 +1780,10 @@ find_bswap_or_nop_load (gimple stmt, tre
   int unsignedp, volatilep;
   tree offset, base_addr;
 
+  /* Not prepared to handle PDP endian.  */
+  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
+    return false;
+
   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
     return false;
 
@@ -1860,8 +1864,8 @@ perform_symbolic_merge (gimple source_st
          || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
        return NULL;
 
-      if (!n1->offset != !n2->offset ||
-          (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
+      if (!n1->offset != !n2->offset
+         || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
        return NULL;
 
       if (n1->bytepos < n2->bytepos)
@@ -1912,8 +1916,8 @@ perform_symbolic_merge (gimple source_st
       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
        {
-         unsigned marker =
-           (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
+         unsigned marker
+           = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
          if (marker && marker != MARKER_BYTE_UNKNOWN)
            toinc_n_ptr->n += inc;
        }
@@ -2032,7 +2036,7 @@ find_bswap_or_nop_1 (gimple stmt, struct
        case RSHIFT_EXPR:
        case LROTATE_EXPR:
        case RROTATE_EXPR:
-         if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
+         if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
            return NULL;
          break;
        CASE_CONVERT:
@@ -2104,12 +2108,12 @@ find_bswap_or_nop_1 (gimple stmt, struct
          if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
            return NULL;
 
-         if (!n1.vuse != !n2.vuse ||
-         (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
+         if (!n1.vuse != !n2.vuse
+             || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
            return NULL;
 
-         source_stmt =
-           perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
+         source_stmt
+           = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
 
          if (!source_stmt)
            return NULL;
@@ -2153,12 +2157,12 @@ find_bswap_or_nop (gimple stmt, struct s
      in libgcc, and for initial shift/and operation of the src operand.  */
   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
-  source_stmt =  find_bswap_or_nop_1 (stmt, n, limit);
+  source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
 
   if (!source_stmt)
     return NULL;
 
-  /* Find real size of result (highest non zero byte).  */
+  /* Find real size of result (highest non-zero byte).  */
   if (n->base_addr)
     {
       int rsize;
@@ -2261,8 +2265,30 @@ bswap_replace (gimple cur_stmt, gimple s
       tree load_offset_ptr, aligned_load_type;
       gimple addr_stmt, load_stmt;
       unsigned align;
+      HOST_WIDE_INT load_offset = 0;
 
       align = get_object_alignment (src);
+      /* If the new access is smaller than the original one, we need
+        to perform big endian adjustment.  */
+      if (BYTES_BIG_ENDIAN)
+       {
+         HOST_WIDE_INT bitsize, bitpos;
+         machine_mode mode;
+         int unsignedp, volatilep;
+         tree offset;
+
+         get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
+                              &unsignedp, &volatilep, false);
+         if (n->range < (unsigned HOST_WIDE_INT) bitsize)
+           {
+             load_offset = (bitsize - n->range) / BITS_PER_UNIT;
+             unsigned HOST_WIDE_INT l
+               = (load_offset * BITS_PER_UNIT) & (align - 1);
+             if (l)
+               align = l & -l;
+           }
+       }
+
       if (bswap
          && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
          && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
@@ -2274,10 +2300,10 @@ bswap_replace (gimple cur_stmt, gimple s
       gsi_move_before (&gsi, &gsi_ins);
       gsi = gsi_for_stmt (cur_stmt);
 
-      /*  Compute address to load from and cast according to the size
-         of the load.  */
+      /* Compute address to load from and cast according to the size
+        of the load.  */
       addr_expr = build_fold_addr_expr (unshare_expr (src));
-      if (is_gimple_min_invariant (addr_expr))
+      if (is_gimple_mem_ref_addr (addr_expr))
        addr_tmp = addr_expr;
       else
        {
@@ -2291,7 +2317,7 @@ bswap_replace (gimple cur_stmt, gimple s
       aligned_load_type = load_type;
       if (align < TYPE_ALIGN (load_type))
        aligned_load_type = build_aligned_type (load_type, align);
-      load_offset_ptr = build_int_cst (n->alias_set, 0);
+      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
                              load_offset_ptr);
 
@@ -2328,7 +2354,7 @@ bswap_replace (gimple cur_stmt, gimple s
            {
              fprintf (dump_file,
                       "%d bit load in target endianness found at: ",
-                      (int)n->range);
+                      (int) n->range);
              print_gimple_stmt (dump_file, cur_stmt, 0, 0);
            }
          return true;
@@ -2395,7 +2421,7 @@ bswap_replace (gimple cur_stmt, gimple s
   if (dump_file)
     {
       fprintf (dump_file, "%d bit bswap implementation found at: ",
-              (int)n->range);
+              (int) n->range);
       print_gimple_stmt (dump_file, cur_stmt, 0, 0);
     }
 
--- gcc/testsuite/gcc.c-torture/execute/pr65215-1.c.jj  2015-02-26 10:46:29.102441519 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-1.c     2015-02-26 10:44:39.000000000 +0100
@@ -0,0 +1,24 @@
+/* PR tree-optimization/65215 */
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned int
+bar (unsigned long long *x)
+{
+  return foo (*x);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  unsigned long long l = foo (0xdeadbeefU) | 0xfeedbea800000000ULL;
+  if (bar (&l) != 0xdeadbeefU)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-2.c.jj  2015-02-26 10:46:31.524401403 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-2.c     2015-02-26 10:45:15.000000000 +0100
@@ -0,0 +1,24 @@
+/* PR tree-optimization/65215 */
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned long long
+bar (unsigned long long *x)
+{
+  return ((unsigned long long) foo (*x) << 32) | foo (*x >> 32);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  unsigned long long l = foo (0xfeedbea8U) | ((unsigned long long) foo (0xdeadbeefU) << 32);
+  if (bar (&l) != 0xfeedbea8deadbeefULL)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-3.c.jj  2015-02-26 10:46:33.463369288 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-3.c     2015-02-26 10:45:37.000000000 +0100
@@ -0,0 +1,31 @@
+/* PR tree-optimization/65215 */
+
+struct S { unsigned long long l1 : 24, l2 : 8, l3 : 32; };
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned long long
+bar (struct S *x)
+{
+  unsigned long long x1 = foo (((unsigned int) x->l1 << 8) | x->l2);
+  unsigned long long x2 = foo (x->l3);
+  return (x2 << 32) | x1;
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  struct S s = { 0xdeadbeU, 0xefU, 0xfeedbea8U };
+  unsigned long long l = bar (&s);
+  if (foo (l >> 32) != s.l3
+      || (foo (l) >> 8) != s.l1
+      || (foo (l) & 0xff) != s.l2)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-4.c.jj  2015-02-26 10:46:35.438336576 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-4.c     2015-02-26 10:45:46.000000000 +0100
@@ -0,0 +1,27 @@
+/* PR tree-optimization/65215 */
+
+struct S { unsigned long long l1 : 48; };
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned int
+bar (struct S *x)
+{
+  return foo (x->l1);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  struct S s;
+  s.l1 = foo (0xdeadbeefU) | (0xfeedULL << 32);
+  if (bar (&s) != 0xdeadbeefU)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-5.c.jj  2015-02-26 11:14:44.664298719 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-5.c     2015-02-26 11:12:27.000000000 +0100
@@ -0,0 +1,27 @@
+/* PR tree-optimization/65215 */
+
+__attribute__((noinline, noclone)) unsigned int
+foo (unsigned char *p)
+{
+  return ((unsigned int) p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
+}
+
+__attribute__((noinline, noclone)) unsigned int
+bar (unsigned char *p)
+{
+  return ((unsigned int) p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
+}
+
+struct S { unsigned int a; unsigned char b[5]; };
+
+int
+main ()
+{
+  struct S s = { 1, { 2, 3, 4, 5, 6 } };
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4)
+    return 0;
+  if (foo (&s.b[1]) != 0x03040506U
+      || bar (&s.b[1]) != 0x06050403U)
+    __builtin_abort ();
+  return 0;
+}

        Jakub