On February 26, 2015 8:07:48 PM CET, Jakub Jelinek <ja...@redhat.com> wrote:
>Hi!
>
>If we cast a wider memory load on big endian to a narrower type and
>then byteswap that, we can't load the narrower object from the
>address of the larger original load, because that address holds the
>most significant bytes rather than the least significant bytes.
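>
>For illustration (not part of the patch; a reduced sketch mirroring
>the new pr65215-1.c test, assuming a big-endian target with 32-bit
>unsigned int and 64-bit unsigned long long):
>
>  static inline unsigned int
>  swap32 (unsigned int x)
>  {
>    return (x >> 24) | ((x >> 8) & 0xff00)
>           | ((x << 8) & 0xff0000) | (x << 24);
>  }
>
>  unsigned int
>  bar (unsigned long long *p)
>  {
>    /* *p is truncated to its low 32 bits before the open-coded
>       byteswap.  On big endian those four bytes live at byte offset
>       4 from p, so rewriting this into a single 32-bit load from
>       offset 0 would read the high half instead.  */
>    return swap32 (*p);
>  }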
>
>Fixed thusly, bootstrapped/regtested on x86_64-linux, i686-linux,
>ppc64-linux and ppc64le-linux.  Ok for trunk?

OK.

Thanks,
Richard.

>2015-02-26  Jakub Jelinek  <ja...@redhat.com>
>
>       PR tree-optimization/65215
>       * tree-ssa-math-opts.c (find_bswap_or_nop_load): Return false
>       for PDP endian targets.
>       (perform_symbolic_merge, find_bswap_or_nop_1, find_bswap_or_nop):
>       Fix up formatting issues.
>       (bswap_replace): Likewise.  For BYTES_BIG_ENDIAN, if the final access
>       size is smaller than the original, adjust MEM_REF offset by the
>       difference of sizes.  Use is_gimple_mem_ref_addr instead of
>       is_gimple_min_invariant test to avoid adding address temporaries.
>
>       * gcc.c-torture/execute/pr65215-1.c: New test.
>       * gcc.c-torture/execute/pr65215-2.c: New test.
>       * gcc.c-torture/execute/pr65215-3.c: New test.
>       * gcc.c-torture/execute/pr65215-4.c: New test.
>       * gcc.c-torture/execute/pr65215-5.c: New test.
>
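>Concretely (illustrative numbers only, assuming an 8-bit byte): when
>a 64-bit object is accessed with a 32-bit result, the bswap_replace
>hunk below computes
>
>  load_offset = (bitsize - n->range) / BITS_PER_UNIT
>              = (64 - 32) / 8
>              = 4
>
>so the narrowed MEM_REF reads bytes 4..7 of the object, i.e. its
>least significant half on big endian.  The follow-up l & -l merely
>extracts the lowest set bit of the bit misalignment to recompute the
>known alignment of the offsetted address.
>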
>--- gcc/tree-ssa-math-opts.c.jj        2015-01-28 21:24:56.000000000 +0100
>+++ gcc/tree-ssa-math-opts.c   2015-02-26 11:16:01.062024749 +0100
>@@ -1780,6 +1780,10 @@ find_bswap_or_nop_load (gimple stmt, tre
>   int unsignedp, volatilep;
>   tree offset, base_addr;
> 
>+  /* Not prepared to handle PDP endian.  */
>+  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
>+    return false;
>+
>   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
>     return false;
> 
>@@ -1860,8 +1864,8 @@ perform_symbolic_merge (gimple source_st
>         || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
>       return NULL;
> 
>-      if (!n1->offset != !n2->offset ||
>-          (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
>+      if (!n1->offset != !n2->offset
>+        || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
>       return NULL;
> 
>       if (n1->bytepos < n2->bytepos)
>@@ -1912,8 +1916,8 @@ perform_symbolic_merge (gimple source_st
>       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
>       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
>       {
>-        unsigned marker =
>-          (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
>+        unsigned marker
>+          = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
>         if (marker && marker != MARKER_BYTE_UNKNOWN)
>           toinc_n_ptr->n += inc;
>       }
>@@ -2032,7 +2036,7 @@ find_bswap_or_nop_1 (gimple stmt, struct
>       case RSHIFT_EXPR:
>       case LROTATE_EXPR:
>       case RROTATE_EXPR:
>-        if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
>+        if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
>           return NULL;
>         break;
>       CASE_CONVERT:
>@@ -2104,12 +2108,12 @@ find_bswap_or_nop_1 (gimple stmt, struct
>         if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
>           return NULL;
> 
>-        if (!n1.vuse != !n2.vuse ||
>-        (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
>+        if (!n1.vuse != !n2.vuse
>+            || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
>           return NULL;
> 
>-        source_stmt =
>-          perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
>+        source_stmt
>+          = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
> 
>         if (!source_stmt)
>           return NULL;
>@@ -2153,12 +2157,12 @@ find_bswap_or_nop (gimple stmt, struct s
>      in libgcc, and for initial shift/and operation of the src operand.  */
>   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
>   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
>-  source_stmt =  find_bswap_or_nop_1 (stmt, n, limit);
>+  source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
> 
>   if (!source_stmt)
>     return NULL;
> 
>-  /* Find real size of result (highest non zero byte).  */
>+  /* Find real size of result (highest non-zero byte).  */
>   if (n->base_addr)
>     {
>       int rsize;
>@@ -2261,8 +2265,30 @@ bswap_replace (gimple cur_stmt, gimple s
>       tree load_offset_ptr, aligned_load_type;
>       gimple addr_stmt, load_stmt;
>       unsigned align;
>+      HOST_WIDE_INT load_offset = 0;
> 
>       align = get_object_alignment (src);
>+      /* If the new access is smaller than the original one, we need
>+       to perform big endian adjustment.  */
>+      if (BYTES_BIG_ENDIAN)
>+      {
>+        HOST_WIDE_INT bitsize, bitpos;
>+        machine_mode mode;
>+        int unsignedp, volatilep;
>+        tree offset;
>+
>+        get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
>+                             &unsignedp, &volatilep, false);
>+        if (n->range < (unsigned HOST_WIDE_INT) bitsize)
>+          {
>+            load_offset = (bitsize - n->range) / BITS_PER_UNIT;
>+            unsigned HOST_WIDE_INT l
>+              = (load_offset * BITS_PER_UNIT) & (align - 1);
>+            if (l)
>+              align = l & -l;
>+          }
>+      }
>+
>       if (bswap
>         && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
>         && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
>@@ -2274,10 +2300,10 @@ bswap_replace (gimple cur_stmt, gimple s
>       gsi_move_before (&gsi, &gsi_ins);
>       gsi = gsi_for_stmt (cur_stmt);
> 
>-      /*  Compute address to load from and cast according to the size
>-        of the load.  */
>+      /* Compute address to load from and cast according to the size
>+       of the load.  */
>       addr_expr = build_fold_addr_expr (unshare_expr (src));
>-      if (is_gimple_min_invariant (addr_expr))
>+      if (is_gimple_mem_ref_addr (addr_expr))
>       addr_tmp = addr_expr;
>       else
>       {
>@@ -2291,7 +2317,7 @@ bswap_replace (gimple cur_stmt, gimple s
>       aligned_load_type = load_type;
>       if (align < TYPE_ALIGN (load_type))
>       aligned_load_type = build_aligned_type (load_type, align);
>-      load_offset_ptr = build_int_cst (n->alias_set, 0);
>+      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
>       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
>                             load_offset_ptr);
> 
>@@ -2328,7 +2354,7 @@ bswap_replace (gimple cur_stmt, gimple s
>           {
>             fprintf (dump_file,
>                      "%d bit load in target endianness found at: ",
>-                     (int)n->range);
>+                     (int) n->range);
>             print_gimple_stmt (dump_file, cur_stmt, 0, 0);
>           }
>         return true;
>@@ -2395,7 +2421,7 @@ bswap_replace (gimple cur_stmt, gimple s
>   if (dump_file)
>     {
>       fprintf (dump_file, "%d bit bswap implementation found at: ",
>-             (int)n->range);
>+             (int) n->range);
>       print_gimple_stmt (dump_file, cur_stmt, 0, 0);
>     }
> 
>--- gcc/testsuite/gcc.c-torture/execute/pr65215-1.c.jj 2015-02-26 10:46:29.102441519 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65215-1.c    2015-02-26 10:44:39.000000000 +0100
>@@ -0,0 +1,24 @@
>+/* PR tree-optimization/65215 */
>+
>+static inline unsigned int
>+foo (unsigned int x)
>+{
>+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
>+}
>+
>+__attribute__((noinline, noclone)) unsigned int
>+bar (unsigned long long *x)
>+{
>+  return foo (*x);
>+}
>+
>+int
>+main ()
>+{
>+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
>+    return 0;
>+  unsigned long long l = foo (0xdeadbeefU) | 0xfeedbea800000000ULL;
>+  if (bar (&l) != 0xdeadbeefU)
>+    __builtin_abort ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr65215-2.c.jj 2015-02-26 10:46:31.524401403 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65215-2.c    2015-02-26 10:45:15.000000000 +0100
>@@ -0,0 +1,24 @@
>+/* PR tree-optimization/65215 */
>+
>+static inline unsigned int
>+foo (unsigned int x)
>+{
>+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
>+}
>+
>+__attribute__((noinline, noclone)) unsigned long long
>+bar (unsigned long long *x)
>+{
>+  return ((unsigned long long) foo (*x) << 32) | foo (*x >> 32);
>+}
>+
>+int
>+main ()
>+{
>+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
>+    return 0;
>+  unsigned long long l = foo (0xfeedbea8U) | ((unsigned long long) foo (0xdeadbeefU) << 32);
>+  if (bar (&l) != 0xfeedbea8deadbeefULL)
>+    __builtin_abort ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr65215-3.c.jj 2015-02-26 10:46:33.463369288 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65215-3.c    2015-02-26 10:45:37.000000000 +0100
>@@ -0,0 +1,31 @@
>+/* PR tree-optimization/65215 */
>+
>+struct S { unsigned long long l1 : 24, l2 : 8, l3 : 32; };
>+
>+static inline unsigned int
>+foo (unsigned int x)
>+{
>+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
>+}
>+
>+__attribute__((noinline, noclone)) unsigned long long
>+bar (struct S *x)
>+{
>+  unsigned long long x1 = foo (((unsigned int) x->l1 << 8) | x->l2);
>+  unsigned long long x2 = foo (x->l3);
>+  return (x2 << 32) | x1;
>+}
>+
>+int
>+main ()
>+{
>+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
>+    return 0;
>+  struct S s = { 0xdeadbeU, 0xefU, 0xfeedbea8U };
>+  unsigned long long l = bar (&s);
>+  if (foo (l >> 32) != s.l3
>+      || (foo (l) >> 8) != s.l1
>+      || (foo (l) & 0xff) != s.l2)
>+    __builtin_abort ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr65215-4.c.jj 2015-02-26 10:46:35.438336576 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65215-4.c    2015-02-26 10:45:46.000000000 +0100
>@@ -0,0 +1,27 @@
>+/* PR tree-optimization/65215 */
>+
>+struct S { unsigned long long l1 : 48; };
>+
>+static inline unsigned int
>+foo (unsigned int x)
>+{
>+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
>+}
>+
>+__attribute__((noinline, noclone)) unsigned int
>+bar (struct S *x)
>+{
>+  return foo (x->l1);
>+}
>+
>+int
>+main ()
>+{
>+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
>+    return 0;
>+  struct S s;
>+  s.l1 = foo (0xdeadbeefU) | (0xfeedULL << 32);
>+  if (bar (&s) != 0xdeadbeefU)
>+    __builtin_abort ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr65215-5.c.jj 2015-02-26 11:14:44.664298719 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65215-5.c    2015-02-26 11:12:27.000000000 +0100
>@@ -0,0 +1,27 @@
>+/* PR tree-optimization/65215 */
>+
>+__attribute__((noinline, noclone)) unsigned int
>+foo (unsigned char *p)
>+{
>+  return ((unsigned int) p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
>+}
>+
>+__attribute__((noinline, noclone)) unsigned int
>+bar (unsigned char *p)
>+{
>+  return ((unsigned int) p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
>+}
>+
>+struct S { unsigned int a; unsigned char b[5]; };
>+
>+int
>+main ()
>+{
>+  struct S s = { 1, { 2, 3, 4, 5, 6 } };
>+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4)
>+    return 0;
>+  if (foo (&s.b[1]) != 0x03040506U
>+      || bar (&s.b[1]) != 0x06050403U)
>+    __builtin_abort ();
>+  return 0;
>+}
>
>       Jakub