On February 26, 2015 8:07:48 PM CET, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >If we cast a wider memory load on big endian to a narrower type and >then >byteswap that, we can't load the narrower object from the >address of the larger original load, because it contains the MS bytes, >rather than LS bytes. > >Fixed thusly, bootstrapped/regtested on x86_64-linux, i686-linux, >ppc64-linux and ppc64le-linux. Ok for trunk?
OK. Thanks, Richard. >2015-02-26 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/65215 > * tree-ssa-math-opts.c (find_bswap_or_nop_load): Return false > for PDP endian targets. > (perform_symbolic_merge, find_bswap_or_nop_1, find_bswap_or_nop): > Fix up formatting issues. > (bswap_replace): Likewise. For BYTES_BIG_ENDIAN, if the final access > size is smaller than the original, adjust MEM_REF offset by the > difference of sizes. Use is_gimple_mem_ref_addr instead of > is_gimple_min_invariant test to avoid adding address temporaries. > > * gcc.c-torture/execute/pr65215-1.c: New test. > * gcc.c-torture/execute/pr65215-2.c: New test. > * gcc.c-torture/execute/pr65215-3.c: New test. > * gcc.c-torture/execute/pr65215-4.c: New test. > * gcc.c-torture/execute/pr65215-5.c: New test. > >--- gcc/tree-ssa-math-opts.c.jj 2015-01-28 21:24:56.000000000 +0100 >+++ gcc/tree-ssa-math-opts.c 2015-02-26 11:16:01.062024749 +0100 >@@ -1780,6 +1780,10 @@ find_bswap_or_nop_load (gimple stmt, tre > int unsignedp, volatilep; > tree offset, base_addr; > >+ /* Not prepared to handle PDP endian. 
*/ >+ if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) >+ return false; >+ > if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt)) > return false; > >@@ -1860,8 +1864,8 @@ perform_symbolic_merge (gimple source_st > || !operand_equal_p (n1->base_addr, n2->base_addr, 0)) > return NULL; > >- if (!n1->offset != !n2->offset || >- (n1->offset && !operand_equal_p (n1->offset, n2->offset, >0))) >+ if (!n1->offset != !n2->offset >+ || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0))) > return NULL; > > if (n1->bytepos < n2->bytepos) >@@ -1912,8 +1916,8 @@ perform_symbolic_merge (gimple source_st > size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT; > for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER) > { >- unsigned marker = >- (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK; >+ unsigned marker >+ = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK; > if (marker && marker != MARKER_BYTE_UNKNOWN) > toinc_n_ptr->n += inc; > } >@@ -2032,7 +2036,7 @@ find_bswap_or_nop_1 (gimple stmt, struct > case RSHIFT_EXPR: > case LROTATE_EXPR: > case RROTATE_EXPR: >- if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2))) >+ if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2))) > return NULL; > break; > CASE_CONVERT: >@@ -2104,12 +2108,12 @@ find_bswap_or_nop_1 (gimple stmt, struct > if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type)) > return NULL; > >- if (!n1.vuse != !n2.vuse || >- (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0))) >+ if (!n1.vuse != !n2.vuse >+ || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0))) > return NULL; > >- source_stmt = >- perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n); >+ source_stmt >+ = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, >n); > > if (!source_stmt) > return NULL; >@@ -2153,12 +2157,12 @@ find_bswap_or_nop (gimple stmt, struct s > in libgcc, and for initial shift/and operation of the src operand. 
*/ > limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt))); > limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit); >- source_stmt = find_bswap_or_nop_1 (stmt, n, limit); >+ source_stmt = find_bswap_or_nop_1 (stmt, n, limit); > > if (!source_stmt) > return NULL; > >- /* Find real size of result (highest non zero byte). */ >+ /* Find real size of result (highest non-zero byte). */ > if (n->base_addr) > { > int rsize; >@@ -2261,8 +2265,30 @@ bswap_replace (gimple cur_stmt, gimple s > tree load_offset_ptr, aligned_load_type; > gimple addr_stmt, load_stmt; > unsigned align; >+ HOST_WIDE_INT load_offset = 0; > > align = get_object_alignment (src); >+ /* If the new access is smaller than the original one, we need >+ to perform big endian adjustment. */ >+ if (BYTES_BIG_ENDIAN) >+ { >+ HOST_WIDE_INT bitsize, bitpos; >+ machine_mode mode; >+ int unsignedp, volatilep; >+ tree offset; >+ >+ get_inner_reference (src, &bitsize, &bitpos, &offset, &mode, >+ &unsignedp, &volatilep, false); >+ if (n->range < (unsigned HOST_WIDE_INT) bitsize) >+ { >+ load_offset = (bitsize - n->range) / BITS_PER_UNIT; >+ unsigned HOST_WIDE_INT l >+ = (load_offset * BITS_PER_UNIT) & (align - 1); >+ if (l) >+ align = l & -l; >+ } >+ } >+ > if (bswap > && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type)) > && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align)) >@@ -2274,10 +2300,10 @@ bswap_replace (gimple cur_stmt, gimple s > gsi_move_before (&gsi, &gsi_ins); > gsi = gsi_for_stmt (cur_stmt); > >- /* Compute address to load from and cast according to the size >- of the load. */ >+ /* Compute address to load from and cast according to the size >+ of the load. 
*/ > addr_expr = build_fold_addr_expr (unshare_expr (src)); >- if (is_gimple_min_invariant (addr_expr)) >+ if (is_gimple_mem_ref_addr (addr_expr)) > addr_tmp = addr_expr; > else > { >@@ -2291,7 +2317,7 @@ bswap_replace (gimple cur_stmt, gimple s > aligned_load_type = load_type; > if (align < TYPE_ALIGN (load_type)) > aligned_load_type = build_aligned_type (load_type, align); >- load_offset_ptr = build_int_cst (n->alias_set, 0); >+ load_offset_ptr = build_int_cst (n->alias_set, load_offset); > val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp, > load_offset_ptr); > >@@ -2328,7 +2354,7 @@ bswap_replace (gimple cur_stmt, gimple s > { > fprintf (dump_file, > "%d bit load in target endianness found at: ", >- (int)n->range); >+ (int) n->range); > print_gimple_stmt (dump_file, cur_stmt, 0, 0); > } > return true; >@@ -2395,7 +2421,7 @@ bswap_replace (gimple cur_stmt, gimple s > if (dump_file) > { > fprintf (dump_file, "%d bit bswap implementation found at: ", >- (int)n->range); >+ (int) n->range); > print_gimple_stmt (dump_file, cur_stmt, 0, 0); > } > >--- gcc/testsuite/gcc.c-torture/execute/pr65215-1.c.jj 2015-02-26 >10:46:29.102441519 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr65215-1.c 2015-02-26 >10:44:39.000000000 +0100 >@@ -0,0 +1,24 @@ >+/* PR tree-optimization/65215 */ >+ >+static inline unsigned int >+foo (unsigned int x) >+{ >+ return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x ><< 24); >+} >+ >+__attribute__((noinline, noclone)) unsigned int >+bar (unsigned long long *x) >+{ >+ return foo (*x); >+} >+ >+int >+main () >+{ >+ if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof >(unsigned long long) != 8) >+ return 0; >+ unsigned long long l = foo (0xdeadbeefU) | 0xfeedbea800000000ULL; >+ if (bar (&l) != 0xdeadbeefU) >+ __builtin_abort (); >+ return 0; >+} >--- gcc/testsuite/gcc.c-torture/execute/pr65215-2.c.jj 2015-02-26 >10:46:31.524401403 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr65215-2.c 2015-02-26 
>10:45:15.000000000 +0100 >@@ -0,0 +1,24 @@ >+/* PR tree-optimization/65215 */ >+ >+static inline unsigned int >+foo (unsigned int x) >+{ >+ return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x ><< 24); >+} >+ >+__attribute__((noinline, noclone)) unsigned long long >+bar (unsigned long long *x) >+{ >+ return ((unsigned long long) foo (*x) << 32) | foo (*x >> 32); >+} >+ >+int >+main () >+{ >+ if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof >(unsigned long long) != 8) >+ return 0; >+ unsigned long long l = foo (0xfeedbea8U) | ((unsigned long long) foo >(0xdeadbeefU) << 32); >+ if (bar (&l) != 0xfeedbea8deadbeefULL) >+ __builtin_abort (); >+ return 0; >+} >--- gcc/testsuite/gcc.c-torture/execute/pr65215-3.c.jj 2015-02-26 >10:46:33.463369288 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr65215-3.c 2015-02-26 >10:45:37.000000000 +0100 >@@ -0,0 +1,31 @@ >+/* PR tree-optimization/65215 */ >+ >+struct S { unsigned long long l1 : 24, l2 : 8, l3 : 32; }; >+ >+static inline unsigned int >+foo (unsigned int x) >+{ >+ return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x ><< 24); >+} >+ >+__attribute__((noinline, noclone)) unsigned long long >+bar (struct S *x) >+{ >+ unsigned long long x1 = foo (((unsigned int) x->l1 << 8) | x->l2); >+ unsigned long long x2 = foo (x->l3); >+ return (x2 << 32) | x1; >+} >+ >+int >+main () >+{ >+ if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof >(unsigned long long) != 8) >+ return 0; >+ struct S s = { 0xdeadbeU, 0xefU, 0xfeedbea8U }; >+ unsigned long long l = bar (&s); >+ if (foo (l >> 32) != s.l3 >+ || (foo (l) >> 8) != s.l1 >+ || (foo (l) & 0xff) != s.l2) >+ __builtin_abort (); >+ return 0; >+} >--- gcc/testsuite/gcc.c-torture/execute/pr65215-4.c.jj 2015-02-26 >10:46:35.438336576 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr65215-4.c 2015-02-26 >10:45:46.000000000 +0100 >@@ -0,0 +1,27 @@ >+/* PR tree-optimization/65215 */ >+ >+struct S { unsigned long long l1 : 48; }; >+ >+static 
inline unsigned int >+foo (unsigned int x) >+{ >+ return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x ><< 24); >+} >+ >+__attribute__((noinline, noclone)) unsigned int >+bar (struct S *x) >+{ >+ return foo (x->l1); >+} >+ >+int >+main () >+{ >+ if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof >(unsigned long long) != 8) >+ return 0; >+ struct S s; >+ s.l1 = foo (0xdeadbeefU) | (0xfeedULL << 32); >+ if (bar (&s) != 0xdeadbeefU) >+ __builtin_abort (); >+ return 0; >+} >--- gcc/testsuite/gcc.c-torture/execute/pr65215-5.c.jj 2015-02-26 >11:14:44.664298719 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr65215-5.c 2015-02-26 >11:12:27.000000000 +0100 >@@ -0,0 +1,27 @@ >+/* PR tree-optimization/65215 */ >+ >+__attribute__((noinline, noclone)) unsigned int >+foo (unsigned char *p) >+{ >+ return ((unsigned int) p[0] << 24) | (p[1] << 16) | (p[2] << 8) | >p[3]; >+} >+ >+__attribute__((noinline, noclone)) unsigned int >+bar (unsigned char *p) >+{ >+ return ((unsigned int) p[3] << 24) | (p[2] << 16) | (p[1] << 8) | >p[0]; >+} >+ >+struct S { unsigned int a; unsigned char b[5]; }; >+ >+int >+main () >+{ >+ struct S s = { 1, { 2, 3, 4, 5, 6 } }; >+ if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4) >+ return 0; >+ if (foo (&s.b[1]) != 0x03040506U >+ || bar (&s.b[1]) != 0x06050403U) >+ __builtin_abort (); >+ return 0; >+} > > Jakub