On Thu, 27 Jul 2017, Andrew Pinski wrote: > On Thu, Jul 27, 2017 at 6:50 AM, Richard Biener <rguent...@suse.de> wrote: > > > > I am testing the following additional pattern for match.pd to fix > > PR81502 resulting in the desired optimization to > > > > bar: > > .LFB526: > > .cfi_startproc > > movl %edi, %eax > > ret > > > > the pattern optimizes a BIT_FIELD_REF on a BIT_INSERT_EXPR by > > extracting from either the destination or the inserted value. > Note this optimization pattern was on my list to implement for > bit-field optimizations after lowering.
Had to do some adjustments and ended up enforcing bitsizetype operands for BIT_INSERT_EXPR and BIT_FIELD_REF. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2017-07-28 Richard Biener <rguent...@suse.de> PR tree-optimization/81502 * match.pd: Add pattern combining BIT_INSERT_EXPR with BIT_FIELD_REF. * tree-cfg.c (verify_expr): Verify types of BIT_FIELD_REF size/pos operands. (verify_gimple_assign_ternary): Likewise for BIT_INSERT_EXPR pos. * gimple-fold.c (maybe_canonicalize_mem_ref_addr): Use bitsizetype for BIT_FIELD_REF args. * fold-const.c (make_bit_field_ref): Likewise. * tree-vect-stmts.c (vectorizable_simd_clone_call): Likewise. * gcc.target/i386/pr81502.c: New testcase. Index: gcc/testsuite/gcc.target/i386/pr81502.c =================================================================== *** gcc/testsuite/gcc.target/i386/pr81502.c (nonexistent) --- gcc/testsuite/gcc.target/i386/pr81502.c (working copy) *************** *** 0 **** --- 1,34 ---- + /* { dg-do compile { target lp64 } } */ + /* { dg-options "-O2 -msse2" } */ + + #include <emmintrin.h> + + #define SIZE (sizeof (void *)) + + static int foo(unsigned char (*foo)[SIZE]) + { + __m128i acc = _mm_set_epi32(0, 0, 0, 0); + size_t i = 0; + for(; i + sizeof(__m128i) <= SIZE; i += sizeof(__m128i)) { + __m128i word; + __builtin_memcpy(&word, foo + i, sizeof(__m128i)); + acc = _mm_add_epi32(word, acc); + } + if (i != SIZE) { + __m128i word = _mm_set_epi32(0, 0, 0, 0); + __builtin_memcpy(&word, foo + i, SIZE - i); // (1) + acc = _mm_add_epi32(word, acc); + } + int res; + __builtin_memcpy(&res, &acc, sizeof(res)); + return res; + } + + int bar(void *ptr) + { + unsigned char buf[SIZE]; + __builtin_memcpy(buf, &ptr, SIZE); + return foo((unsigned char(*)[SIZE])buf); + } + + /* { dg-final { scan-assembler-times "mov" 1 } } */ Index: gcc/match.pd =================================================================== --- gcc/match.pd (revision 250625) +++ gcc/match.pd (working copy) @@ -4178,3 
+4178,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) { CONSTRUCTOR_ELT (ctor, idx / k)->value; }) (BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; } @1 { bitsize_int ((idx % k) * width); }))))))))) + +/* Simplify a bit extraction from a bit insertion for the cases with + the inserted element fully covering the extraction or the insertion + not touching the extraction. */ +(simplify + (BIT_FIELD_REF (bit_insert @0 @1 @ipos) @rsize @rpos) + (with + { + unsigned HOST_WIDE_INT isize; + if (INTEGRAL_TYPE_P (TREE_TYPE (@1))) + isize = TYPE_PRECISION (TREE_TYPE (@1)); + else + isize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (@1))); + } + (switch + (if (wi::leu_p (@ipos, @rpos) + && wi::leu_p (wi::add (@rpos, @rsize), wi::add (@ipos, isize))) + (BIT_FIELD_REF @1 @rsize { wide_int_to_tree (bitsizetype, + wi::sub (@rpos, @ipos)); })) + (if (wi::geu_p (@ipos, wi::add (@rpos, @rsize)) + || wi::geu_p (@rpos, wi::add (@ipos, isize))) + (BIT_FIELD_REF @0 @rsize @rpos))))) Index: gcc/gimple-fold.c =================================================================== --- gcc/gimple-fold.c (revision 250620) +++ gcc/gimple-fold.c (working copy) @@ -4245,7 +4245,7 @@ maybe_canonicalize_mem_ref_addr (tree *t TREE_TYPE (*t), TREE_OPERAND (TREE_OPERAND (*t, 0), 0), TYPE_SIZE (TREE_TYPE (*t)), - wide_int_to_tree (sizetype, idx)); + wide_int_to_tree (bitsizetype, idx)); res = true; } } Index: gcc/tree-cfg.c =================================================================== --- gcc/tree-cfg.c (revision 250620) +++ gcc/tree-cfg.c (working copy) @@ -3053,7 +3053,9 @@ verify_expr (tree *tp, int *walk_subtree tree t1 = TREE_OPERAND (t, 1); tree t2 = TREE_OPERAND (t, 2); if (!tree_fits_uhwi_p (t1) - || !tree_fits_uhwi_p (t2)) + || !tree_fits_uhwi_p (t2) + || !types_compatible_p (bitsizetype, TREE_TYPE (t1)) + || !types_compatible_p (bitsizetype, TREE_TYPE (t2))) { error ("invalid position or size operand to BIT_FIELD_REF"); return t; @@ -4247,6 +4249,7 @@ verify_gimple_assign_ternary (gassign *s 
return true; } if (! tree_fits_uhwi_p (rhs3) + || ! types_compatible_p (bitsizetype, TREE_TYPE (rhs3)) || ! tree_fits_uhwi_p (TYPE_SIZE (rhs2_type))) { error ("invalid position or size in BIT_INSERT_EXPR"); Index: gcc/fold-const.c =================================================================== --- gcc/fold-const.c (revision 250625) +++ gcc/fold-const.c (working copy) @@ -3936,7 +3936,7 @@ make_bit_field_ref (location_t loc, tree bftype = build_nonstandard_integer_type (bitsize, 0); result = build3_loc (loc, BIT_FIELD_REF, bftype, inner, - size_int (bitsize), bitsize_int (bitpos)); + bitsize_int (bitsize), bitsize_int (bitpos)); REF_REVERSE_STORAGE_ORDER (result) = reversep; if (bftype != type) Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c (revision 250625) +++ gcc/tree-vect-stmts.c (working copy) @@ -3531,7 +3531,7 @@ vectorizable_simd_clone_call (gimple *st arginfo[i].op = vec_oprnd0; vec_oprnd0 = build3 (BIT_FIELD_REF, atype, vec_oprnd0, - size_int (prec), + bitsize_int (prec), bitsize_int ((m & (k - 1)) * prec)); new_stmt = gimple_build_assign (make_ssa_name (atype), @@ -3692,7 +3692,7 @@ vectorizable_simd_clone_call (gimple *st } else t = build3 (BIT_FIELD_REF, vectype, new_temp, - size_int (prec), bitsize_int (l * prec)); + bitsize_int (prec), bitsize_int (l * prec)); new_stmt = gimple_build_assign (make_ssa_name (vectype), t); vect_finish_stmt_generation (stmt, new_stmt, gsi);