On Thu, Jul 27, 2017 at 6:50 AM, Richard Biener <rguent...@suse.de> wrote: > > I am testing the following additional pattern for match.pd to fix > PR81502, resulting in the desired optimization to > > bar: > .LFB526: > .cfi_startproc > movl %edi, %eax > ret > > The pattern optimizes a BIT_FIELD_REF on a BIT_INSERT_EXPR by > either extracting from the destination or from the inserted value.
Note this optimization pattern was on my list to implement for bit-field optimizations after lowering. Thanks, Andrew Pinski > > Bootstrap and regtest running on x86_64-unknown-linux-gnu. > > Richard. > > 2017-07-27 Richard Biener <rguent...@suse.de> > > PR tree-optimization/81502 > * match.pd: Add pattern combining BIT_INSERT_EXPR with > BIT_FIELD_REF. > > * gcc.target/i386/pr81502.c: New testcase. > > Index: gcc/match.pd > =================================================================== > *** gcc/match.pd (revision 250620) > --- gcc/match.pd (working copy) > *************** DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > *** 4178,4180 **** > --- 4178,4195 ---- > { CONSTRUCTOR_ELT (ctor, idx / k)->value; }) > (BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; } > @1 { bitsize_int ((idx % k) * width); }))))))))) > + > + /* Simplify a bit extraction from a bit insertion for the cases with > + the inserted element fully covering the extraction or the insertion > + not touching the extraction. 
*/ > + (simplify > + (BIT_FIELD_REF (bit_insert @0 @1 @ipos) @rsize @rpos) > + (switch > + (if (wi::leu_p (@ipos, @rpos) > + && wi::leu_p (wi::add (@rpos, @rsize), > + wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1))))) > + (BIT_FIELD_REF @1 @rsize { wide_int_to_tree (bitsizetype, > + wi::sub (@rpos, @ipos)); })) > + (if (wi::geu_p (@ipos, wi::add (@rpos, @rsize)) > + || wi::geu_p (@rpos, wi::add (@ipos, TYPE_PRECISION (TREE_TYPE > (@1))))) > + (BIT_FIELD_REF @0 @rsize @rpos)))) > Index: gcc/testsuite/gcc.target/i386/pr81502.c > =================================================================== > *** gcc/testsuite/gcc.target/i386/pr81502.c (nonexistent) > --- gcc/testsuite/gcc.target/i386/pr81502.c (working copy) > *************** > *** 0 **** > --- 1,34 ---- > + /* { dg-do compile { target lp64 } } */ > + /* { dg-options "-O2 -msse2" } */ > + > + #include <emmintrin.h> > + > + #define SIZE (sizeof (void *)) > + > + static int foo(unsigned char (*foo)[SIZE]) > + { > + __m128i acc = _mm_set_epi32(0, 0, 0, 0); > + size_t i = 0; > + for(; i + sizeof(__m128i) <= SIZE; i += sizeof(__m128i)) { > + __m128i word; > + __builtin_memcpy(&word, foo + i, sizeof(__m128i)); > + acc = _mm_add_epi32(word, acc); > + } > + if (i != SIZE) { > + __m128i word = _mm_set_epi32(0, 0, 0, 0); > + __builtin_memcpy(&word, foo + i, SIZE - i); // (1) > + acc = _mm_add_epi32(word, acc); > + } > + int res; > + __builtin_memcpy(&res, &acc, sizeof(res)); > + return res; > + } > + > + int bar(void *ptr) > + { > + unsigned char buf[SIZE]; > + __builtin_memcpy(buf, &ptr, SIZE); > + return foo((unsigned char(*)[SIZE])buf); > + } > + > + /* { dg-final { scan-assembler-times "mov" 1 } } */