On Thu, Jul 27, 2017 at 6:50 AM, Richard Biener <rguent...@suse.de> wrote:
>
> I am testing the following additional pattern for match.pd to fix
> PR81502 resulting in the desired optimization to
>
> bar:
> .LFB526:
>         .cfi_startproc
>         movl    %edi, %eax
>         ret
>
> the pattern optimizes a BIT_FIELD_REF on a BIT_INSERT_EXPR by
> either extracting from the destination or the inserted value.

Note that this optimization pattern was on my list of things to implement
for bit-field optimizations after lowering.

Thanks,
Andrew Pinski

>
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> Richard.
>
> 2017-07-27  Richard Biener  <rguent...@suse.de>
>
>         PR tree-optimization/81502
>         * match.pd: Add pattern combining BIT_INSERT_EXPR with
>         BIT_FIELD_REF.
>
>         * gcc.target/i386/pr81502.c: New testcase.
>
> Index: gcc/match.pd
> ===================================================================
> *** gcc/match.pd        (revision 250620)
> --- gcc/match.pd        (working copy)
> *************** DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> *** 4178,4180 ****
> --- 4178,4195 ----
>          { CONSTRUCTOR_ELT (ctor, idx / k)->value; })
>         (BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; }
>                        @1 { bitsize_int ((idx % k) * width); })))))))))
> +
> + /* Simplify a bit extraction from a bit insertion for the cases with
> +    the inserted element fully covering the extraction or the insertion
> +    not touching the extraction.  */
> + (simplify
> +  (BIT_FIELD_REF (bit_insert @0 @1 @ipos) @rsize @rpos)
> +  (switch
> +   (if (wi::leu_p (@ipos, @rpos)
> +        && wi::leu_p (wi::add (@rpos, @rsize),
> +                      wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1)))))
> +    (BIT_FIELD_REF @1 @rsize { wide_int_to_tree (bitsizetype,
> +                                                 wi::sub (@rpos, @ipos)); }))
> +   (if (wi::geu_p (@ipos, wi::add (@rpos, @rsize))
> +        || wi::geu_p (@rpos, wi::add (@ipos, TYPE_PRECISION (TREE_TYPE (@1)))))
> +    (BIT_FIELD_REF @0 @rsize @rpos))))
> Index: gcc/testsuite/gcc.target/i386/pr81502.c
> ===================================================================
> *** gcc/testsuite/gcc.target/i386/pr81502.c     (nonexistent)
> --- gcc/testsuite/gcc.target/i386/pr81502.c     (working copy)
> ***************
> *** 0 ****
> --- 1,34 ----
> + /* { dg-do compile { target lp64 } } */
> + /* { dg-options "-O2 -msse2" } */
> +
> + #include <emmintrin.h>
> +
> + #define SIZE (sizeof (void *))
> +
> + static int foo(unsigned char (*foo)[SIZE])
> + {
> +   __m128i acc = _mm_set_epi32(0, 0, 0, 0);
> +   size_t i = 0;
> +   for(; i + sizeof(__m128i) <= SIZE; i += sizeof(__m128i)) {
> +       __m128i word;
> +       __builtin_memcpy(&word, foo + i, sizeof(__m128i));
> +       acc = _mm_add_epi32(word, acc);
> +   }
> +   if (i != SIZE) {
> +       __m128i word = _mm_set_epi32(0, 0, 0, 0);
> +       __builtin_memcpy(&word, foo + i, SIZE - i); // (1)
> +       acc = _mm_add_epi32(word, acc);
> +   }
> +   int res;
> +   __builtin_memcpy(&res, &acc, sizeof(res));
> +   return res;
> + }
> +
> + int bar(void *ptr)
> + {
> +   unsigned char buf[SIZE];
> +   __builtin_memcpy(buf, &ptr, SIZE);
> +   return foo((unsigned char(*)[SIZE])buf);
> + }
> +
> + /* { dg-final { scan-assembler-times "mov" 1 } } */

Reply via email to