On Wed, 23 Oct 2013, Jakub Jelinek wrote:

> On Wed, Oct 23, 2013 at 11:14:54AM +0200, Richard Biener wrote:
> > On Tue, 22 Oct 2013, Jakub Jelinek wrote:
> > 
> > > Hi!
> > > 
> > > If VRP tells us that oprnd is always >= 0 or always < 0, we can generate
> > > better code for the divmod vectorization.
> > > 
> > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > 
> > Testcase...?
> > 
> > Ok with adding one (I suggest an x86-specific one and scanning the
> > assembler dump).
> 
> Like this?

Yes, thanks.
Richard.

> 2013-10-23  Jakub Jelinek  <ja...@redhat.com>
> 
>       * tree-vect-patterns.c (vect_recog_divmod_pattern): Optimize
>       sequence based on get_range_info returned range.
> 
>       * gcc.target/i386/vect-div-1.c: New test.
> 
> --- gcc/tree-vect-patterns.c.jj       2013-10-22 18:36:51.947395037 +0200
> +++ gcc/tree-vect-patterns.c  2013-10-23 11:28:26.211956658 +0200
> @@ -2226,20 +2226,19 @@ vect_recog_divmod_pattern (vec<gimple> *
>        if (post_shift >= prec)
>       return NULL;
>  
> -      /* t1 = oprnd1 h* ml;  */
> +      /* t1 = oprnd0 h* ml;  */
>        t1 = vect_recog_temp_ssa_var (itype, NULL);
>        def_stmt
>       = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
>                                       build_int_cst (itype, ml));
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
>  
>        if (add)
>       {
>         /* t2 = t1 + oprnd0;  */
> +       append_pattern_def_seq (stmt_vinfo, def_stmt);
>         t2 = vect_recog_temp_ssa_var (itype, NULL);
>         def_stmt
>           = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
> -       append_pattern_def_seq (stmt_vinfo, def_stmt);
>       }
>        else
>       t2 = t1;
> @@ -2247,27 +2246,57 @@ vect_recog_divmod_pattern (vec<gimple> *
>        if (post_shift)
>       {
>         /* t3 = t2 >> post_shift;  */
> +       append_pattern_def_seq (stmt_vinfo, def_stmt);
>         t3 = vect_recog_temp_ssa_var (itype, NULL);
>         def_stmt
>           = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
>                                           build_int_cst (itype, post_shift));
> -       append_pattern_def_seq (stmt_vinfo, def_stmt);
>       }
>        else
>       t3 = t2;
>  
> -      /* t4 = oprnd0 >> (prec - 1);  */
> -      t4 = vect_recog_temp_ssa_var (itype, NULL);
> -      def_stmt
> -     = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
> -                                     build_int_cst (itype, prec - 1));
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> -
> -      /* q = t3 - t4;  or q = t4 - t3;  */
> -      q = vect_recog_temp_ssa_var (itype, NULL);
> -      pattern_stmt
> -     = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
> -                                     d < 0 ? t3 : t4);
> +      double_int oprnd0_min, oprnd0_max;
> +      int msb = 1;
> +      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
> +     {
> +       if (!oprnd0_min.is_negative ())
> +         msb = 0;
> +       else if (oprnd0_max.is_negative ())
> +         msb = -1;
> +     }
> +
> +      if (msb == 0 && d >= 0)
> +     {
> +       /* q = t3;  */
> +       q = t3;
> +       pattern_stmt = def_stmt;
> +     }
> +      else
> +     {
> +       /* t4 = oprnd0 >> (prec - 1);
> +          or if we know from VRP that oprnd0 >= 0
> +          t4 = 0;
> +          or if we know from VRP that oprnd0 < 0
> +          t4 = -1;  */
> +       append_pattern_def_seq (stmt_vinfo, def_stmt);
> +       t4 = vect_recog_temp_ssa_var (itype, NULL);
> +       if (msb != 1)
> +         def_stmt
> +           = gimple_build_assign_with_ops (INTEGER_CST,
> +                                           t4, build_int_cst (itype, msb),
> +                                           NULL_TREE);
> +       else
> +         def_stmt
> +           = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
> +                                           build_int_cst (itype, prec - 1));
> +       append_pattern_def_seq (stmt_vinfo, def_stmt);
> +
> +       /* q = t3 - t4;  or q = t4 - t3;  */
> +       q = vect_recog_temp_ssa_var (itype, NULL);
> +       pattern_stmt
> +         = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
> +                                         d < 0 ? t3 : t4);
> +     }
>      }
>  
>    if (rhs_code == TRUNC_MOD_EXPR)
> --- gcc/testsuite/gcc.target/i386/vect-div-1.c.jj     2013-10-23 11:43:49.089265027 +0200
> +++ gcc/testsuite/gcc.target/i386/vect-div-1.c        2013-10-23 11:57:06.387187749 +0200
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target sse2 } } */
> +/* { dg-options "-O2 -ftree-vectorize -fno-common -msse2" } */
> +
> +unsigned short b[1024] = { 0 };
> +int a[1024] = { 0 };
> +
> +int
> +f1 (int x)
> +{
> +  int i;
> +  for (i = 0; i < 1024; i++)
> +    a[i] = (b[i] + 7) / 15;
> +}
> +
> +int
> +f2 (int x)
> +{
> +  int i;
> +  for (i = 0; i < 1024; i++)
> +    a[i] = (b[i] + 7) % 15;
> +}
> +
> +int
> +f3 (int x)
> +{
> +  int i;
> +  for (i = 0; i < 1024; i++)
> +    a[i] = (b[i] - 66000) / 15;
> +}
> +
> +int
> +f4 (int x)
> +{
> +  int i;
> +  for (i = 0; i < 1024; i++)
> +    a[i] = (b[i] - 66000) % 15;
> +}
> +
> +/* In f1 and f2, VRP can prove the first operand of division or modulo
> +   is always non-negative, so there is no need to do >> 31 shift
> +   etc. to check if it is.  And in f3 and f4, VRP can prove it is always
> +   negative.  */
> +/* { dg-final { scan-assembler-not "psrad\[^\n\r\]*\\\$31" } } */
> 
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend

Reply via email to