On Wed, 23 Oct 2013, Jakub Jelinek wrote:

> On Wed, Oct 23, 2013 at 11:14:54AM +0200, Richard Biener wrote:
> > On Tue, 22 Oct 2013, Jakub Jelinek wrote:
> > > Hi!
> > >
> > > If VRP tells us that oprnd is always >= 0 or always < 0, we can
> > > generate better code for the divmod vectorization.
> > >
> > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> >
> > Testcase...?
> >
> > Ok with adding one (I suggest a x86 specific one and scanning the
> > assembler dump).
>
> Like this?
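
For context, the scalar shape of the sequence this pattern emits for x / 15
looks roughly like the sketch below (not part of the patch; the 0x88888889
multiplier, the extra add step and the shift by 3 are the usual signed
magic-division values for a 32-bit int, and the helper name is made up).
The interesting step is t4: once VRP knows the sign of x, t4 is a known
constant (0 or -1) and the vectorized >> 31 can go away.

#include <assert.h>
#include <stdint.h>

/* Illustrative only: scalar equivalent of the pattern's x / 15 lowering.
   Assumes the usual two's-complement behavior with arithmetic right
   shift of negative values, which is what GCC emits anyway.  */
static int32_t
div15 (int32_t x)
{
  /* t1 = x h* ml;  high 32 bits of the signed 64-bit product.  */
  int32_t t1 = (int32_t) (((int64_t) x * (int32_t) 0x88888889) >> 32);
  /* t2 = t1 + x;  the "add" case, since ml does not fit in 31 bits.  */
  int32_t t2 = t1 + x;
  /* t3 = t2 >> post_shift;  */
  int32_t t3 = t2 >> 3;
  /* t4 = x >> (prec - 1);  0 if x >= 0, -1 if x < 0.  This is the step
     the patch turns into a constant (or drops) when VRP already knows
     the sign of x.  */
  int32_t t4 = x >> 31;
  /* q = t3 - t4;  */
  return t3 - t4;
}

int
main (void)
{
  for (int32_t x = -100000; x <= 100000; x++)
    assert (div15 (x) == x / 15);
  return 0;
}
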
Yes, thanks.

Richard.

> 2013-10-23  Jakub Jelinek  <ja...@redhat.com>
>
>         * tree-vect-patterns.c (vect_recog_divmod_pattern): Optimize
>         sequence based on get_range_info returned range.
>
>         * gcc.target/i386/vect-div-1.c: New test.
>
> --- gcc/tree-vect-patterns.c.jj        2013-10-22 18:36:51.947395037 +0200
> +++ gcc/tree-vect-patterns.c   2013-10-23 11:28:26.211956658 +0200
> @@ -2226,20 +2226,19 @@ vect_recog_divmod_pattern (vec<gimple> *
>        if (post_shift >= prec)
>          return NULL;
>
> -      /* t1 = oprnd1 h* ml;  */
> +      /* t1 = oprnd0 h* ml;  */
>        t1 = vect_recog_temp_ssa_var (itype, NULL);
>        def_stmt
>          = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
>                                          build_int_cst (itype, ml));
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
>
>        if (add)
>          {
>            /* t2 = t1 + oprnd0;  */
> +          append_pattern_def_seq (stmt_vinfo, def_stmt);
>            t2 = vect_recog_temp_ssa_var (itype, NULL);
>            def_stmt
>              = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
> -          append_pattern_def_seq (stmt_vinfo, def_stmt);
>          }
>        else
>          t2 = t1;
> @@ -2247,27 +2246,57 @@ vect_recog_divmod_pattern (vec<gimple> *
>        if (post_shift)
>          {
>            /* t3 = t2 >> post_shift;  */
> +          append_pattern_def_seq (stmt_vinfo, def_stmt);
>            t3 = vect_recog_temp_ssa_var (itype, NULL);
>            def_stmt
>              = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
>                                              build_int_cst (itype, post_shift));
> -          append_pattern_def_seq (stmt_vinfo, def_stmt);
>          }
>        else
>          t3 = t2;
>
> -      /* t4 = oprnd0 >> (prec - 1);  */
> -      t4 = vect_recog_temp_ssa_var (itype, NULL);
> -      def_stmt
> -        = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
> -                                        build_int_cst (itype, prec - 1));
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> -
> -      /* q = t3 - t4;  or q = t4 - t3;  */
> -      q = vect_recog_temp_ssa_var (itype, NULL);
> -      pattern_stmt
> -        = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
> -                                        d < 0 ? t3 : t4);
> +      double_int oprnd0_min, oprnd0_max;
> +      int msb = 1;
> +      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
> +        {
> +          if (!oprnd0_min.is_negative ())
> +            msb = 0;
> +          else if (oprnd0_max.is_negative ())
> +            msb = -1;
> +        }
> +
> +      if (msb == 0 && d >= 0)
> +        {
> +          /* q = t3;  */
> +          q = t3;
> +          pattern_stmt = def_stmt;
> +        }
> +      else
> +        {
> +          /* t4 = oprnd0 >> (prec - 1);
> +             or if we know from VRP that oprnd0 >= 0
> +             t4 = 0;
> +             or if we know from VRP that oprnd0 < 0
> +             t4 = -1;  */
> +          append_pattern_def_seq (stmt_vinfo, def_stmt);
> +          t4 = vect_recog_temp_ssa_var (itype, NULL);
> +          if (msb != 1)
> +            def_stmt
> +              = gimple_build_assign_with_ops (INTEGER_CST,
> +                                              t4, build_int_cst (itype, msb),
> +                                              NULL_TREE);
> +          else
> +            def_stmt
> +              = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
> +                                              build_int_cst (itype, prec - 1));
> +          append_pattern_def_seq (stmt_vinfo, def_stmt);
> +
> +          /* q = t3 - t4;  or q = t4 - t3;  */
> +          q = vect_recog_temp_ssa_var (itype, NULL);
> +          pattern_stmt
> +            = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
> +                                            d < 0 ? t3 : t4);
> +        }
>      }
>
>    if (rhs_code == TRUNC_MOD_EXPR)
> --- gcc/testsuite/gcc.target/i386/vect-div-1.c.jj       2013-10-23 11:43:49.089265027 +0200
> +++ gcc/testsuite/gcc.target/i386/vect-div-1.c  2013-10-23 11:57:06.387187749 +0200
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target sse2 } } */
> +/* { dg-options "-O2 -ftree-vectorize -fno-common -msse2" } */
> +
> +unsigned short b[1024] = { 0 };
> +int a[1024] = { 0 };
> +
> +int
> +f1 (int x)
> +{
> +  int i;
> +  for (i = 0; i < 1024; i++)
> +    a[i] = (b[i] + 7) / 15;
> +}
> +
> +int
> +f2 (int x)
> +{
> +  int i;
> +  for (i = 0; i < 1024; i++)
> +    a[i] = (b[i] + 7) % 15;
> +}
> +
> +int
> +f3 (int x)
> +{
> +  int i;
> +  for (i = 0; i < 1024; i++)
> +    a[i] = (b[i] - 66000) / 15;
> +}
> +
> +int
> +f4 (int x)
> +{
> +  int i;
> +  for (i = 0; i < 1024; i++)
> +    a[i] = (b[i] - 66000) % 15;
> +}
> +
> +/* In f1 and f2, VRP can prove the first operand of division or modulo
> +   is always non-negative, so there is no need to do >> 31 shift
> +   etc. to check if it is.  And in f3 and f4, VRP can prove it is always
> +   negative.  */
> +/* { dg-final { scan-assembler-not "psrad\[^\n\r\]*\\\$31" } } */
>
>         Jakub

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer
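
Relating this to the new test: in f1, VRP knows (b[i] + 7) lies in [7, 65542],
so msb == 0 and d >= 0, and the pattern now emits just q = t3 -- no >> 31 in
the scalar form and no psrad $31 in the vector code.  A scalar sketch of that
shortened form (same illustrative d = 15 constants as in the sketch above;
the helper name is made up, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Illustrative only: the shortened lowering used when the dividend is
   known non-negative, as for (b[i] + 7) in f1/f2 of the new test.  */
static int32_t
div15_nonneg (int32_t x)
{
  /* Caller guarantees x >= 0 (here: x in [7, 65542]).  */
  int32_t t1 = (int32_t) (((int64_t) x * (int32_t) 0x88888889) >> 32);
  int32_t t2 = t1 + x;
  return t2 >> 3;   /* q = t3; the t4 = x >> 31 correction is gone.  */
}

int
main (void)
{
  for (int32_t x = 7; x <= 65542; x++)
    assert (div15_nonneg (x) == x / 15);
  return 0;
}

For f3/f4, where the operand is always negative, the pattern instead
materializes t4 as the constant -1, so the vector code likewise needs no
arithmetic shift, which is what the scan-assembler-not check verifies.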