Dear all, here's my revised patch for the treatment of (2**e) ** n and (- 2**e) ** n, trying to take into account the bitwidth of the result.
I've also extended the testcase to sample different integer kinds. ChangeLogs see below. Note: I don't have commit rights, so please somebody else take care of it after review. Thanks, Harald 2019-01-20 Harald Anlauf <anl...@gmx.de> PR fortran/88579 * trans-expr.c (gfc_conv_power_op): Handle cases of (2**e) ** integer and (- 2**e) ** integer. 2019-01-20 Harald Anlauf <anl...@gmx.de> PR fortran/88579 * gfortran.dg/power_8.f90: New test. On 01/19/19 16:45, Thomas Koenig wrote: > Hi Harald, > > >> now that my copyright assignment is on file, here's my first attempt >> at a non-trivial patch. > > Excellent :-) > >> I have slightly rearranged Thomas' code, removed some, and added >> generalizations for (2**e)**n and (-2**e)**n. It works with the >> testcase below. > > That looks good. > >> I'd like some feedback how to properly handle the case when >> (-2**e) = -HUGE()-1, i.e. the number that falls outside the >> symmetric range. Should one skip that one? Does anyone have >> a template how to detect that case easily? Regtesting otherwise >> would fail for the last test (which most likely should be removed >> later). > > I think it is fine to fall back on the library version (which still > exists) for that one. > > The best way to check this is probably by getting the bit size > of the expression by using something like > > ikind = gfc_validate_kind (BT_INTEGER, expr->ts.kind, false); > bit_size = gfc_integer_kinds[ikind].bit_size; > > and take it from there. > > Regards > > Thomas > >
Index: gcc/fortran/trans-expr.c =================================================================== --- gcc/fortran/trans-expr.c (revision 268106) +++ gcc/fortran/trans-expr.c (working copy) @@ -3060,8 +3060,16 @@ && TREE_CODE (TREE_TYPE (rse.expr)) == INTEGER_TYPE) { wi::tree_to_wide_ref wlhs = wi::to_wide (lse.expr); - HOST_WIDE_INT v; + HOST_WIDE_INT v, w; + int kind, ikind, bit_size; + v = wlhs.to_shwi (); + w = abs (v); + + kind = expr->value.op.op1->ts.kind; + ikind = gfc_validate_kind (BT_INTEGER, kind, false); + bit_size = gfc_integer_kinds[ikind].bit_size; + if (v == 1) { /* 1**something is always 1. */ @@ -3068,11 +3076,28 @@ se->expr = build_int_cst (TREE_TYPE (lse.expr), 1); return; } - else if (v == 2 || v == 4 || v == 8 || v == 16) + else if (v == -1) { - /* 2**n = 1<<n, 4**n = 1<<(n+n), 8**n = 1 <<(3*n), 16**n = - 1<<(4*n), but we have to make sure to return zero if the - number of bits is too large. */ + /* (-1)**n is 1 - ((n & 1) << 1) */ + tree type; + tree tmp; + + type = TREE_TYPE (lse.expr); + tmp = fold_build2_loc (input_location, BIT_AND_EXPR, type, + rse.expr, build_int_cst (type, 1)); + tmp = fold_build2_loc (input_location, LSHIFT_EXPR, type, + tmp, build_int_cst (type, 1)); + tmp = fold_build2_loc (input_location, MINUS_EXPR, type, + build_int_cst (type, 1), tmp); + se->expr = tmp; + return; + } + else if (w > 0 && ((w & (w-1)) == 0) && ((w >> (bit_size-1)) == 0)) + { + /* Here v is +/- 2**e. The further simplification uses + 2**n = 1<<n, 4**n = 1<<(n+n), 8**n = 1 <<(3*n), 16**n = + 1<<(4*n), etc., but we have to make sure to return zero + if the number of bits is too large. */ tree lshift; tree type; tree shift; @@ -3080,27 +3105,25 @@ tree cond; tree num_bits; tree cond2; + tree tmp1; type = TREE_TYPE (lse.expr); - if (v == 2) + if (w == 2) shift = rse.expr; - else if (v == 4) + else if (w == 4) shift = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (rse.expr), rse.expr, rse.expr); - else if (v == 8) - shift = fold_build2_loc (input_location, MULT_EXPR, - TREE_TYPE (rse.expr), - build_int_cst (TREE_TYPE (rse.expr), 3), - rse.expr); - else if (v == 16) - shift = fold_build2_loc (input_location, MULT_EXPR, - TREE_TYPE (rse.expr), - build_int_cst (TREE_TYPE (rse.expr), 4), - rse.expr); else - gcc_unreachable (); + { + /* use popcount for fast log2(w) */ + int e = wi::popcount (w-1); + shift = fold_build2_loc (input_location, MULT_EXPR, + TREE_TYPE (rse.expr), + build_int_cst (TREE_TYPE (rse.expr), e), + rse.expr); + } lshift = fold_build2_loc (input_location, LSHIFT_EXPR, type, build_int_cst (type, 1), shift); @@ -3111,26 +3134,27 @@ num_bits = build_int_cst (TREE_TYPE (rse.expr), TYPE_PRECISION (type)); cond2 = fold_build2_loc (input_location, GE_EXPR, logical_type_node, rse.expr, num_bits); - se->expr = fold_build3_loc (input_location, COND_EXPR, type, cond2, - build_int_cst (type, 0), cond); + tmp1 = fold_build3_loc (input_location, COND_EXPR, type, cond2, + build_int_cst (type, 0), cond); + if (v > 0) + { + se->expr = tmp1; + } + else + { + /* for v < 0, calculate v**n = |v|**n * (-1)**n */ + tree tmp2; + tmp2 = fold_build2_loc (input_location, BIT_AND_EXPR, type, + rse.expr, build_int_cst (type, 1)); + tmp2 = fold_build2_loc (input_location, LSHIFT_EXPR, type, + tmp2, build_int_cst (type, 1)); + tmp2 = fold_build2_loc (input_location, MINUS_EXPR, type, + build_int_cst (type, 1), tmp2); + se->expr = fold_build2_loc (input_location, MULT_EXPR, type, + tmp1, tmp2); + } return; } - else if (v == -1) - { - /* (-1)**n is 1 - ((n & 1) << 1) */ - tree type; - tree tmp; - - type = TREE_TYPE (lse.expr); - tmp = fold_build2_loc (input_location, BIT_AND_EXPR, type, - rse.expr, build_int_cst (type, 1)); - tmp = fold_build2_loc (input_location, LSHIFT_EXPR, type, - tmp, build_int_cst (type, 1)); - tmp = fold_build2_loc (input_location, MINUS_EXPR, type, - build_int_cst (type, 1), tmp); - se->expr = tmp; - return; - } } gfc_int4_type_node = gfc_get_int_type (4);
Index: gcc/testsuite/gfortran.dg/power_8.f90 =================================================================== --- gcc/testsuite/gfortran.dg/power_8.f90 (nonexistent) +++ gcc/testsuite/gfortran.dg/power_8.f90 (working copy) @@ -0,0 +1,64 @@ +! { dg-do run } +! { dg-additional-options "-fdump-tree-original" } +! +! PR88579 - Test optimizations for bases that are powers of 2 or -2. +program p + implicit none + integer(4) :: i, u + integer(1) :: j, v + integer(2) :: k, w + integer(8) :: z + ! Test selected positive bases + u = 1 + do i=1,5 + u = u * 64_4 + if (u /= 64_4 ** i) stop 1 + end do + z = 1 + do i=1,7 + z = z * 256_8 + if (z /= 256_8 ** i) stop 2 + end do + z = 1 + do i=1,3 + z = z * 65536_8 + if (z /= 65536_8 ** i) stop 3 + end do + ! Test selected negative bases and integer kind combinations + u = 1 + do i=1,7 + u = u * (-2_1) + if (u /= (-2_1) ** i) stop 4 + end do + v = 1 + do j=1,7 + v = v * (-2_1) + if (v /= (-2_1) ** j) stop 5 + end do + v = 1 + do k=1,7 + v = v * (-2_1) + if (v /= (-2_1) ** k) stop 6 + end do + w = 1 + do k=1,7 + w = w * (-4_2) + if (w /= (-4_2) ** k) stop 7 + end do + w = 1 + do i=1,5 + w = w * (-8_2) + if (w /= (-8_2) ** i) stop 8 + end do + u = 1 + do i=1,1 + u = u * (-HUGE(1_4)/2-1) + if (u /= (-HUGE(1_4)/2-1) ** i) stop 9 + end do + z = 1 + do i=1,7 + z = z * (-512_8) + if (z /= (-512_8) ** i) stop 10 + end do +end program p +! { dg-final { scan-tree-dump-not "_gfortran_pow" "original" } }