Hi! As discussed recently on the mailing list, the norm2 simplification doesn't work if we limit mpfr emin/emax to some values derived from maximum floating exponents (and precision for denormals).
The following patch adjusts the computation, so that it is scaled down if needed. In particular, if any value in the array is so large that **2 will overflow on it or will be very close to it, the scale down is set to 2**(max_exponent/2+4), and if the result during computation gets close to overflowing, it is scaled down a little bit too. The scaling is always done using powers of two, operands by that and the sum by **2 of that, and at the end it multiplies the sqrt back. I had to change simplify_transformation_to_array, so that post_op is done immediately after finishing ops corresponding to that, so that there can be just one global variable for the scale. From my understanding of e.g. the libgfortran norm2 code where sqrt is done basically in this spot I hope it isn't possible that the same *dest is updated multiple times with dest increments/decrements in between. Bootstrapped/regtested on x86_64-linux and i686-linux (together with Richard's patch), ok for trunk? 2019-02-12 Jakub Jelinek <ja...@redhat.com> PR middle-end/88074 * simplify.c (simplify_transformation_to_array): Run post_op immediately after processing corresponding row, rather than at the end. (norm2_scale): New variable. (add_squared): Rename to ... (norm2_add_squared): ... this. Scale down operand and/or result if needed. (do_sqrt): Rename to ... (norm2_do_sqrt): ... this. Handle the result == e case. Scale up result and clear norm2_scale. (gfc_simplify_norm2): Clear norm2_scale. Change add_squared to norm2_add_squared and &do_sqrt to norm2_do_sqrt. Scale up result and clear norm2_scale again. --- gcc/fortran/simplify.c.jj 2019-01-10 11:43:12.452409482 +0100 +++ gcc/fortran/simplify.c 2019-02-12 19:54:03.726526824 +0100 @@ -636,6 +636,9 @@ simplify_transformation_to_array (gfc_ex if (*src) *dest = op (*dest, gfc_copy_expr (*src)); + if (post_op) + *dest = post_op (*dest, *dest); + count[0]++; base += sstride[0]; dest += dstride[0]; @@ -671,10 +674,7 @@ simplify_transformation_to_array (gfc_ex result_ctor = gfc_constructor_first (result->value.constructor); for (i = 0; i < resultsize; ++i) { - if (post_op) - result_ctor->expr = post_op (result_ctor->expr, resultvec[i]); - else - result_ctor->expr = resultvec[i]; + result_ctor->expr = resultvec[i]; result_ctor = gfc_constructor_next (result_ctor); } @@ -6048,9 +6048,10 @@ gfc_simplify_idnint (gfc_expr *e) return simplify_nint ("IDNINT", e, NULL); } +static int norm2_scale; static gfc_expr * -add_squared (gfc_expr *result, gfc_expr *e) +norm2_add_squared (gfc_expr *result, gfc_expr *e) { mpfr_t tmp; @@ -6059,8 +6060,45 @@ add_squared (gfc_expr *result, gfc_expr && result->expr_type == EXPR_CONSTANT); gfc_set_model_kind (result->ts.kind); + int index = gfc_validate_kind (BT_REAL, result->ts.kind, false); + mpfr_exp_t exp; + if (mpfr_regular_p (result->value.real)) + { + exp = mpfr_get_exp (result->value.real); + /* If result is getting close to overflowing, scale down. */ + if (exp >= gfc_real_kinds[index].max_exponent - 4 + && norm2_scale <= gfc_real_kinds[index].max_exponent - 2) + { + norm2_scale += 2; + mpfr_div_ui (result->value.real, result->value.real, 16, + GFC_RND_MODE); + } + } + mpfr_init (tmp); - mpfr_pow_ui (tmp, e->value.real, 2, GFC_RND_MODE); + if (mpfr_regular_p (e->value.real)) + { + exp = mpfr_get_exp (e->value.real); + /* If e**2 would overflow or close to overflowing, scale down. */ + if (exp - norm2_scale >= gfc_real_kinds[index].max_exponent / 2 - 2) + { + int new_scale = gfc_real_kinds[index].max_exponent / 2 + 4; + mpfr_set_ui (tmp, 1, GFC_RND_MODE); + mpfr_set_exp (tmp, new_scale - norm2_scale); + mpfr_div (result->value.real, result->value.real, tmp, GFC_RND_MODE); + mpfr_div (result->value.real, result->value.real, tmp, GFC_RND_MODE); + norm2_scale = new_scale; + } + } + if (norm2_scale) + { + mpfr_set_ui (tmp, 1, GFC_RND_MODE); + mpfr_set_exp (tmp, norm2_scale); + mpfr_div (tmp, e->value.real, tmp, GFC_RND_MODE); + } + else + mpfr_set (tmp, e->value.real, GFC_RND_MODE); + mpfr_pow_ui (tmp, tmp, 2, GFC_RND_MODE); mpfr_add (result->value.real, result->value.real, tmp, GFC_RND_MODE); mpfr_clear (tmp); @@ -6070,14 +6108,26 @@ add_squared (gfc_expr *result, gfc_expr static gfc_expr * -do_sqrt (gfc_expr *result, gfc_expr *e) +norm2_do_sqrt (gfc_expr *result, gfc_expr *e) { gcc_assert (e->ts.type == BT_REAL && e->expr_type == EXPR_CONSTANT); gcc_assert (result->ts.type == BT_REAL && result->expr_type == EXPR_CONSTANT); - mpfr_set (result->value.real, e->value.real, GFC_RND_MODE); + if (result != e) + mpfr_set (result->value.real, e->value.real, GFC_RND_MODE); mpfr_sqrt (result->value.real, result->value.real, GFC_RND_MODE); + if (norm2_scale && mpfr_regular_p (result->value.real)) + { + mpfr_t tmp; + mpfr_init (tmp); + mpfr_set_ui (tmp, 1, GFC_RND_MODE); + mpfr_set_exp (tmp, norm2_scale); + mpfr_mul (result->value.real, result->value.real, tmp, GFC_RND_MODE); + mpfr_clear (tmp); + } + norm2_scale = 0; + return result; } @@ -6100,15 +6150,27 @@ gfc_simplify_norm2 (gfc_expr *e, gfc_exp if (size_zero) return result; + norm2_scale = 0; if (!dim || e->rank == 1) { result = simplify_transformation_to_scalar (result, e, NULL, - add_squared); + norm2_add_squared); mpfr_sqrt (result->value.real, result->value.real, GFC_RND_MODE); + if (norm2_scale && mpfr_regular_p (result->value.real)) + { + mpfr_t tmp; + mpfr_init (tmp); + mpfr_set_ui (tmp, 1, GFC_RND_MODE); + mpfr_set_exp (tmp, norm2_scale); + mpfr_mul (result->value.real, result->value.real, tmp, GFC_RND_MODE); + mpfr_clear (tmp); + } + norm2_scale = 0; } else result = simplify_transformation_to_array (result, e, dim, NULL, - add_squared, &do_sqrt); + norm2_add_squared, + norm2_do_sqrt); return result; } Jakub