Hi!

As discussed recently on the mailing list, the norm2 simplification doesn't
work if we limit mpfr emin/emax to some values derived from maximum floating
exponents (and precision for denormals).

The following patch adjusts the computation, so that it is scaled down if
needed.  In particular, if any value in the array is so large that **2 will
overflow on it or will be very close to it, the scale down is set to
2**(max_exponent/2+4), and if the result during computation gets close to
overflowing, it is scaled down a little bit too.  The scaling is always done
using powers of two, operands by that and the sum by **2 of that, and at the
end it multiplies the sqrt back.  I had to change
simplify_transformation_to_array, so that post_op is done immediately after
finishing ops corresponding to that, so that there can be just one global
variable for the scale.  From my understanding of e.g. the libgfortran norm2
code where sqrt is done basically in this spot I hope it isn't possible that
the same *dest is updated multiple times with dest increments/decrements in
between.

Bootstrapped/regtested on x86_64-linux and i686-linux (together with
Richard's patch), ok for trunk?

2019-02-12  Jakub Jelinek  <ja...@redhat.com>

        PR middle-end/88074
        * simplify.c (simplify_transformation_to_array): Run post_op
        immediately after processing corresponding row, rather than at the
        end.
        (norm2_scale): New variable.
        (add_squared): Rename to ...
        (norm2_add_squared): ... this.  Scale down operand and/or result
        if needed.
        (do_sqrt): Rename to ...
        (norm2_do_sqrt): ... this.  Handle the result == e case.  Scale up
        result and clear norm2_scale.
        (gfc_simplify_norm2): Clear norm2_scale.  Change add_squared to
        norm2_add_squared and &do_sqrt to norm2_do_sqrt.  Scale up result
        and clear norm2_scale again.

--- gcc/fortran/simplify.c.jj   2019-01-10 11:43:12.452409482 +0100
+++ gcc/fortran/simplify.c      2019-02-12 19:54:03.726526824 +0100
@@ -636,6 +636,9 @@ simplify_transformation_to_array (gfc_ex
        if (*src)
          *dest = op (*dest, gfc_copy_expr (*src));
 
+      if (post_op)
+       *dest = post_op (*dest, *dest);
+
       count[0]++;
       base += sstride[0];
       dest += dstride[0];
@@ -671,10 +674,7 @@ simplify_transformation_to_array (gfc_ex
   result_ctor = gfc_constructor_first (result->value.constructor);
   for (i = 0; i < resultsize; ++i)
     {
-      if (post_op)
-       result_ctor->expr = post_op (result_ctor->expr, resultvec[i]);
-      else
-       result_ctor->expr = resultvec[i];
+      result_ctor->expr = resultvec[i];
       result_ctor = gfc_constructor_next (result_ctor);
     }
 
@@ -6048,9 +6048,10 @@ gfc_simplify_idnint (gfc_expr *e)
   return simplify_nint ("IDNINT", e, NULL);
 }
 
+static int norm2_scale;
 
 static gfc_expr *
-add_squared (gfc_expr *result, gfc_expr *e)
+norm2_add_squared (gfc_expr *result, gfc_expr *e)
 {
   mpfr_t tmp;
 
@@ -6059,8 +6060,45 @@ add_squared (gfc_expr *result, gfc_expr
              && result->expr_type == EXPR_CONSTANT);
 
   gfc_set_model_kind (result->ts.kind);
+  int index = gfc_validate_kind (BT_REAL, result->ts.kind, false);
+  mpfr_exp_t exp;
+  if (mpfr_regular_p (result->value.real))
+    {
+      exp = mpfr_get_exp (result->value.real);
+      /* If result is getting close to overflowing, scale down.  */
+      if (exp >= gfc_real_kinds[index].max_exponent - 4
+         && norm2_scale <= gfc_real_kinds[index].max_exponent - 2)
+       {
+         norm2_scale += 2;
+         mpfr_div_ui (result->value.real, result->value.real, 16,
+                      GFC_RND_MODE);
+       }
+    }
+
   mpfr_init (tmp);
-  mpfr_pow_ui (tmp, e->value.real, 2, GFC_RND_MODE);
+  if (mpfr_regular_p (e->value.real))
+    {
+      exp = mpfr_get_exp (e->value.real);
+      /* If e**2 would overflow or close to overflowing, scale down.  */
+      if (exp - norm2_scale >= gfc_real_kinds[index].max_exponent / 2 - 2)
+       {
+         int new_scale = gfc_real_kinds[index].max_exponent / 2 + 4;
+         mpfr_set_ui (tmp, 1, GFC_RND_MODE);
+         mpfr_set_exp (tmp, new_scale - norm2_scale);
+         mpfr_div (result->value.real, result->value.real, tmp, GFC_RND_MODE);
+         mpfr_div (result->value.real, result->value.real, tmp, GFC_RND_MODE);
+         norm2_scale = new_scale;
+       }
+    }
+  if (norm2_scale)
+    {
+      mpfr_set_ui (tmp, 1, GFC_RND_MODE);
+      mpfr_set_exp (tmp, norm2_scale);
+      mpfr_div (tmp, e->value.real, tmp, GFC_RND_MODE);
+    }
+  else
+    mpfr_set (tmp, e->value.real, GFC_RND_MODE);
+  mpfr_pow_ui (tmp, tmp, 2, GFC_RND_MODE);
   mpfr_add (result->value.real, result->value.real, tmp,
            GFC_RND_MODE);
   mpfr_clear (tmp);
@@ -6070,14 +6108,26 @@ add_squared (gfc_expr *result, gfc_expr
 
 
 static gfc_expr *
-do_sqrt (gfc_expr *result, gfc_expr *e)
+norm2_do_sqrt (gfc_expr *result, gfc_expr *e)
 {
   gcc_assert (e->ts.type == BT_REAL && e->expr_type == EXPR_CONSTANT);
   gcc_assert (result->ts.type == BT_REAL
              && result->expr_type == EXPR_CONSTANT);
 
-  mpfr_set (result->value.real, e->value.real, GFC_RND_MODE);
+  if (result != e)
+    mpfr_set (result->value.real, e->value.real, GFC_RND_MODE);
   mpfr_sqrt (result->value.real, result->value.real, GFC_RND_MODE);
+  if (norm2_scale && mpfr_regular_p (result->value.real))
+    {
+      mpfr_t tmp;
+      mpfr_init (tmp);
+      mpfr_set_ui (tmp, 1, GFC_RND_MODE);
+      mpfr_set_exp (tmp, norm2_scale);
+      mpfr_mul (result->value.real, result->value.real, tmp, GFC_RND_MODE);
+      mpfr_clear (tmp);
+    }
+  norm2_scale = 0;
+
   return result;
 }
 
@@ -6100,15 +6150,27 @@ gfc_simplify_norm2 (gfc_expr *e, gfc_exp
   if (size_zero)
     return result;
 
+  norm2_scale = 0;
   if (!dim || e->rank == 1)
     {
       result = simplify_transformation_to_scalar (result, e, NULL,
-                                                 add_squared);
+                                                 norm2_add_squared);
       mpfr_sqrt (result->value.real, result->value.real, GFC_RND_MODE);
+      if (norm2_scale && mpfr_regular_p (result->value.real))
+       {
+         mpfr_t tmp;
+         mpfr_init (tmp);
+         mpfr_set_ui (tmp, 1, GFC_RND_MODE);
+         mpfr_set_exp (tmp, norm2_scale);
+         mpfr_mul (result->value.real, result->value.real, tmp, GFC_RND_MODE);
+         mpfr_clear (tmp);
+       }
+      norm2_scale = 0;
     }
   else
     result = simplify_transformation_to_array (result, e, dim, NULL,
-                                              add_squared, &do_sqrt);
+                                              norm2_add_squared,
+                                              norm2_do_sqrt);
 
   return result;
 }

        Jakub

Reply via email to