max intrinsics

Kyrill Tkachov Wed, 18 Jul 2018 04:18:27 -0700

Hi all,

Thank you for the feedback so far.
This version of the patch doesn't try to emit fmin/fmax function calls but 
instead
emits MIN/MAX_EXPR sequences unconditionally.
I think a source of confusion in the original proposal (for me at least) was
that on aarch64 (that I primarily work on) we implement the fmin/fmax optabs
and therefore these calls are expanded to a single instruction.
But on x86_64 these optabs are not implemented and therefore expand to actual 
library calls.
Therefore at -O3 (no -ffast-math) I saw a gain on aarch64. But I measured today
on x86_64 and saw a regression.


Thomas and Janne suggested that the Fortran standard does not impose a 
requirement
on NaN handling for the min/max intrinsics, which would make emitting 
MIN/MAX_EXPR
sequences unconditionally a valid approach.

However, the gfortran.dg/nan_1.f90 test checks that handling of NaN values in
these intrinsics follows the IEEE semantics (min (nan, 2.0) == 2.0, for 
example).
This is not required by the standard, but is the existing gfortran behaviour.

If we end up always emitting MIN/MAX_EXPR sequences, like this version of the 
patch does,
then that test fails on some configurations of x86_64 and not others (for me it 
FAILs
by default, but passes with -march=native on my machine) and passes on AArch64.
This is expected since MIN/MAX_EXPR doesn't enforce IEEE behaviour on its 
arguments.

However, by always emitting MIN/MAX_EXPR the gfc_conv_intrinsic_minmax function 
is
simplified and, perhaps more importantly, generates faster code in the -O3 case.
With this patch I see performance improvement on 521.wrf on both AArch64 (3.7%)
and x86_64 (5.4%).

Thomas, Janne, would this relaxation of NaN handling be acceptable given the 
benefits
mentioned above? If so, what would be the recommended adjustment to the 
nan_1.f90 test?

Thanks,
Kyrill

2018-07-18  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * trans-intrinsic.c: (gfc_conv_intrinsic_minmax): Emit MIN_MAX_EXPR
    sequence to calculate the min/max.

2018-07-18  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * gfortran.dg/max_float.f90: New test.
    * gfortran.dg/min_float.f90: Likewise.
    * gfortran.dg/minmax_integer.f90: Likewise.

diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index d306e3a5a6209c1621d91f99ffc366acecd9c3d0..e5a1f1ddabeedc7b9f473db11e70f29548fc69ac 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -3874,14 +3874,11 @@ gfc_conv_intrinsic_ttynam (gfc_se * se, gfc_expr * expr)
     minmax (a1, a2, a3, ...)
     {
       mvar = a1;
-      if (a2 .op. mvar || isnan (mvar))
-        mvar = a2;
-      if (a3 .op. mvar || isnan (mvar))
-        mvar = a3;
+      mvar = MIN/MAX_EXPR (mvar, a2);
+      mvar = MIN/MAX_EXPR (mvar, a3);
       ...
-      return mvar
-    }
- */
+      return mvar;
+    }  */
 
 /* TODO: Mismatching types can occur when specific names are used.
    These should be handled during resolution.  */
@@ -3891,7 +3888,6 @@ gfc_conv_intrinsic_minmax (gfc_se * se, gfc_expr * expr, enum tree_code op)
   tree tmp;
   tree mvar;
   tree val;
-  tree thencase;
   tree *args;
   tree type;
   gfc_actual_arglist *argexpr;
@@ -3912,55 +3908,37 @@ gfc_conv_intrinsic_minmax (gfc_se * se, gfc_expr * expr, enum tree_code op)
 
   mvar = gfc_create_var (type, "M");
   gfc_add_modify (&se->pre, mvar, args[0]);
-  for (i = 1, argexpr = argexpr->next; i < nargs; i++)
-    {
-      tree cond, isnan;
 
+  for (i = 1, argexpr = argexpr->next; i < nargs; i++, argexpr = argexpr->next)
+    {
+      tree cond = NULL_TREE;
       val = args[i];
 
       /* Handle absent optional arguments by ignoring the comparison.  */
       if (argexpr->expr->expr_type == EXPR_VARIABLE
 	  && argexpr->expr->symtree->n.sym->attr.optional
 	  && TREE_CODE (val) == INDIRECT_REF)
-	cond = fold_build2_loc (input_location,
+	{
+	  cond = fold_build2_loc (input_location,
 				NE_EXPR, logical_type_node,
 				TREE_OPERAND (val, 0),
 			build_int_cst (TREE_TYPE (TREE_OPERAND (val, 0)), 0));
-      else
-      {
-	cond = NULL_TREE;
-
+	}
+      else if (!VAR_P (val) && !TREE_CONSTANT (val))
 	/* Only evaluate the argument once.  */
-	if (!VAR_P (val) && !TREE_CONSTANT (val))
-	  val = gfc_evaluate_now (val, &se->pre);
-      }
-
-      thencase = build2_v (MODIFY_EXPR, mvar, convert (type, val));
+	val = gfc_evaluate_now (val, &se->pre);
 
-      tmp = fold_build2_loc (input_location, op, logical_type_node,
-			     convert (type, val), mvar);
+      tree calc;
 
-      /* FIXME: When the IEEE_ARITHMETIC module is implemented, the call to
-	 __builtin_isnan might be made dependent on that module being loaded,
-	 to help performance of programs that don't rely on IEEE semantics.  */
-      if (FLOAT_TYPE_P (TREE_TYPE (mvar)))
-	{
-	  isnan = build_call_expr_loc (input_location,
-				       builtin_decl_explicit (BUILT_IN_ISNAN),
-				       1, mvar);
-	  tmp = fold_build2_loc (input_location, TRUTH_OR_EXPR,
-				 logical_type_node, tmp,
-				 fold_convert (logical_type_node, isnan));
-	}
-      tmp = build3_v (COND_EXPR, tmp, thencase,
-		      build_empty_stmt (input_location));
+      tree_code code = op == GT_EXPR ? MAX_EXPR : MIN_EXPR;
+      calc = fold_build2_loc (input_location, code, type,
+				convert (type, val), mvar);
+      tmp = build2_v (MODIFY_EXPR, mvar, calc);
 
       if (cond != NULL_TREE)
 	tmp = build3_v (COND_EXPR, cond, tmp,
 			build_empty_stmt (input_location));
-
       gfc_add_expr_to_block (&se->pre, tmp);
-      argexpr = argexpr->next;
     }
   se->expr = mvar;
 }
diff --git a/gcc/testsuite/gfortran.dg/max_float.f90 b/gcc/testsuite/gfortran.dg/max_float.f90
new file mode 100644
index 0000000000000000000000000000000000000000..a3a5d4f5df29cfa9c4e3abc2c18e7d3de1169fc3
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/max_float.f90
@@ -0,0 +1,19 @@
+! { dg-do compile }
+! { dg-options "-O2 -fdump-tree-optimized" }
+
+subroutine fool (a, b, c, d, e, f, g, h)
+  real (kind=16) :: a, b, c, d, e, f, g, h
+  a = max (a, b, c, d, e, f, g, h)
+end subroutine
+
+subroutine foo (a, b, c, d, e, f, g, h)
+  real (kind=8) :: a, b, c, d, e, f, g, h
+  a = max (a, b, c, d, e, f, g, h)
+end subroutine
+
+subroutine foof (a, b, c, d, e, f, g, h)
+  real (kind=4) :: a, b, c, d, e, f, g, h
+  a = max (a, b, c, d, e, f, g, h)
+end subroutine
+
+! { dg-final { scan-tree-dump-times "MAX_EXPR " 21 "optimized" } }
diff --git a/gcc/testsuite/gfortran.dg/min_float.f90 b/gcc/testsuite/gfortran.dg/min_float.f90
new file mode 100644
index 0000000000000000000000000000000000000000..41bd6b3c4062f364791841f7097f9a5c00782ec8
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/min_float.f90
@@ -0,0 +1,19 @@
+! { dg-do compile }
+! { dg-options "-O2 -fdump-tree-optimized" }
+
+subroutine fool (a, b, c, d, e, f, g, h)
+  real (kind=16) :: a, b, c, d, e, f, g, h
+  a = min (a, b, c, d, e, f, g, h)
+end subroutine
+
+subroutine foo (a, b, c, d, e, f, g, h)
+  real (kind=8) :: a, b, c, d, e, f, g, h
+  a = min (a, b, c, d, e, f, g, h)
+end subroutine
+
+subroutine foof (a, b, c, d, e, f, g, h)
+  real (kind=4) :: a, b, c, d, e, f, g, h
+  a = min (a, b, c, d, e, f, g, h)
+end subroutine
+
+! { dg-final { scan-tree-dump-times "MIN_EXPR " 21 "optimized" } }
diff --git a/gcc/testsuite/gfortran.dg/minmax_integer.f90 b/gcc/testsuite/gfortran.dg/minmax_integer.f90
new file mode 100644
index 0000000000000000000000000000000000000000..5b6be38c7055ce4e8620cf75ec7d8a182436b24f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/minmax_integer.f90
@@ -0,0 +1,15 @@
+! { dg-do compile }
+! { dg-options "-O2 -fdump-tree-optimized" }
+
+subroutine foo (a, b, c, d, e, f, g, h)
+  integer (kind=4) :: a, b, c, d, e, f, g, h
+  a = min (a, b, c, d, e, f, g, h)
+end subroutine
+
+subroutine foof (a, b, c, d, e, f, g, h)
+  integer (kind=4) :: a, b, c, d, e, f, g, h
+  a = max (a, b, c, d, e, f, g, h)
+end subroutine
+
+! { dg-final { scan-tree-dump-times "MIN_EXPR" 7 "optimized" } }
+! { dg-final { scan-tree-dump-times "MAX_EXPR" 7 "optimized" } }

[PATCH][Fortran][v2] Use MIN/MAX_EXPR for min/max intrinsics

Reply via email to