I've committed this patch to gomp4.

It removes acc_on_device handling from the oacc_xform pass, and moves it into the builtin folder. I force the runtime version to be built with optimization and remove the expander too.

Expansion is rather later than I'm comfortable with, but until we have use cases where it causes a problem, this is fine.

Bernd, I'd managed to confuse myself last week -- compiling w/o optimization can generate a different set of rtl dumps than with optimization, so I ended up seeing some stale ones.

Will prepare trunk versions next ...

nathan
2015-09-29  Nathan Sidwell  <nat...@codesourcery.com>

	gcc/
	* omp-low.c (oacc_xform_on_device): Delete.
	(oacc_xform_dim): Return bool.
	(execute_oacc_transform): Don't handle acc_on_device here.  Adjust
	rescan logic.
	* builtins.c (expand_builtin_acc_on_device): Delete.
	(expand_builtin): Do not call it.
	(fold_builtin_1): Fold acc_on_device.

	libgomp/
	* oacc-init.c (acc_on_device): Compile with optimization.
	* config/nvptx/oacc-init.c (acc_on_device): Compile with optimization.

Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 228215)
+++ gcc/omp-low.c	(working copy)
@@ -14719,45 +14719,10 @@ make_pass_late_lower_omp (gcc::context *
   return new pass_late_lower_omp (ctxt);
 }
 
-/* Transform an acc_on_device call.  OpenACC 2.0a requires this folded at
-   compile time for constant operands.  We always fold it.  In an
-   offloaded function we're never 'none'.  */
-
-static void
-oacc_xform_on_device (gcall *call)
-{
-  tree arg = gimple_call_arg (call, 0);
-  unsigned val = GOMP_DEVICE_HOST;
-	      
-#ifdef ACCEL_COMPILER
-  val = GOMP_DEVICE_NOT_HOST;
-#endif
-  tree result = build2 (EQ_EXPR, boolean_type_node, arg,
-			build_int_cst (integer_type_node, val));
-#ifdef ACCEL_COMPILER
-  {
-    tree dev  = build2 (EQ_EXPR, boolean_type_node, arg,
-			build_int_cst (integer_type_node,
-				       ACCEL_COMPILER_acc_device));
-    result = build2 (TRUTH_OR_EXPR, boolean_type_node, result, dev);
-  }
-#endif
-  result = fold_convert (integer_type_node, result);
-  tree lhs = gimple_call_lhs (call);
-  gimple_seq seq = NULL;
-
-  push_gimplify_context (true);
-  gimplify_assign (lhs, result, &seq);
-  pop_gimplify_context (NULL);
-
-  gimple_stmt_iterator gsi = gsi_for_stmt (call);
-  gsi_replace_with_seq (&gsi, seq, false);
-}
-
 /* Transform oacc_dim_size and oacc_dim_pos internal function calls to
    constants, where possible.  */
 
-static void
+static bool
 oacc_xform_dim (gcall *call, const int dims[], bool is_pos)
 {
   tree arg = gimple_call_arg (call, 0);
@@ -14766,13 +14731,13 @@ oacc_xform_dim (gcall *call, const int d
 
   if (!size)
     /* Dimension size is dynamic.  */
-    return;
+    return false;
   
   if (is_pos)
     {
       if (size != 1)
 	/* Size is more than 1, so POS might be non-zero.  */
-	return;
+	return false;
       size = 0;
     }
 
@@ -14783,6 +14748,7 @@ oacc_xform_dim (gcall *call, const int d
 
   gimple_stmt_iterator gsi = gsi_for_stmt (call);
   gsi_replace (&gsi, g, false);
+  return true;
 }
 
 /* Validate and update the dimensions for offloaded FN.  ATTRS is the
@@ -14868,64 +14834,57 @@ execute_oacc_transform ()
     for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
       {
 	gimple *stmt = gsi_stmt (gsi);
-	int rescan = 0;
-	
 	if (!is_gimple_call (stmt))
 	  {
 	    gsi_next (&gsi);
 	    continue;
 	  }
 
+	gcall *call = as_a <gcall *> (stmt);
+	if (!gimple_call_internal_p (call))
+	  {
+	    gsi_next (&gsi);
+	    continue;
+	  }
+
 	/* Rewind to allow rescan.  */
 	gsi_prev (&gsi);
+	int rescan = 0;
+	unsigned ifn_code = gimple_call_internal_fn (call);
 
-	gcall *call = as_a <gcall *> (stmt);
-	
-	if (gimple_call_builtin_p (call, BUILT_IN_ACC_ON_DEVICE))
-	  /* acc_on_device must be evaluated at compile time for
-	     constant arguments.  */
+	switch (ifn_code)
 	  {
-	    oacc_xform_on_device (call);
+	  default: break;
+
+	  case IFN_GOACC_DIM_POS:
+	  case IFN_GOACC_DIM_SIZE:
+	    if (oacc_xform_dim (call, dims, ifn_code == IFN_GOACC_DIM_POS))
+	      rescan = 1;
+	    break;
+
+	  case IFN_GOACC_REDUCTION_SETUP:
+	  case IFN_GOACC_REDUCTION_INIT:
+	  case IFN_GOACC_REDUCTION_FINI:
+	  case IFN_GOACC_REDUCTION_TEARDOWN:
+	    /* Mark the function for SSA renaming.  */
+	    mark_virtual_operands_for_renaming (cfun);
+	    targetm.goacc.reduction (call);
 	    rescan = 1;
+	    break;
+
+	  case IFN_UNIQUE:
+	    {
+	      unsigned code = TREE_INT_CST_LOW (gimple_call_arg (call, 0));
+
+	      if ((code == IFN_UNIQUE_OACC_FORK
+		   || code == IFN_UNIQUE_OACC_JOIN)
+		  && (targetm.goacc.fork_join
+		      (call, dims, code == IFN_UNIQUE_OACC_FORK)))
+		rescan = -1;
+	      break;
+	    }
 	  }
-	else if (gimple_call_internal_p (call))
-	  {
-	    unsigned ifn_code = gimple_call_internal_fn (call);
-	    switch (ifn_code)
-	      {
-	      default: break;
-
-	      case IFN_GOACC_DIM_POS:
-	      case IFN_GOACC_DIM_SIZE:
-		oacc_xform_dim (call, dims, ifn_code == IFN_GOACC_DIM_POS);
-		rescan = 0;
-		break;
-
-	      case IFN_GOACC_REDUCTION_SETUP:
-	      case IFN_GOACC_REDUCTION_INIT:
-	      case IFN_GOACC_REDUCTION_FINI:
-	      case IFN_GOACC_REDUCTION_TEARDOWN:
-		/* Mark the function for SSA renaming.  */
-		mark_virtual_operands_for_renaming (cfun);
-		if (targetm.goacc.reduction (call))
-		  rescan = 1;
-		break;
-
-	      case IFN_UNIQUE:
-		{
-		  unsigned code;
-
-		  code = TREE_INT_CST_LOW (gimple_call_arg (call, 0));
-
-		  if ((code == IFN_UNIQUE_OACC_FORK
-		       || code == IFN_UNIQUE_OACC_JOIN)
-		      && (targetm.goacc.fork_join
-			  (call, dims, code == IFN_UNIQUE_OACC_FORK)))
-		    rescan = -1;
-		  break;
-		}
-	      }
-	  }
+
 	if (gsi_end_p (gsi))
 	  /* We rewound past the beginning of the BB.  */
 	  gsi = gsi_start_bb (bb);
@@ -14934,12 +14893,8 @@ execute_oacc_transform ()
 	  gsi_next (&gsi);
 
 	if (!rescan)
-	  {
-	    /* If not rescanning, advance over the call.  */
-	    if (gsi_end_p (gsi))
-	      break;
-	    gsi_next (&gsi);
-	  }
+	  /* If not rescanning, advance over the call.  */
+	  gsi_next (&gsi);
 	else if (rescan < 0)
 	  {
 	    replace_uses_by (gimple_vdef (call),
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 228215)
+++ gcc/builtins.c	(working copy)
@@ -5859,47 +5859,6 @@ expand_stack_save (void)
 }
 
 
-/* Expand OpenACC acc_on_device.  This is usually expanded in the
-   oacc_transform pass, earlier on, but if used outside of an offloaded region,
-   we'll find it here.  */
-
-static rtx
-expand_builtin_acc_on_device (tree exp, rtx target)
-{
-#ifndef ACCEL_COMPILER
-  gcc_assert (!get_oacc_fn_attrib (current_function_decl));
-#endif
-  
-  if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
-    return NULL_RTX;
-
-  tree arg = CALL_EXPR_ARG (exp, 0);
-
-  /* Return (arg == v1 || arg == v2) ? 1 : 0.  */
-  machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
-  rtx v = expand_normal (arg), v1, v2;
-#ifdef ACCEL_COMPILER
-  v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
-  v2 = GEN_INT (ACCEL_COMPILER_acc_device);
-#else
-  v1 = GEN_INT (GOMP_DEVICE_NONE);
-  v2 = GEN_INT (GOMP_DEVICE_HOST);
-#endif
-  machine_mode target_mode = TYPE_MODE (integer_type_node);
-  if (!target || !register_operand (target, target_mode))
-    target = gen_reg_rtx (target_mode);
-  emit_move_insn (target, const1_rtx);
-  rtx_code_label *done_label = gen_label_rtx ();
-  do_compare_rtx_and_jump (v, v1, EQ, false, v_mode, NULL_RTX,
-			   NULL, done_label, PROB_EVEN);
-  do_compare_rtx_and_jump (v, v2, EQ, false, v_mode, NULL_RTX,
-			   NULL, done_label, PROB_EVEN);
-  emit_move_insn (target, const0_rtx);
-  emit_label (done_label);
-
-  return target;
-}
-
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient
    (and in mode MODE if that's convenient).
@@ -7037,9 +6996,8 @@ expand_builtin (tree exp, rtx target, rt
       break;
 
     case BUILT_IN_ACC_ON_DEVICE:
-      target = expand_builtin_acc_on_device (exp, target);
-      if (target)
-	return target;
+      /* Do library call, if we failed to expand the builtin when
+	 folding.  */
       break;
 
     default:	/* just do library call, if unknown builtin */
@@ -10272,6 +10230,27 @@ fold_builtin_1 (location_t loc, tree fnd
 	return build_empty_stmt (loc);
       break;
 
+    case BUILT_IN_ACC_ON_DEVICE:
+      /* Don't fold on_device until we know which compiler is active.  */
+      if (symtab->state == EXPANSION)
+	{
+	  unsigned val_host = GOMP_DEVICE_HOST;
+	  unsigned val_dev = GOMP_DEVICE_NONE;
+
+#ifdef ACCEL_COMPILER
+	  val_host = GOMP_DEVICE_NOT_HOST;
+	  val_dev = ACCEL_COMPILER_acc_device;
+#endif
+	  tree host = build2 (EQ_EXPR, boolean_type_node, arg0,
+			      build_int_cst (integer_type_node, val_host));
+	  tree dev = build2 (EQ_EXPR, boolean_type_node, arg0,
+			     build_int_cst (integer_type_node, val_dev));
+
+	  tree result = build2 (TRUTH_OR_EXPR, boolean_type_node, host, dev);
+	  return fold_convert (integer_type_node, result);
+	}
+      break;
+
     default:
       break;
     }
Index: libgomp/oacc-init.c
===================================================================
--- libgomp/oacc-init.c	(revision 228215)
+++ libgomp/oacc-init.c	(working copy)
@@ -638,14 +638,12 @@ acc_set_device_num (int ord, acc_device_
 
 ialias (acc_set_device_num)
 
-/* The compiler always attempts to expand acc_on_device, but if the
-   user disables the builtin, or calls it via a pointer, we have this
-   version.  */
+/* Compile on_device with optimization, so that the compiler expands
+   this, rather than generating infinitely recursive code.  */
 
-int
+int __attribute__ ((__optimize__ ("O2")))
 acc_on_device (int dev)
 {
-  /* Just rely on the compiler builtin.  */
   return __builtin_acc_on_device (dev);
 }
 
Index: libgomp/config/nvptx/oacc-init.c
===================================================================
--- libgomp/config/nvptx/oacc-init.c	(revision 228215)
+++ libgomp/config/nvptx/oacc-init.c	(working copy)
@@ -28,13 +28,11 @@
 
 #include "openacc.h"
 
-/* The compiler always attempts to expand acc_on_device, but if the
-   user disables the builtin, or calls it via a pointer, we have this
-   version.  */
+/* Compile on_device with optimization, so that the compiler expands
+   this, rather than generating infinitely recursive code.  */
 
-int
+int __attribute__ ((__optimize__ ("O2")))
 acc_on_device (int dev)
 {
-  /* Just rely on the compiler builtin.  */
   return __builtin_acc_on_device (dev);
 }

Reply via email to