[PATCH] [OpenACC] Add tests for implied copy of variables in reduction clause.

2023-12-20 Thread Abid Qadeer
From: Hafiz Abid Qadeer 

The OpenACC reduction clause on a compute construct implies a copy clause
for each reduction variable [1]. This patch adds tests to check that the
implied copy is being generated. The tests cover the various types and
operators described in the specification.

gcc/testsuite/ChangeLog:

* c-c++-common/goacc/implied-copy-1.c: New test.
* c-c++-common/goacc/implied-copy-2.c: New test.
* g++.dg/goacc/implied-copy.C: New test.
* gcc.dg/goacc/implied-copy.c: New test.
* gfortran.dg/goacc/implied-copy-1.f90: New test.
* gfortran.dg/goacc/implied-copy-2.f90: New test.

[1] OpenACC 2.7 Specification section 2.5.13
---
 .../c-c++-common/goacc/implied-copy-1.c   |  33 
 .../c-c++-common/goacc/implied-copy-2.c   | 121 +
 gcc/testsuite/g++.dg/goacc/implied-copy.C |  24 +++
 gcc/testsuite/gcc.dg/goacc/implied-copy.c |  29 
 .../gfortran.dg/goacc/implied-copy-1.f90  |  35 
 .../gfortran.dg/goacc/implied-copy-2.f90  | 160 ++
 6 files changed, 402 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/implied-copy-1.c
 create mode 100644 gcc/testsuite/c-c++-common/goacc/implied-copy-2.c
 create mode 100644 gcc/testsuite/g++.dg/goacc/implied-copy.C
 create mode 100644 gcc/testsuite/gcc.dg/goacc/implied-copy.c
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/implied-copy-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/implied-copy-2.f90

diff --git a/gcc/testsuite/c-c++-common/goacc/implied-copy-1.c 
b/gcc/testsuite/c-c++-common/goacc/implied-copy-1.c
new file mode 100644
index 000..ae06339dc2d
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/implied-copy-1.c
@@ -0,0 +1,33 @@
+/* { dg-additional-options "-fdump-tree-gimple" } */
+
+/* Test for implied copy of reduction variable on combined construct.  */
+void test1 (void)
+{
+  int i, sum = 0, prod = 1, a[100];
+
+  #pragma acc kernels loop reduction(+:sum) reduction(*:prod)
+  for (int i = 0; i < 10; ++i)
+  {
+sum += a[i];
+prod *= a[i];
+  }
+
+  #pragma acc parallel loop reduction(+:sum) reduction(*:prod)
+  for (int i = 0; i < 10; ++i)
+  {
+sum += a[i];
+prod *= a[i];
+  }
+
+  #pragma acc serial loop reduction(+:sum) reduction(*:prod)
+  for (int i = 0; i < 10; ++i)
+  {
+sum += a[i];
+prod *= a[i];
+  }
+}
+
+/* { dg-final { scan-tree-dump-times "map\\(force_tofrom:sum \\\[len: 
\[0-9\]+\\\]\\)" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(force_tofrom:prod \\\[len: 
\[0-9\]+\\\]\\)" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:sum \\\[len: 
\[0-9\]+\\\]\\)" 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:prod \\\[len: 
\[0-9\]+\\\]\\)" 2 "gimple" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/implied-copy-2.c 
b/gcc/testsuite/c-c++-common/goacc/implied-copy-2.c
new file mode 100644
index 000..124f128964d
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/implied-copy-2.c
@@ -0,0 +1,121 @@
+/* { dg-additional-options "-fdump-tree-gimple" } */
+
+/* Test that reduction on compute construct implies a copy of the reduction
+  variable .  */
+
+#define n 1000
+
+#if __cplusplus
+  typedef bool BOOL;
+#else
+  typedef _Bool BOOL;
+#endif
+
+int
+main(void)
+{
+  int i;
+  int sum = 0;
+  int prod = 1;
+  int result = 0;
+  int tmp = 1;
+  int array[n];
+
+  double sumd = 0.0;
+  double arrayd[n];
+
+  float sumf = 0.0;
+  float arrayf[n];
+
+  char sumc;
+  char arrayc[n];
+
+  BOOL lres;
+
+#pragma acc parallel reduction(+:sum, sumf, sumd, sumc) reduction(*:prod)
+  for (i = 0; i < n; i++)
+{
+  sum += array[i];
+  sumf += arrayf[i];
+  sumd += arrayd[i];
+  sumc += arrayc[i];
+  prod *= array[i];
+}
+
+#pragma acc parallel reduction (max:result)
+  for (i = 0; i < n; i++)
+result = result > array[i] ? result : array[i];
+
+#pragma acc parallel reduction (min:result)
+  for (i = 0; i < n; i++)
+result = result < array[i] ? result : array[i];
+
+#pragma acc parallel reduction (&:result)
+  for (i = 0; i < n; i++)
+result &= array[i];
+
+#pragma acc parallel reduction (|:result)
+  for (i = 0; i < n; i++)
+result |= array[i];
+
+#pragma acc parallel reduction (^:result)
+  for (i = 0; i < n; i++)
+result ^= array[i];
+
+#pragma acc parallel reduction (&&:lres) copy(tmp)
+  for (i = 0; i < n; i++)
+lres = lres && (tmp > array[i]);
+
+#pragma acc parallel reduction (||:lres) copy(tmp)
+  for (i = 0; i < n; i++)
+lres = lres || (tmp > array[i]);
+
+  /* Same checks on serial construct.  */
+#pragma acc serial reduction(+:sum, sumf, sumd, sumc) reduction(*:prod)
+  for (i = 0; i < n; i++)
+{
+  sum += array[i];
+  sumf += arrayf[i];
+  sumd += arrayd[i];
+  s

[PATCH 0/5] [gfortran] Support for allocate directive (OpenMP 5.0)

2022-01-13 Thread Hafiz Abid Qadeer
This patch series adds initial support for the allocate directive in
gfortran.  Although every allocate directive is parsed, only those
which are associated with an allocate statement are translated.  The
lowering consists of replacing the implicitly generated malloc/free calls
from the allocate statement with GOMP_alloc and GOMP_free calls.

Hafiz Abid Qadeer (5):
  [gfortran] Add parsing support for allocate directive (OpenMP 5.0).
  [gfortran] Translate allocate directive (OpenMP 5.0).
  [gfortran] Handle cleanup of omp allocated variables (OpenMP 5.0).
  Gimplify allocate directive (OpenMP 5.0).
  Lower allocate directive  (OpenMP 5.0).

 gcc/doc/gimple.texi   |  38 ++-
 gcc/fortran/dump-parse-tree.c |   3 +
 gcc/fortran/gfortran.h|   5 +-
 gcc/fortran/match.h   |   1 +
 gcc/fortran/openmp.c  | 229 +-
 gcc/fortran/parse.c   |  10 +-
 gcc/fortran/resolve.c |   1 +
 gcc/fortran/st.c  |   1 +
 gcc/fortran/trans-decl.c  |  20 ++
 gcc/fortran/trans-openmp.c|  50 
 gcc/fortran/trans.c   |   1 +
 gcc/gimple-pretty-print.c |  37 +++
 gcc/gimple.c  |  10 +
 gcc/gimple.def|   6 +
 gcc/gimple.h  |  60 -
 gcc/gimplify.c|  19 ++
 gcc/gsstruct.def  |   1 +
 gcc/omp-low.c | 125 ++
 gcc/testsuite/gfortran.dg/gomp/allocate-4.f90 | 112 +
 gcc/testsuite/gfortran.dg/gomp/allocate-5.f90 |  73 ++
 gcc/testsuite/gfortran.dg/gomp/allocate-6.f90 |  84 +++
 gcc/tree-core.h   |   9 +
 gcc/tree-pretty-print.c   |  23 ++
 gcc/tree.c|   1 +
 gcc/tree.def  |   4 +
 gcc/tree.h|  15 ++
 .../testsuite/libgomp.fortran/allocate-1.c|   7 +
 .../testsuite/libgomp.fortran/allocate-2.f90  |  49 
 28 files changed, 986 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-4.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-5.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-6.f90
 create mode 100644 libgomp/testsuite/libgomp.fortran/allocate-1.c
 create mode 100644 libgomp/testsuite/libgomp.fortran/allocate-2.f90

-- 
2.25.1



[PATCH 1/5] [gfortran] Add parsing support for allocate directive (OpenMP 5.0).

2022-01-13 Thread Hafiz Abid Qadeer
Currently we only make use of this directive when it is associated
with an allocate statement.

gcc/fortran/ChangeLog:

* dump-parse-tree.c (show_omp_node): Handle EXEC_OMP_ALLOCATE.
(show_code_node): Likewise.
* gfortran.h (enum gfc_statement): Add ST_OMP_ALLOCATE.
(OMP_LIST_ALLOCATOR): New enum value.
(enum gfc_exec_op): Add EXEC_OMP_ALLOCATE.
* match.h (gfc_match_omp_allocate): New function.
* openmp.c (enum omp_mask1): Add OMP_CLAUSE_ALLOCATOR.
(OMP_ALLOCATE_CLAUSES): New define.
(gfc_match_omp_allocate): New function.
(resolve_omp_clauses): Add ALLOCATOR in clause_names.
(omp_code_to_statement): Handle EXEC_OMP_ALLOCATE.
(EMPTY_VAR_LIST): New define.
(check_allocate_directive_restrictions): New function.
(gfc_resolve_omp_allocate): Likewise.
(gfc_resolve_omp_directive): Handle EXEC_OMP_ALLOCATE.
* parse.c (decode_omp_directive): Handle ST_OMP_ALLOCATE.
(next_statement): Likewise.
(gfc_ascii_statement): Likewise.
* resolve.c (gfc_resolve_code): Handle EXEC_OMP_ALLOCATE.
* st.c (gfc_free_statement): Likewise.
* trans.c (trans_code): Likewise.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/allocate-4.f90: New test.
* gfortran.dg/gomp/allocate-5.f90: New test.
---
 gcc/fortran/dump-parse-tree.c |   3 +
 gcc/fortran/gfortran.h|   4 +-
 gcc/fortran/match.h   |   1 +
 gcc/fortran/openmp.c  | 199 +-
 gcc/fortran/parse.c   |  10 +-
 gcc/fortran/resolve.c |   1 +
 gcc/fortran/st.c  |   1 +
 gcc/fortran/trans.c   |   1 +
 gcc/testsuite/gfortran.dg/gomp/allocate-4.f90 | 112 ++
 gcc/testsuite/gfortran.dg/gomp/allocate-5.f90 |  73 +++
 10 files changed, 400 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-4.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-5.f90

diff --git a/gcc/fortran/dump-parse-tree.c b/gcc/fortran/dump-parse-tree.c
index 7459f4b89a9..38fef42150a 100644
--- a/gcc/fortran/dump-parse-tree.c
+++ b/gcc/fortran/dump-parse-tree.c
@@ -1993,6 +1993,7 @@ show_omp_node (int level, gfc_code *c)
 case EXEC_OACC_CACHE: name = "CACHE"; is_oacc = true; break;
 case EXEC_OACC_ENTER_DATA: name = "ENTER DATA"; is_oacc = true; break;
 case EXEC_OACC_EXIT_DATA: name = "EXIT DATA"; is_oacc = true; break;
+case EXEC_OMP_ALLOCATE: name = "ALLOCATE"; break;
 case EXEC_OMP_ATOMIC: name = "ATOMIC"; break;
 case EXEC_OMP_BARRIER: name = "BARRIER"; break;
 case EXEC_OMP_CANCEL: name = "CANCEL"; break;
@@ -2194,6 +2195,7 @@ show_omp_node (int level, gfc_code *c)
   || c->op == EXEC_OMP_TARGET_UPDATE || c->op == EXEC_OMP_TARGET_ENTER_DATA
   || c->op == EXEC_OMP_TARGET_EXIT_DATA || c->op == EXEC_OMP_SCAN
   || c->op == EXEC_OMP_DEPOBJ || c->op == EXEC_OMP_ERROR
+  || c->op == EXEC_OMP_ALLOCATE
   || (c->op == EXEC_OMP_ORDERED && c->block == NULL))
 return;
   if (c->op == EXEC_OMP_SECTIONS || c->op == EXEC_OMP_PARALLEL_SECTIONS)
@@ -3314,6 +3316,7 @@ show_code_node (int level, gfc_code *c)
 case EXEC_OACC_CACHE:
 case EXEC_OACC_ENTER_DATA:
 case EXEC_OACC_EXIT_DATA:
+case EXEC_OMP_ALLOCATE:
 case EXEC_OMP_ATOMIC:
 case EXEC_OMP_CANCEL:
 case EXEC_OMP_CANCELLATION_POINT:
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 3b791a4f6be..79a43a2fdf0 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -259,7 +259,7 @@ enum gfc_statement
   ST_OACC_CACHE, ST_OACC_KERNELS_LOOP, ST_OACC_END_KERNELS_LOOP,
   ST_OACC_SERIAL_LOOP, ST_OACC_END_SERIAL_LOOP, ST_OACC_SERIAL,
   ST_OACC_END_SERIAL, ST_OACC_ENTER_DATA, ST_OACC_EXIT_DATA, ST_OACC_ROUTINE,
-  ST_OACC_ATOMIC, ST_OACC_END_ATOMIC,
+  ST_OACC_ATOMIC, ST_OACC_END_ATOMIC, ST_OMP_ALLOCATE,
   ST_OMP_ATOMIC, ST_OMP_BARRIER, ST_OMP_CRITICAL, ST_OMP_END_ATOMIC,
   ST_OMP_END_CRITICAL, ST_OMP_END_DO, ST_OMP_END_MASTER, ST_OMP_END_ORDERED,
   ST_OMP_END_PARALLEL, ST_OMP_END_PARALLEL_DO, ST_OMP_END_PARALLEL_SECTIONS,
@@ -1392,6 +1392,7 @@ enum
   OMP_LIST_USE_DEVICE_PTR,
   OMP_LIST_USE_DEVICE_ADDR,
   OMP_LIST_NONTEMPORAL,
+  OMP_LIST_ALLOCATOR,
   OMP_LIST_NUM
 };
 
@@ -2893,6 +2894,7 @@ enum gfc_exec_op
   EXEC_OACC_DATA, EXEC_OACC_HOST_DATA, EXEC_OACC_LOOP, EXEC_OACC_UPDATE,
   EXEC_OACC_WAIT, EXEC_OACC_CACHE, EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA,
   EXEC_OACC_ATOMIC, EXEC_OACC_DECLARE,
+  EXEC_OMP_ALLOCATE,
   EXEC_OMP_CRITICAL, EXEC_OMP_DO, EXEC_OMP_FLUSH, EXEC_OMP_MASTER,
   EXEC_OMP_ORDERED, EXEC_OMP_PARALLEL, EXEC_OMP_PARALLEL_DO,
   EXEC_OMP_PARALLEL_SECTIONS, EXEC_OMP_PARALLEL_WORKSHARE,
diff --git a/gcc/fortran/match.h b/gcc/fortran/match.h
index 65ee3b6cb41..9f0449eda0e 100644
--- a/gcc/fortran/match.h
+++ b/gcc/fortr

[PATCH 2/5] [gfortran] Translate allocate directive (OpenMP 5.0).

2022-01-13 Thread Hafiz Abid Qadeer
gcc/fortran/ChangeLog:

* trans-openmp.c (gfc_trans_omp_clauses): Handle OMP_LIST_ALLOCATOR.
(gfc_trans_omp_allocate): New function.
(gfc_trans_omp_directive): Handle EXEC_OMP_ALLOCATE.

gcc/ChangeLog:

* tree-pretty-print.c (dump_omp_clause): Handle OMP_CLAUSE_ALLOCATOR.
(dump_generic_node): Handle OMP_ALLOCATE.
* tree.def (OMP_ALLOCATE): New.
* tree.h (OMP_ALLOCATE_CLAUSES): Likewise.
(OMP_ALLOCATE_DECL): Likewise.
(OMP_ALLOCATE_ALLOCATOR): Likewise.
* tree.c (omp_clause_num_ops): Add entry for OMP_CLAUSE_ALLOCATOR.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/allocate-6.f90: New test.
---
 gcc/fortran/trans-openmp.c| 44 
 gcc/testsuite/gfortran.dg/gomp/allocate-6.f90 | 72 +++
 gcc/tree-core.h   |  3 +
 gcc/tree-pretty-print.c   | 19 +
 gcc/tree.c|  1 +
 gcc/tree.def  |  4 ++
 gcc/tree.h| 11 +++
 7 files changed, 154 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-6.f90

diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 9661c77f905..cb389f40370 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -2649,6 +2649,28 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
  }
  }
  break;
+   case OMP_LIST_ALLOCATOR:
+ for (; n != NULL; n = n->next)
+   if (n->sym->attr.referenced)
+ {
+   tree t = gfc_trans_omp_variable (n->sym, false);
+   if (t != error_mark_node)
+ {
+   tree node = build_omp_clause (input_location,
+ OMP_CLAUSE_ALLOCATOR);
+   OMP_ALLOCATE_DECL (node) = t;
+   if (n->expr)
+ {
+   tree allocator_;
+   gfc_init_se (&se, NULL);
+   gfc_conv_expr (&se, n->expr);
+   allocator_ = gfc_evaluate_now (se.expr, block);
+   OMP_ALLOCATE_ALLOCATOR (node) = allocator_;
+ }
+   omp_clauses = gfc_trans_add_clause (node, omp_clauses);
+ }
+ }
+ break;
case OMP_LIST_LINEAR:
  {
gfc_expr *last_step_expr = NULL;
@@ -4888,6 +4910,26 @@ gfc_trans_omp_atomic (gfc_code *code)
   return gfc_finish_block (&block);
 }
 
+static tree
+gfc_trans_omp_allocate (gfc_code *code)
+{
+  stmtblock_t block;
+  tree stmt;
+
+  gfc_omp_clauses *clauses = code->ext.omp_clauses;
+  gcc_assert (clauses);
+
+  gfc_start_block (&block);
+  stmt = make_node (OMP_ALLOCATE);
+  TREE_TYPE (stmt) = void_type_node;
+  OMP_ALLOCATE_CLAUSES (stmt) = gfc_trans_omp_clauses (&block, clauses,
+  code->loc, false,
+  true);
+  gfc_add_expr_to_block (&block, stmt);
+  gfc_merge_block_scope (&block);
+  return gfc_finish_block (&block);
+}
+
 static tree
 gfc_trans_omp_barrier (void)
 {
@@ -7280,6 +7322,8 @@ gfc_trans_omp_directive (gfc_code *code)
 {
   switch (code->op)
 {
+case EXEC_OMP_ALLOCATE:
+  return gfc_trans_omp_allocate (code);
 case EXEC_OMP_ATOMIC:
   return gfc_trans_omp_atomic (code);
 case EXEC_OMP_BARRIER:
diff --git a/gcc/testsuite/gfortran.dg/gomp/allocate-6.f90 
b/gcc/testsuite/gfortran.dg/gomp/allocate-6.f90
new file mode 100644
index 000..2de2b52ee44
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/allocate-6.f90
@@ -0,0 +1,72 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original" }
+
+module omp_lib_kinds
+  use iso_c_binding, only: c_int, c_intptr_t
+  implicit none
+  private :: c_int, c_intptr_t
+  integer, parameter :: omp_allocator_handle_kind = c_intptr_t
+
+  integer (kind=omp_allocator_handle_kind), &
+ parameter :: omp_null_allocator = 0
+  integer (kind=omp_allocator_handle_kind), &
+ parameter :: omp_default_mem_alloc = 1
+  integer (kind=omp_allocator_handle_kind), &
+ parameter :: omp_large_cap_mem_alloc = 2
+  integer (kind=omp_allocator_handle_kind), &
+ parameter :: omp_const_mem_alloc = 3
+  integer (kind=omp_allocator_handle_kind), &
+ parameter :: omp_high_bw_mem_alloc = 4
+  integer (kind=omp_allocator_handle_kind), &
+ parameter :: omp_low_lat_mem_alloc = 5
+  integer (kind=omp_allocator_handle_kind), &
+ parameter :: omp_cgroup_mem_alloc = 6
+  integer (kind=omp_allocator_handle_kind), &
+ parameter :: omp_pteam_mem_alloc = 7
+  integer (kind=omp_allocator_handle_kind), &
+ parameter :: omp_thread_mem_alloc = 8
+end module
+
+
+subroutine foo(x, y, al)
+  use omp_lib_kinds
+  implicit none
+  
+type :: my_type
+  in

[PATCH 3/5] [gfortran] Handle cleanup of omp allocated variables (OpenMP 5.0).

2022-01-13 Thread Hafiz Abid Qadeer
Currently we only handle an omp allocate directive that is associated
with an allocate statement.  This statement results in malloc and free calls.
The malloc calls are easy to get to as they are in the same block as the
allocate directive, but the free calls come in a separate cleanup block.  To
help any later passes find them, an allocate directive is generated in the
cleanup block with kind=free.  The normal allocate directive is given
kind=allocate.

gcc/fortran/ChangeLog:

* gfortran.h (struct gfc_symbol): Declare new members
omp_allocated and omp_allocated_end.
* openmp.c (gfc_match_omp_allocate): Set new_st.resolved_sym to
NULL.
(prepare_omp_allocated_var_list_for_cleanup): New function.
(gfc_resolve_omp_allocate): Call it.
* trans-decl.c (gfc_trans_deferred_vars): Process omp_allocated.
* trans-openmp.c (gfc_trans_omp_allocate): Set kind for the stmt
generated for allocate directive.

gcc/ChangeLog:

* tree-core.h (struct tree_base): Add comments.
* tree-pretty-print.c (dump_generic_node): Handle allocate directive
kind.
* tree.h (OMP_ALLOCATE_KIND_ALLOCATE): New define.
(OMP_ALLOCATE_KIND_FREE): Likewise.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/allocate-6.f90: Test kind of allocate directive.
---
 gcc/fortran/gfortran.h|  1 +
 gcc/fortran/openmp.c  | 30 +++
 gcc/fortran/trans-decl.c  | 20 +
 gcc/fortran/trans-openmp.c|  6 
 gcc/testsuite/gfortran.dg/gomp/allocate-6.f90 |  3 +-
 gcc/tree-core.h   |  6 
 gcc/tree-pretty-print.c   |  4 +++
 gcc/tree.h|  4 +++
 8 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 79a43a2fdf0..6a43847d31f 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -1820,6 +1820,7 @@ typedef struct gfc_symbol
   gfc_array_spec *as;
   struct gfc_symbol *result;   /* function result symbol */
   gfc_component *components;   /* Derived type components */
+  gfc_omp_namelist *omp_allocated, *omp_allocated_end;
 
   /* Defined only for Cray pointees; points to their pointer.  */
   struct gfc_symbol *cp_pointer;
diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index ee7c39980bb..f11812b0b12 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -5818,6 +5818,7 @@ gfc_match_omp_allocate (void)
 
   new_st.op = EXEC_OMP_ALLOCATE;
   new_st.ext.omp_clauses = c;
+  new_st.resolved_sym = NULL;
   gfc_free_expr (allocator);
   return MATCH_YES;
 }
@@ -9049,6 +9050,34 @@ gfc_resolve_oacc_routines (gfc_namespace *ns)
 }
 }
 
+static void
+prepare_omp_allocated_var_list_for_cleanup (gfc_omp_namelist *cn, locus loc)
+{
+  gfc_symbol *proc = cn->sym->ns->proc_name;
+  gfc_omp_namelist *p, *n;
+
+  for (n = cn; n; n = n->next)
+{
+  if (n->sym->attr.allocatable && !n->sym->attr.save
+ && !n->sym->attr.result && !proc->attr.is_main_program)
+   {
+ p = gfc_get_omp_namelist ();
+ p->sym = n->sym;
+ p->expr = gfc_copy_expr (n->expr);
+ p->where = loc;
+ p->next = NULL;
+ if (proc->omp_allocated == NULL)
+   proc->omp_allocated_end = proc->omp_allocated = p;
+ else
+   {
+ proc->omp_allocated_end->next = p;
+ proc->omp_allocated_end = p;
+   }
+
+   }
+}
+}
+
 static void
 check_allocate_directive_restrictions (gfc_symbol *sym, gfc_expr *omp_al,
   gfc_namespace *ns, locus loc)
@@ -9179,6 +9208,7 @@ gfc_resolve_omp_allocate (gfc_code *code, gfc_namespace 
*ns)
 code->loc);
}
 }
+  prepare_omp_allocated_var_list_for_cleanup (cn, code->loc);
 }
 
 
diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index 066fb3a5f61..e5c9bf413e7 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -4583,6 +4583,26 @@ gfc_trans_deferred_vars (gfc_symbol * proc_sym, 
gfc_wrapped_block * block)
  }
 }
 
+  /* Generate a dummy allocate pragma with free kind so that cleanup
+ of those variables which were allocated using the allocate statement
+ associated with an allocate clause happens correctly.  */
+
+  if (proc_sym->omp_allocated)
+{
+  gfc_clear_new_st ();
+  new_st.op = EXEC_OMP_ALLOCATE;
+  gfc_omp_clauses *c = gfc_get_omp_clauses ();
+  c->lists[OMP_LIST_ALLOCATOR] = proc_sym->omp_allocated;
+  new_st.ext.omp_clauses = c;
+  /* This is just a hacky way to convey to handler that we are
+dealing with cleanup here.  Saves us from using another field
+for it.  */
+  new_st.resolved_sym = proc_sym->omp_allocated->sym;
+  gfc_add_init_cleanup (block, NULL,
+ 

[PATCH 4/5] [gfortran] Gimplify allocate directive (OpenMP 5.0).

2022-01-13 Thread Hafiz Abid Qadeer
gcc/ChangeLog:

* doc/gimple.texi: Describe GIMPLE_OMP_ALLOCATE.
* gimple-pretty-print.c (dump_gimple_omp_allocate): New function.
(pp_gimple_stmt_1): Call it.
* gimple.c (gimple_build_omp_allocate): New function.
* gimple.def (GIMPLE_OMP_ALLOCATE): New node.
* gimple.h (enum gf_mask): Add GF_OMP_ALLOCATE_KIND_MASK,
GF_OMP_ALLOCATE_KIND_ALLOCATE and GF_OMP_ALLOCATE_KIND_FREE.
(struct gomp_allocate): New.
(is_a_helper <gomp_allocate *>::test): New.
(is_a_helper <const gomp_allocate *>::test): New.
(gimple_build_omp_allocate): Declare.
(gimple_omp_subcode): Replace GIMPLE_OMP_TEAMS with
GIMPLE_OMP_ALLOCATE.
(gimple_omp_allocate_set_clauses): New.
(gimple_omp_allocate_set_kind): Likewise.
(gimple_omp_allocate_clauses): Likewise.
(gimple_omp_allocate_kind): Likewise.
(CASE_GIMPLE_OMP): Add GIMPLE_OMP_ALLOCATE.
* gimplify.c (gimplify_omp_allocate): New.
(gimplify_expr): Call it.
* gsstruct.def (GSS_OMP_ALLOCATE): Define.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/allocate-6.f90: Add tests.
---
 gcc/doc/gimple.texi   | 38 +++-
 gcc/gimple-pretty-print.c | 37 
 gcc/gimple.c  | 10 
 gcc/gimple.def|  6 ++
 gcc/gimple.h  | 60 ++-
 gcc/gimplify.c| 19 ++
 gcc/gsstruct.def  |  1 +
 gcc/testsuite/gfortran.dg/gomp/allocate-6.f90 |  4 +-
 8 files changed, 171 insertions(+), 4 deletions(-)

diff --git a/gcc/doc/gimple.texi b/gcc/doc/gimple.texi
index 65ef63d6ee9..60a4d2c17ca 100644
--- a/gcc/doc/gimple.texi
+++ b/gcc/doc/gimple.texi
@@ -420,6 +420,9 @@ kinds, along with their relationships to @code{GSS_} values 
(layouts) and
  + gomp_continue
  |layout: GSS_OMP_CONTINUE, code: GIMPLE_OMP_CONTINUE
  |
+ + gomp_allocate
+ |layout: GSS_OMP_ALLOCATE, code: GIMPLE_OMP_ALLOCATE
+ |
  + gomp_atomic_load
  |layout: GSS_OMP_ATOMIC_LOAD, code: GIMPLE_OMP_ATOMIC_LOAD
  |
@@ -454,6 +457,7 @@ The following table briefly describes the GIMPLE 
instruction set.
 @item @code{GIMPLE_GOTO}   @tab x  @tab x
 @item @code{GIMPLE_LABEL}  @tab x  @tab x
 @item @code{GIMPLE_NOP}@tab x  @tab x
+@item @code{GIMPLE_OMP_ALLOCATE}   @tab x  @tab x
 @item @code{GIMPLE_OMP_ATOMIC_LOAD}@tab x  @tab x
 @item @code{GIMPLE_OMP_ATOMIC_STORE}   @tab x  @tab x
 @item @code{GIMPLE_OMP_CONTINUE}   @tab x  @tab x
@@ -1029,6 +1033,7 @@ Return a deep copy of statement @code{STMT}.
 * @code{GIMPLE_LABEL}::
 * @code{GIMPLE_GOTO}::
 * @code{GIMPLE_NOP}::
+* @code{GIMPLE_OMP_ALLOCATE}::
 * @code{GIMPLE_OMP_ATOMIC_LOAD}::
 * @code{GIMPLE_OMP_ATOMIC_STORE}::
 * @code{GIMPLE_OMP_CONTINUE}::
@@ -1729,6 +1734,38 @@ Build a @code{GIMPLE_NOP} statement.
 Returns @code{TRUE} if statement @code{G} is a @code{GIMPLE_NOP}.
 @end deftypefn
 
+@node @code{GIMPLE_OMP_ALLOCATE}
+@subsection @code{GIMPLE_OMP_ALLOCATE}
+@cindex @code{GIMPLE_OMP_ALLOCATE}
+
+@deftypefn {GIMPLE function} gomp_allocate *gimple_build_omp_allocate ( @
+tree clauses, int kind)
+Build a @code{GIMPLE_OMP_ALLOCATE} statement.  @code{CLAUSES} is the clauses
+associated with this node.  @code{KIND} is the enumeration value
+@code{GF_OMP_ALLOCATE_KIND_ALLOCATE} if this directive allocates memory
+or @code{GF_OMP_ALLOCATE_KIND_FREE} if it de-allocates.
+@end deftypefn
+
+@deftypefn {GIMPLE function} void gimple_omp_allocate_set_clauses ( @
+gomp_allocate *g, tree clauses)
+Set the @code{CLAUSES} for a @code{GIMPLE_OMP_ALLOCATE}.
+@end deftypefn
+
+@deftypefn {GIMPLE function} tree gimple_omp_allocate_clauses ( @
+const gomp_allocate *g)
+Get the @code{CLAUSES} of a @code{GIMPLE_OMP_ALLOCATE}.
+@end deftypefn
+
+@deftypefn {GIMPLE function} void gimple_omp_allocate_set_kind ( @
+gomp_allocate *g, int kind)
+Set the @code{KIND} for a @code{GIMPLE_OMP_ALLOCATE}.
+@end deftypefn
+
+@deftypefn {GIMPLE function} int gimple_omp_allocate_kind ( @
+const gomp_allocate *g)
+Get the @code{KIND} of a @code{GIMPLE_OMP_ALLOCATE}.
+@end deftypefn
+
 @node @code{GIMPLE_OMP_ATOMIC_LOAD}
 @subsection @code{GIMPLE_OMP_ATOMIC_LOAD}
 @cindex @code{GIMPLE_OMP_ATOMIC_LOAD}
@@ -1760,7 +1797,6 @@ const gomp_atomic_load *g)
 Get the @code{RHS} of an atomic set.
 @end deftypefn
 
-
 @node @code{GIMPLE_OMP_ATOMIC_STORE}
 @subsection @code{GIMPLE_OMP_ATOMIC_STORE}
 @cindex @code{GIMPLE_OMP_ATOMIC_STORE}
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index ebd87b20a0a..bb961a900df 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -1967,6 +1967,38 @@ dump_gimple_omp_critical (pretty_printer *bu

[PATCH 5/5] [gfortran] Lower allocate directive (OpenMP 5.0).

2022-01-13 Thread Hafiz Abid Qadeer
This patch looks for malloc/free calls that were generated by an allocate
statement that is associated with an allocate directive and replaces them with
GOMP_alloc and GOMP_free calls.

gcc/ChangeLog:

* omp-low.c (scan_sharing_clauses): Handle OMP_CLAUSE_ALLOCATOR.
(scan_omp_allocate): New.
(scan_omp_1_stmt): Call it.
(lower_omp_allocate): New function.
(lower_omp_1): Call it.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/allocate-6.f90: Add tests.

libgomp/ChangeLog:

* testsuite/libgomp.fortran/allocate-1.c: New test.
* testsuite/libgomp.fortran/allocate-2.f90: New test.
---
 gcc/omp-low.c | 125 ++
 gcc/testsuite/gfortran.dg/gomp/allocate-6.f90 |   9 ++
 .../testsuite/libgomp.fortran/allocate-1.c|   7 +
 .../testsuite/libgomp.fortran/allocate-2.f90  |  49 +++
 4 files changed, 190 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.fortran/allocate-1.c
 create mode 100644 libgomp/testsuite/libgomp.fortran/allocate-2.f90

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index f2237428de1..8a0ae3932b9 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -1684,6 +1684,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_FINALIZE:
case OMP_CLAUSE_TASK_REDUCTION:
case OMP_CLAUSE_ALLOCATE:
+   case OMP_CLAUSE_ALLOCATOR:
  break;
 
case OMP_CLAUSE_ALIGNED:
@@ -1892,6 +1893,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_FINALIZE:
case OMP_CLAUSE_FILTER:
case OMP_CLAUSE__CONDTEMP_:
+   case OMP_CLAUSE_ALLOCATOR:
  break;
 
case OMP_CLAUSE__CACHE_:
@@ -2962,6 +2964,16 @@ scan_omp_simd_scan (gimple_stmt_iterator *gsi, gomp_for 
*stmt,
   maybe_lookup_ctx (new_stmt)->for_simd_scan_phase = true;
 }
 
+/* Scan an OpenMP allocate directive.  */
+
+static void
+scan_omp_allocate (gomp_allocate *stmt, omp_context *outer_ctx)
+{
+  omp_context *ctx;
+  ctx = new_omp_context (stmt, outer_ctx);
+  scan_sharing_clauses (gimple_omp_allocate_clauses (stmt), ctx);
+}
+
 /* Scan an OpenMP sections directive.  */
 
 static void
@@ -4247,6 +4259,9 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool 
*handled_ops_p,
insert_decl_map (&ctx->cb, var, var);
   }
   break;
+case GIMPLE_OMP_ALLOCATE:
+  scan_omp_allocate (as_a <gomp_allocate *> (stmt), ctx);
+  break;
 default:
   *handled_ops_p = false;
   break;
@@ -8680,6 +8695,111 @@ lower_omp_single_simple (gomp_single *single_stmt, 
gimple_seq *pre_p)
   gimple_seq_add_stmt (pre_p, gimple_build_label (flabel));
 }
 
+static void
+lower_omp_allocate (gimple_stmt_iterator *gsi_p, omp_context *)
+{
+  gomp_allocate *st = as_a <gomp_allocate *> (gsi_stmt (*gsi_p));
+  tree clauses = gimple_omp_allocate_clauses (st);
+  int kind = gimple_omp_allocate_kind (st);
+  gcc_assert (kind == GF_OMP_ALLOCATE_KIND_ALLOCATE
+ || kind == GF_OMP_ALLOCATE_KIND_FREE);
+  bool allocate = (kind == GF_OMP_ALLOCATE_KIND_ALLOCATE);
+
+  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+{
+  if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_ALLOCATOR)
+   continue;
+  tree var = OMP_ALLOCATE_DECL (c);
+
+  gimple_stmt_iterator gsi = *gsi_p;
+  for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
+   {
+ gimple *stmt = gsi_stmt (gsi);
+
+ if (gimple_code (stmt) != GIMPLE_CALL
+ || (allocate && gimple_call_fndecl (stmt)
+ != builtin_decl_explicit (BUILT_IN_MALLOC))
+ || (!allocate && gimple_call_fndecl (stmt)
+ != builtin_decl_explicit (BUILT_IN_FREE)))
+   continue;
+ const gcall *gs = as_a <const gcall *> (stmt);
+ tree allocator = OMP_ALLOCATE_ALLOCATOR (c)
+  ? OMP_ALLOCATE_ALLOCATOR (c)
+  : integer_zero_node;
+ if (allocate)
+   {
+ tree lhs = gimple_call_lhs (gs);
+ if (lhs && TREE_CODE (lhs) == SSA_NAME)
+   {
+ gimple_stmt_iterator gsi2 = gsi;
+ gsi_next (&gsi2);
+ gimple *assign = gsi_stmt (gsi2);
+ if (gimple_code (assign) == GIMPLE_ASSIGN)
+   {
+ lhs = gimple_assign_lhs (as_a <const gassign *> (assign));
+ if (lhs == NULL_TREE
+ || TREE_CODE (lhs) != COMPONENT_REF)
+   continue;
+ lhs = TREE_OPERAND (lhs, 0);
+   }
+   }
+
+ if (lhs == var)
+   {
+ unsigned HOST_WIDE_INT ialign = 0;
+ tree align;
+ if (TYPE_P (var))
+   ialign = TYPE_ALIGN_UNIT (var);
+ else
+   ialign = DECL_ALIGN_UNIT (var);
+ align = build_int_cst (size_type_node, ialign);
+ tree repl = builtin_decl_explicit (BUILT_IN_GOMP_

Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-01-17 Thread Hafiz Abid Qadeer
On 14/01/2022 12:20, Tobias Burnus wrote:
> On 14.01.22 12:55, Jakub Jelinek via Fortran wrote:
>> If we want to check intptr_t, we should guard the dg-error with
>> "" { target { lp64 || llp64 } }
>> or so.
> 
> Well, if we want to use intptr_t, we could use be explicitly as with:
> 
>   use iso_c_binding, only: c_intptr_t
>   ! use omp_lib, only: omp_allocator_handle_kind
>   ...  ('implicit none' etc.)
>   integer, parameter :: omp_allocator_handle_kind = c_intptr_t
>   ...
>   integer(kind=omp_allocator_handle_kind)
> 
> (@Abid: The 'use omp_lib' line is commented as in gcc/testsuite/*/gomp/,
> the OpenMP module/header from libgomp is not available - and then a
> stub parameter is created.)
> 
>> Otherwise yes, we can add some other kind and hope it is not the
>> same as omp_allocator_handle_kind.  Or we can do both,
>> keep the current one with the target lp64 || llp64 and
>> add another one with some integer(kind=1).
> 
> For just testing something invalid, I think it makes more sense to just set 
> it to kind=1.
> 
> For checking the valid value, using c_intptr_t seems to make more sense than 
restricting it to (l)lp64.

The problem was that the literal 0 has the same kind as c_intptr_t for m32, so
there was no diagnostic in that case.  I am going to change it to 0_1 to make
the test more robust.

Thanks,
-- 
Hafiz Abid Qadeer
Mentor, a Siemens Business


[Committed] Change kind of integer literal to fix a testcase.

2022-01-17 Thread Hafiz Abid Qadeer
As Thomas reported in
https://gcc.gnu.org/pipermail/gcc-patches/2022-January/588448.html
a test added in my recent allocate clause patch fails on m32.  It was due
to the default integer kind matching c_intptr_t for m32.  I have now
changed the literal 0 to 0_1 so that an integer with kind=1 is always used.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/allocate-2.f90: Change 0 to 0_1.
---
 gcc/testsuite/gfortran.dg/gomp/allocate-2.f90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gfortran.dg/gomp/allocate-2.f90 
b/gcc/testsuite/gfortran.dg/gomp/allocate-2.f90
index 88b2d26872d..657ff44d023 100644
--- a/gcc/testsuite/gfortran.dg/gomp/allocate-2.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/allocate-2.f90
@@ -33,7 +33,7 @@ subroutine foo(x)
   x=5
   !$omp end parallel
 
-  !$omp parallel allocate (0: x) private(x) ! { dg-error "Expected integer 
expression of the 'omp_allocator_handle_kind' kind at .1." }
+  !$omp parallel allocate (0_1: x) private(x) ! { dg-error "Expected integer 
expression of the 'omp_allocator_handle_kind' kind at .1." }
   x=6
   !$omp end parallel
   
-- 
2.25.1



Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-01-24 Thread Hafiz Abid Qadeer
On 24/01/2022 08:45, Tobias Burnus wrote:
> On 21.01.22 18:43, Tobias Burnus wrote:
>> On 21.01.22 18:15, Thomas Schwinge wrote:
>>>     11 | integer(c_int) function is_64bit_aligned (a) bind(C)
>>>  Warning: Variable ‘a’ at (1) is a dummy argument of the BIND(C) 
>>> procedure ‘is_64bit_aligned’
>>> but may not be C interoperable [-Wc-binding-type]
>>>
>>> Is that something to worry about?
> I have attached a patch (not committed), which silences the three kinds of 
> warnings and fixes the
> interface issue.
> TODO: commit it.
> 
> On 21.01.22 18:15, Thomas Schwinge wrote:
>> I'm seeing this test case randomly/non-deterministically FAIL to execute,
>> differently on different systems and runs, for example: [...]
>> I'd assume there's some concurrency issue: the problem disappears if I
>> manually specify a lowerish 'OMP_NUM_THREADS'
> 
> If one compiles the program with -fsanitize=thread, it shows tons of errors 
> :-(
> The first one is:
> 
> WARNING: ThreadSanitizer: data race (pid=3034413)
>   Read of size 8 at 0x7fff8b5a8340 by thread T1:
>     #0 __m_MOD_foo._omp_fn.2 
> ../../libgomp/testsuite/libgomp.fortran/allocate-1.f90:116
> (a.out+0x402a88)
>     #1 gomp_thread_start ../../../repos/gcc-trunk-commit/libgomp/team.c:129 
> (libgomp.so.1+0x1e5ed)
> 
>   Previous write of size 8 at 0x7fff8b5a8340 by main thread:
>     #0 __m_MOD_foo._omp_fn.1 
> ../../libgomp/testsuite/libgomp.fortran/allocate-1.f90:116
> (a.out+0x4029c0)
>     #1 GOMP_teams_reg ../../../repos/gcc-trunk-commit/libgomp/teams.c:51 
> (libgomp.so.1+0x3638c)
>     #2 MAIN__ ../../libgomp/testsuite/libgomp.fortran/allocate-1.f90:328 
> (a.out+0x4024c0)
>     #3 main ../../libgomp/testsuite/libgomp.fortran/allocate-1.f90:312 
> (a.out+0x4025b0)
> 
>   Location is stack of main thread.
> 
>   Location is global '' at 0x ([stack]+0x1f340)
> 
>   Thread T1 (tid=3034416, running) created by main thread at:
>     #0 pthread_create
> ../../../../repos/gcc-trunk-commit/libsanitizer/tsan/tsan_interceptors_posix.cpp:1001
> (libtsan.so.2+0x62c76)
>     #1 gomp_team_start ../../../repos/gcc-trunk-commit/libgomp/team.c:858 
> (libgomp.so.1+0x1ec18)
>     #2 MAIN__ ../../libgomp/testsuite/libgomp.fortran/allocate-1.f90:328 
> (a.out+0x4024c0)
>     #3 main ../../libgomp/testsuite/libgomp.fortran/allocate-1.f90:312 
> (a.out+0x4025b0)
> 
> SUMMARY: ThreadSanitizer: data race 
> ../../libgomp/testsuite/libgomp.fortran/allocate-1.f90:116 in
> __m_MOD_foo._omp_fn.2
> 
> Tobias
@Tobias: Thanks for your comments and the patch.

@Thomas: Thanks for reporting the problem. Did you notice similar behavior with
libgomp/testsuite/libgomp.c-c++-common/allocate-1.c? It was used as base for 
fortran testcase and it
shows similar warnings with -fsanitize=thread. I am trying to figure out if the 
problem you observed
is a general one or just specific to fortran testcase.

-- 
Hafiz Abid Qadeer
Mentor, a Siemens Business


Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-01-31 Thread Hafiz Abid Qadeer
On 25/01/2022 10:32, Tobias Burnus wrote:
> On 25.01.22 10:19, Thomas Schwinge wrote:
>>> I am trying to figure out if the problem you observed
>>> is a general one or just specific to fortran testcase.
>> So, unless the '-fsanitize=thread' issues are bogus -- unlikely ;-) -- it
>> seems a latent issue generally, now fatal with
>> 'libgomp.fortran/allocate-1.f90'.
> 
> There is one known issue with libgomp and TSAN (-fsanitize=thread)
> that I tend to forget about :-(
> 
> That's according to Jakub, who wrote a while ago:
> 
> "TSAN doesn't understand what libgomp is doing, unless built with 
> --disable-linux-futex"
> 
> 
> 
> However, I now tried to disable futex and still get the following.
> (First result for libgomp.c-c++-common/allocate-1.c).
> 
> On the other hand, I have the feeling that the configure option is
> a no op for libgomp. This can also be seen in the configure.ac script,
> which only for libstdc++ uses the result and the others have a no-op
> call to 'true' (alias ':'):
> 
> libgomp/configure.ac:GCC_LINUX_FUTEX(:)
> libitm/configure.ac:GCC_LINUX_FUTEX(:)
> libstdc++-v3/configure.ac:GCC_LINUX_FUTEX([AC_DEFINE(HAVE_LINUX_FUTEX, 1, 
> [Define if futex syscall
> is available.])])
> 
> (The check is not completely pointless as some checks are still done;
> e.g. 'SYS_gettid and SYS_futex required'.)
> 
> (TSAN did find issues in libgomp in the past, however. But those
> have been fixed.)
> 
> 
> Thus, there might or might not be an issue when TSAN reports one.
> 
>  * * *
> 
> Glancing at the Fortran testcase, I noted the following,
> which probably does not cause the problems. But still,
> I want to mention it:
> 
>   !$omp parallel private (y, v) firstprivate (x) allocate (x, y, v)
>   if (x /= 42) then
>     stop 1
>   end if
> 
>   v(1) = 7
>   if ( (and(fl, 2) /= 0) .and.  &
>    ((is_64bit_aligned(x) == 0) .or. &
>     (is_64bit_aligned(y) == 0) .or. &
>     (is_64bit_aligned(v(1)) == 0))) then
>   stop 2
>   end if
> 
> If one compares this with the C/C++ testcase, I note that there
> is a barrier before the alignment check in C/C++ but not in
> Fortran. Additionally, 'v(1) = 7' is set twice and the
> alignment check happens earlier than in C/C++. Not that that
> should really matter, but I just saw it.
> 
> 
> In C/C++:
>   int v[x], w[x];
> ...
>     v[0] = 7;
>     v[41] = 8;
> 
> In Fortran:
>   integer, dimension(x) :: v
> ...
>   v(1) = 7
>   v(41) = 8
> 
> where 'x == 42'. The Fortran version is not really wrong, but I think
> the idea is to set the first and last array element - and that's here
> v(42) and not v(41).
> 
> BTW: Fortran permits to specify a different lower bound. When converting
> C/C++ testcases, it can be useful to use the same lower bound also in
> Fortran:   integer :: v(0:x-1)  (or: 'integer, dimension(0:x-1) :: v')
> uses then 0 ... 41 for the indices instead of 1 ... 42.
> 
> But one has to be careful as Fortran uses the upper bound and C uses the
> number of elements. (Same with OpenMP array sections in Fortran vs. C.)
> 
> 
> Tobias
> 
> PS: The promised data-race warning:
> ==
> WARNING: ThreadSanitizer: data race (pid=4135381)
>   Read of size 8 at 0x7ffc0888bdc0 by thread T10:
>     #0 foo._omp_fn.2 libgomp.c-c++-common/allocate-1.c:47 (a.out+0x402c05)
>     #1 gomp_thread_start ../../../repos/gcc/libgomp/team.c:129 
> (libgomp.so.1+0x1e5ed)
> 
>   Previous write of size 8 at 0x7ffc0888bdc0 by main thread:
>     #0 foo._omp_fn.1 libgomp.c-c++-common/allocate-1.c:47 (a.out+0x402aee)
>     #1 GOMP_teams_reg ../../../repos/gcc/libgomp/teams.c:51 
> (libgomp.so.1+0x3638c)
>     #2 main libgomp.c-c++-common/allocate-1.c:366 (a.out+0x40273e)
> 
>   Location is stack of main thread.
> 
>   Location is global '' at 0x ([stack]+0x1ddc0)
> 
>   Thread T10 (tid=4135398, running) created by main thread at:
>     #0 pthread_create 
> ../../../../repos/gcc/libsanitizer/tsan/tsan_interceptors_posix.cpp:1001
> (libtsan.so.2+0x62c76)
>     #1 gomp_team_start ../../../repos/gcc/libgomp/team.c:858 
> (libgomp.so.1+0x1ec18)
>     #2 main libgomp.c-c++-common/allocate-1.c:366 (a.out+0x40273e)
> 
> SUMMARY: ThreadSanitizer: data race libgomp.c-c++-common/allocate-1.c:47 in 
> foo._omp_fn.2
> ==
> 

The problem was with the pool_size trait. It has a limited size, which this
testcase exceeded. I have
removed it now, which seems to fix the problem. OK to commit the attached patch?

Thanks,
-- 
Hafiz Abid Q

Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-02-04 Thread Hafiz Abid Qadeer
On 04/02/2022 09:46, Thomas Schwinge wrote:

> 
> Abid, are you going to address these?  I think it does make sense if the
> C/C++ and Fortran test cases match as much as feasible.
> 
Sure. I will do that.

> However: really (a) remove 'omp_alloctrait (omp_atk_pool_size, 8192)'
> altogether, or instead: (b) increase its size (if that can be computed)
> -- and/or (c) limit the number of OpenMP threads executing in parallel?
> Due to unfamiliarity with all that, I don't know what's best here.
> 
The C testcase also does not have the pool_size trait, so it makes sense to me
not to have it in the Fortran
testcase either. It also seems cleaner than putting some limit on the number of
threads or increasing
the size, which would be a bit fragile.

Thanks,
-- 
Hafiz Abid Qadeer



Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-02-05 Thread Hafiz Abid Qadeer
On 04/02/2022 11:25, Hafiz Abid Qadeer wrote:
> On 04/02/2022 09:46, Thomas Schwinge wrote:
> 
>>
>> Abid, are you going to address these?  I think it does make sense if the
>> C/C++ and Fortran test cases match as much as feasible.
>>
> Sure. I will do that.

The attached patch addresses those issues, in addition to removing the pool_size trait.

Thanks
-- 
Hafiz Abid Qadeer

From 7b4dbd5b7c853f0165436ef58339663edce802d5 Mon Sep 17 00:00:00 2001
From: Hafiz Abid Qadeer 
Date: Mon, 31 Jan 2022 19:02:14 +
Subject: [PATCH] [libgomp] Fix multiple issues in the testcase allocate-1.f90.

1. Thomas reported in
https://gcc.gnu.org/pipermail/gcc-patches/2022-January/589039.html
that this testcase is randomly failing. The problem was the fixed pool
size, which was exhausted when there were a lot of threads. Fixed it
by removing the pool_size trait, which causes the default pool size to
be used; that should be big enough.

2. Array indices have been changed to check the last element in the
array.

3. Remove a redundant assignment and move some code to better match
C testcase.

libgomp/ChangeLog:

	* testsuite/libgomp.fortran/allocate-1.f90: Remove pool_size
	trait.  Test last index in w and v array.  Remove redundant
	assignment to V(1).  Move alignment checks at the end of
	parallel region.
---
 .../testsuite/libgomp.fortran/allocate-1.f90  | 26 +--
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/libgomp/testsuite/libgomp.fortran/allocate-1.f90 b/libgomp/testsuite/libgomp.fortran/allocate-1.f90
index 0a31d35d5ac..1547d2baeef 100644
--- a/libgomp/testsuite/libgomp.fortran/allocate-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/allocate-1.f90
@@ -74,31 +74,30 @@ subroutine foo (x, p, q, h, fl)
   if (x /= 42) then
 stop 1
   end if
-  v(1) = 7
-  if ( (and(fl, 2) /= 0) .and.  &
-   ((is_64bit_aligned(x) == 0) .or. &
-(is_64bit_aligned(y) == 0) .or. &
-(is_64bit_aligned(v(1)) == 0))) then
-  stop 2
-  end if
 
   !$omp barrier
   y = 1;
   x = x + 1
   v(1) = 7
-  v(41) = 8
+  v(42) = 8
   !$omp barrier
   if (x /= 43 .or. y /= 1) then
 stop 3
   end if
-  if (v(1) /= 7 .or. v(41) /= 8) then
+  if (v(1) /= 7 .or. v(42) /= 8) then
 stop 4
   end if
+  if ( (and(fl, 2) /= 0) .and.&
+ ((is_64bit_aligned(x) == 0) .or. &
+  (is_64bit_aligned(y) == 0) .or. &
+  (is_64bit_aligned(v(1)) == 0))) then
+stop 2
+  end if
   !$omp end parallel
   !$omp teams
   !$omp parallel private (y) firstprivate (x, w) allocate (h: x, y, w)
 
-  if (x /= 42 .or. w(17) /= 17 .or. w(41) /= 41) then
+  if (x /= 42 .or. w(17) /= 17 .or. w(42) /= 42) then
 stop 5
   end if
   !$omp barrier
@@ -314,13 +313,12 @@ program main
   integer, dimension(4) :: p
   integer, dimension(4) :: q
 
-  type (omp_alloctrait) :: traits(3)
+  type (omp_alloctrait) :: traits(2)
   integer (omp_allocator_handle_kind) :: a
 
   traits = [omp_alloctrait (omp_atk_alignment, 64), &
-omp_alloctrait (omp_atk_fallback, omp_atv_null_fb), &
-omp_alloctrait (omp_atk_pool_size, 8192)]
-  a = omp_init_allocator (omp_default_mem_space, 3, traits)
+omp_alloctrait (omp_atk_fallback, omp_atv_null_fb)]
+  a = omp_init_allocator (omp_default_mem_space, 2, traits)
   if (a == omp_null_allocator) stop 1
 
   call omp_set_default_allocator (omp_default_mem_alloc);
-- 
2.25.1



Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-02-16 Thread Hafiz Abid Qadeer
On 05/02/2022 19:09, Hafiz Abid Qadeer wrote:
> On 04/02/2022 11:25, Hafiz Abid Qadeer wrote:
>> On 04/02/2022 09:46, Thomas Schwinge wrote:
>>
>>>
>>> Abid, are you going to address these?  I think it does make sense if the
>>> C/C++ and Fortran test cases match as much as feasible.
>>>
>> Sure. I will do that.
> 
> The attached patch addresses those issues, in addition to removing the pool_size trait.

Is this change ok to commit?

Thanks,
-- 
Hafiz Abid Qadeer



[PATCH] Add a restriction on allocate clause (OpenMP 5.0)

2022-02-18 Thread Hafiz Abid Qadeer
An allocate clause in a target region must specify an allocator
unless the compilation unit has a requires directive with the
dynamic_allocators clause.  The current implementation of the allocate
clause did not check for this restriction.  This patch fills that
gap.

gcc/ChangeLog:

* omp-low.cc (omp_maybe_offloaded_ctx): New prototype.
(scan_sharing_clauses):  Check a restriction on allocate clause.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/allocate-2.c: Add tests.
* c-c++-common/gomp/allocate-8.c: New test.
* gfortran.dg/gomp/allocate-3.f90: Add tests.
* gcc.dg/gomp/pr104517.c: Update.
---
 gcc/omp-low.cc| 10 ++
 gcc/testsuite/c-c++-common/gomp/allocate-2.c  | 15 +++
 gcc/testsuite/c-c++-common/gomp/allocate-8.c  | 18 ++
 gcc/testsuite/gcc.dg/gomp/pr104517.c  | 18 ++
 gcc/testsuite/gfortran.dg/gomp/allocate-3.f90 | 14 ++
 5 files changed, 67 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/allocate-8.c

diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 77176efe715..658cb3de7d6 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -195,6 +195,7 @@ static vec task_cpyfns;
 
 static void scan_omp (gimple_seq *, omp_context *);
 static tree scan_omp_1_op (tree *, int *, void *);
+static bool omp_maybe_offloaded_ctx (omp_context *ctx);
 
 #define WALK_SUBSTMTS  \
 case GIMPLE_BIND: \
@@ -1169,6 +1170,15 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
|| !integer_onep (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c))
|| OMP_CLAUSE_ALLOCATE_ALIGN (c) != NULL_TREE))
   {
+   /* The allocate clauses that appear on a target construct or on
+  constructs in a target region must specify an allocator expression
+  unless a requires directive with the dynamic_allocators clause
+  is present in the same compilation unit.  */
+   if (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c) == NULL_TREE
+   && ((omp_requires_mask & OMP_REQUIRES_DYNAMIC_ALLOCATORS) == 0)
+   && omp_maybe_offloaded_ctx (ctx))
+ error_at (OMP_CLAUSE_LOCATION (c), "%<allocate%> clause must"
+   " specify an allocator here");
if (ctx->allocate_map == NULL)
  ctx->allocate_map = new hash_map;
tree val = integer_zero_node;
diff --git a/gcc/testsuite/c-c++-common/gomp/allocate-2.c 
b/gcc/testsuite/c-c++-common/gomp/allocate-2.c
index cc77efc8ffe..6bb4a8af2e7 100644
--- a/gcc/testsuite/c-c++-common/gomp/allocate-2.c
+++ b/gcc/testsuite/c-c++-common/gomp/allocate-2.c
@@ -43,3 +43,18 @@ foo (int x, int z)
   #pragma omp parallel private (x) allocate (0 : x)/* { dg-error 
"'allocate' clause allocator expression has type 'int' rather than 
'omp_allocator_handle_t'" } */
   bar (x, &x, 0);
 }
+
+void
+foo1 ()
+{
+  int a = 10;
+#pragma omp target
+  {
+#pragma omp parallel private (a) allocate(a) // { dg-error "'allocate' 
clause must specify an allocator here" }
+a = 20;
+  }
+#pragma omp target private(a) allocate(a) // { dg-error "'allocate' clause 
must specify an allocator here" }
+  {
+a = 30;
+  }
+}
diff --git a/gcc/testsuite/c-c++-common/gomp/allocate-8.c 
b/gcc/testsuite/c-c++-common/gomp/allocate-8.c
new file mode 100644
index 000..bacefafc6fc
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/allocate-8.c
@@ -0,0 +1,18 @@
+#pragma omp requires dynamic_allocators
+void
+foo ()
+{
+  int a = 10;
+#pragma omp parallel private (a) allocate(a)
+  a = 20;
+#pragma omp target
+  {
+#pragma omp parallel private (a) allocate(a)
+a = 30;
+  }
+#pragma omp target private(a) allocate(a)
+  {
+a = 40;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.dg/gomp/pr104517.c 
b/gcc/testsuite/gcc.dg/gomp/pr104517.c
index efb3175beb3..7e3bd1a1d1e 100644
--- a/gcc/testsuite/gcc.dg/gomp/pr104517.c
+++ b/gcc/testsuite/gcc.dg/gomp/pr104517.c
@@ -2,11 +2,13 @@
 /* { dg-do compile } */
 /* { dg-options "-O1 -fcompare-debug -fopenmp -fno-tree-ter -save-temps" } */
 
-enum {
-  omp_default_mem_alloc,
-  omp_large_cap_mem_alloc,
-  omp_const_mem_alloc,
-  omp_high_bw_mem_alloc
+typedef enum omp_allocator_handle_t
+{
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
 } omp_allocator_handle_t;
 
 int t, bar_nte, bar_tl, bar_i3, bar_dd;
@@ -23,7 +25,7 @@ bar (int *idp, int s, int nth, int g, int nta, int fi, int 
pp, int *q,
   int p = 0, i2 = 0, i1 = 0, m = 0, d = 0;
 
 #pragma omp target parallel for   \
-  device(p) firstprivate (f) allocate (f) in_reduction(+:r2)
+  device(p) firstprivate (f) allocate (omp_default_mem_alloc:f) 
in_reduction(+:r2)
   for (int i = 0; i < 4; i++)
 ll++;
 
@@ -31,8 +33,8 @@ bar (int *idp, int s, int nth, int g, int nta, int fi, int 
pp, int *q,
   device(d) map (m) \
   if

[PATCH] [gfortran] Set omp_requires_mask for dynamic_allocators.

2022-02-21 Thread Hafiz Abid Qadeer
This patch fixes an issue that although gfortran accepts
'requires dynamic_allocators', it does not set the omp_requires_mask
accordingly.

gcc/fortran/ChangeLog:

* parse.cc (gfc_parse_file): Set OMP_REQUIRES_DYNAMIC_ALLOCATORS
bit in omp_requires_mask.
---
 gcc/fortran/parse.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/fortran/parse.cc b/gcc/fortran/parse.cc
index db918291b10..821555bd85f 100644
--- a/gcc/fortran/parse.cc
+++ b/gcc/fortran/parse.cc
@@ -6890,6 +6890,9 @@ done:
   break;
 }
 
+  if (omp_requires & OMP_REQ_DYNAMIC_ALLOCATORS)
+omp_requires_mask
+   = (enum omp_requires) (omp_requires_mask | 
OMP_REQUIRES_DYNAMIC_ALLOCATORS);
   /* Do the parse tree dump.  */
   gfc_current_ns = flag_dump_fortran_original ? gfc_global_ns_list : NULL;
 
-- 
2.25.1



[PATCH 0/5] openmp: Handle pinned and unified shared memory.

2022-03-08 Thread Hafiz Abid Qadeer
This patch series adds support for unified shared memory (USM) and pinned
memory. The support in libgomp is for nvptx offloading only.  A new
command line option -foffload-memory allows the user to choose either USM
or pinned memory. USM can also be enabled using the requires construct.

When USM is in use, calls to memory allocation functions like malloc are
changed to omp_alloc with the appropriate allocator.  No transformations are
required for pinned memory, which is implemented using mlockall and so is
only available on Linux.

Andrew Stubbs (4):
  openmp: Add -foffload-memory
  openmp: allow requires unified_shared_memory
  openmp, nvptx: ompx_unified_shared_mem_alloc
  openmp: -foffload-memory=pinned

Hafiz Abid Qadeer (1):
  openmp: Use libgomp memory allocation functions with unified shared
memory.

 gcc/c/c-parser.cc |  13 +-
 gcc/common.opt|  16 ++
 gcc/coretypes.h   |   7 +
 gcc/cp/parser.cc  |  13 +-
 gcc/doc/invoke.texi   |  16 +-
 gcc/fortran/openmp.cc |  10 +-
 gcc/omp-low.cc| 220 ++
 gcc/passes.def|   1 +
 .../c-c++-common/gomp/alloc-pinned-1.c|  28 +++
 gcc/testsuite/c-c++-common/gomp/usm-1.c   |   4 +
 gcc/testsuite/c-c++-common/gomp/usm-2.c   |  34 +++
 gcc/testsuite/c-c++-common/gomp/usm-3.c   |  32 +++
 gcc/testsuite/g++.dg/gomp/usm-1.C |  32 +++
 gcc/testsuite/g++.dg/gomp/usm-2.C |  30 +++
 gcc/testsuite/g++.dg/gomp/usm-3.C |  38 +++
 gcc/testsuite/gfortran.dg/gomp/usm-1.f90  |   6 +
 gcc/testsuite/gfortran.dg/gomp/usm-2.f90  |  16 ++
 gcc/testsuite/gfortran.dg/gomp/usm-3.f90  |  13 ++
 gcc/tree-pass.h   |   1 +
 libgomp/allocator.c   |  13 +-
 libgomp/config/linux/allocator.c  |  70 --
 libgomp/config/nvptx/allocator.c  |   6 +
 libgomp/libgomp-plugin.h  |   3 +
 libgomp/libgomp.h |   6 +
 libgomp/libgomp.map   |   5 +
 libgomp/omp.h.in  |   4 +
 libgomp/omp_lib.f90.in|   8 +
 libgomp/plugin/plugin-nvptx.c |  45 +++-
 libgomp/target.c  |  70 ++
 libgomp/testsuite/libgomp.c++/usm-1.C |  54 +
 libgomp/testsuite/libgomp.c/alloc-pinned-7.c  |  66 ++
 libgomp/testsuite/libgomp.c/usm-1.c   |  24 ++
 libgomp/testsuite/libgomp.c/usm-2.c   |  32 +++
 libgomp/testsuite/libgomp.c/usm-3.c   |  35 +++
 libgomp/testsuite/libgomp.c/usm-4.c   |  36 +++
 libgomp/testsuite/libgomp.c/usm-5.c   |  28 +++
 libgomp/testsuite/libgomp.c/usm-6.c   |  70 ++
 37 files changed, 1075 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/alloc-pinned-1.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-1.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-2.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-3.c
 create mode 100644 gcc/testsuite/g++.dg/gomp/usm-1.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/usm-2.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/usm-3.C
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-3.f90
 create mode 100644 libgomp/testsuite/libgomp.c++/usm-1.C
 create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-7.c
 create mode 100644 libgomp/testsuite/libgomp.c/usm-1.c
 create mode 100644 libgomp/testsuite/libgomp.c/usm-2.c
 create mode 100644 libgomp/testsuite/libgomp.c/usm-3.c
 create mode 100644 libgomp/testsuite/libgomp.c/usm-4.c
 create mode 100644 libgomp/testsuite/libgomp.c/usm-5.c
 create mode 100644 libgomp/testsuite/libgomp.c/usm-6.c

-- 
2.25.1



[PATCH 1/5] openmp: Add -foffload-memory

2022-03-08 Thread Hafiz Abid Qadeer
From: Andrew Stubbs 

Add a new option.  It will be used in follow-up patches.

gcc/ChangeLog:

* common.opt: Add -foffload-memory and its enum values.
* coretypes.h (enum offload_memory): New.
* doc/invoke.texi: Document -foffload-memory.
---
 gcc/common.opt  | 16 
 gcc/coretypes.h |  7 +++
 gcc/doc/invoke.texi | 16 +++-
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 8b6513de47c..17426523e23 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2182,6 +2182,22 @@ Enum(offload_abi) String(ilp32) Value(OFFLOAD_ABI_ILP32)
 EnumValue
 Enum(offload_abi) String(lp64) Value(OFFLOAD_ABI_LP64)
 
+foffload-memory=
+Common Joined RejectNegative Enum(offload_memory) Var(flag_offload_memory) 
Init(OFFLOAD_MEMORY_NONE)
+-foffload-memory=[none|unified|pinned] Use an offload memory optimization.
+
+Enum
+Name(offload_memory) Type(enum offload_memory) UnknownError(Unknown offload 
memory option %qs)
+
+EnumValue
+Enum(offload_memory) String(none) Value(OFFLOAD_MEMORY_NONE)
+
+EnumValue
+Enum(offload_memory) String(unified) Value(OFFLOAD_MEMORY_UNIFIED)
+
+EnumValue
+Enum(offload_memory) String(pinned) Value(OFFLOAD_MEMORY_PINNED)
+
 fomit-frame-pointer
 Common Var(flag_omit_frame_pointer) Optimization
 When possible do not generate stack frames.
diff --git a/gcc/coretypes.h b/gcc/coretypes.h
index 08b9ac9094c..dd52d5bb113 100644
--- a/gcc/coretypes.h
+++ b/gcc/coretypes.h
@@ -206,6 +206,13 @@ enum offload_abi {
   OFFLOAD_ABI_ILP32
 };
 
+/* Types of memory optimization for an offload device.  */
+enum offload_memory {
+  OFFLOAD_MEMORY_NONE,
+  OFFLOAD_MEMORY_UNIFIED,
+  OFFLOAD_MEMORY_PINNED
+};
+
 /* Types of profile update methods.  */
 enum profile_update {
   PROFILE_UPDATE_SINGLE,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 248ed534aee..d16019fc8c3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -202,7 +202,7 @@ in the following sections.
 -fno-builtin  -fno-builtin-@var{function}  -fcond-mismatch @gol
 -ffreestanding  -fgimple  -fgnu-tm  -fgnu89-inline  -fhosted @gol
 -flax-vector-conversions  -fms-extensions @gol
--foffload=@var{arg}  -foffload-options=@var{arg} @gol
+-foffload=@var{arg}  -foffload-options=@var{arg} -foffload-memory=@var{arg} 
@gol
 -fopenacc  -fopenacc-dim=@var{geom} @gol
 -fopenmp  -fopenmp-simd @gol
 -fpermitted-flt-eval-methods=@var{standard} @gol
@@ -2694,6 +2694,20 @@ Typical command lines are
 -foffload-options=amdgcn-amdhsa=-march=gfx906 -foffload-options=-lm
 @end smallexample
 
+@item -foffload-memory=none
+@itemx -foffload-memory=unified
+@itemx -foffload-memory=pinned
+@opindex foffload-memory
+@cindex OpenMP offloading memory modes
+Enable a memory optimization mode to use with OpenMP.  The default behavior,
+@option{-foffload-memory=none}, is to do nothing special (unless enabled via
+a requires directive in the code).  @option{-foffload-memory=unified} is
+equivalent to @code{#pragma omp requires unified_shared_memory}.
+@option{-foffload-memory=pinned} forces all host memory to be pinned (this
+mode may require the user to increase the ulimit setting for locked memory).
+All translation units must select the same setting to avoid undefined
+behavior.
+
 @item -fopenacc
 @opindex fopenacc
 @cindex OpenACC accelerator programming
-- 
2.25.1



[PATCH 2/5] openmp: allow requires unified_shared_memory

2022-03-08 Thread Hafiz Abid Qadeer
From: Andrew Stubbs 

This is the front-end portion of the Unified Shared Memory implementation.
It removes the "sorry, unimplemented" message in C, C++, and Fortran, and sets
flag_offload_memory, but is otherwise inactive, for now.

It also checks that -foffload-memory isn't set to an incompatible mode.

gcc/c/ChangeLog:

* c-parser.cc (c_parser_omp_requires): Allow "requires
  unified_shared_memory".

gcc/cp/ChangeLog:

* parser.cc (cp_parser_omp_requires): Allow "requires
unified_shared_memory".

gcc/fortran/ChangeLog:

* openmp.cc (gfc_match_omp_requires): Allow "requires
unified_shared_memory".

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/usm-1.c: New test.
* gfortran.dg/gomp/usm-1.f90: New test.
---
 gcc/c/c-parser.cc| 13 -
 gcc/cp/parser.cc | 13 -
 gcc/fortran/openmp.cc| 10 +-
 gcc/testsuite/c-c++-common/gomp/usm-1.c  |  4 
 gcc/testsuite/gfortran.dg/gomp/usm-1.f90 |  6 ++
 5 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-1.c
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-1.f90

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 84deac04c44..dc834158d1c 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -22542,7 +22542,16 @@ c_parser_omp_requires (c_parser *parser)
  if (!strcmp (p, "unified_address"))
this_req = OMP_REQUIRES_UNIFIED_ADDRESS;
  else if (!strcmp (p, "unified_shared_memory"))
+ {
this_req = OMP_REQUIRES_UNIFIED_SHARED_MEMORY;
+
+   if (flag_offload_memory != OFFLOAD_MEMORY_UNIFIED
+   && flag_offload_memory != OFFLOAD_MEMORY_NONE)
+ error_at (cloc,
+   "unified_shared_memory is incompatible with the "
+   "selected -foffload-memory option");
+   flag_offload_memory = OFFLOAD_MEMORY_UNIFIED;
+ }
  else if (!strcmp (p, "dynamic_allocators"))
this_req = OMP_REQUIRES_DYNAMIC_ALLOCATORS;
  else if (!strcmp (p, "reverse_offload"))
@@ -22609,7 +22618,9 @@ c_parser_omp_requires (c_parser *parser)
  c_parser_skip_to_pragma_eol (parser, false);
  return;
}
- if (p && this_req != OMP_REQUIRES_DYNAMIC_ALLOCATORS)
+ if (p
+ && this_req != OMP_REQUIRES_DYNAMIC_ALLOCATORS
+ && this_req != OMP_REQUIRES_UNIFIED_SHARED_MEMORY)
sorry_at (cloc, "%qs clause on %<requires%> directive not "
"supported yet", p);
  if (p)
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 03d99aba13e..ba263152aaf 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -46464,7 +46464,16 @@ cp_parser_omp_requires (cp_parser *parser, cp_token 
*pragma_tok)
  if (!strcmp (p, "unified_address"))
this_req = OMP_REQUIRES_UNIFIED_ADDRESS;
  else if (!strcmp (p, "unified_shared_memory"))
+ {
this_req = OMP_REQUIRES_UNIFIED_SHARED_MEMORY;
+
+   if (flag_offload_memory != OFFLOAD_MEMORY_UNIFIED
+   && flag_offload_memory != OFFLOAD_MEMORY_NONE)
+ error_at (cloc,
+   "unified_shared_memory is incompatible with the "
+   "selected -foffload-memory option");
+   flag_offload_memory = OFFLOAD_MEMORY_UNIFIED;
+ }
  else if (!strcmp (p, "dynamic_allocators"))
this_req = OMP_REQUIRES_DYNAMIC_ALLOCATORS;
  else if (!strcmp (p, "reverse_offload"))
@@ -46537,7 +46546,9 @@ cp_parser_omp_requires (cp_parser *parser, cp_token 
*pragma_tok)
  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
  return false;
}
- if (p && this_req != OMP_REQUIRES_DYNAMIC_ALLOCATORS)
+ if (p
+ && this_req != OMP_REQUIRES_DYNAMIC_ALLOCATORS
+ && this_req != OMP_REQUIRES_UNIFIED_SHARED_MEMORY)
sorry_at (cloc, "%qs clause on %<requires%> directive not "
"supported yet", p);
  if (p)
diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index 16cd03a3d67..1f434857719 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "diagnostic.h"
 #include "gomp-constants.h"
 #include "target-memory.h"  /* For gfc_encode_character.  */
+#include "options.h"
 
 /* Match an end of OpenMP directive.  End of OpenMP directive is optional
whitespace, followed by '\n' or comment '!'.  */
@@ -5373,6 +5374,12 @@ gfc_match_omp_requires (void)
  requires_clause = OMP_REQ_UNIFIED_SHARED_MEMORY;
  if (requires_clauses & OMP_REQ_UNIFIED_SHARED_MEMORY)
goto duplicate_clause;
+
+ if (flag_offload_memory != OFFLOAD_MEMORY_UNIFIED
+ && flag_offload_memory != OFFLOAD_M

[PATCH 3/5] openmp, nvptx: ompx_unified_shared_mem_alloc

2022-03-08 Thread Hafiz Abid Qadeer
From: Andrew Stubbs 

This adds support for using Cuda Managed Memory with omp_alloc.  It will be
used as the underpinnings for "requires unified_shared_memory" in a later
patch.

There are two new predefined allocators, ompx_unified_shared_mem_alloc and
ompx_host_mem_alloc, plus corresponding memory spaces, which can be used to
allocate memory in the "managed" space and explicitly on the host (it is
intended that "malloc" will be intercepted by the compiler).

The nvptx plugin is modified to make the necessary Cuda calls, and libgomp
is modified to switch to shared-memory mode for USM allocated mappings.

libgomp/ChangeLog:

* allocator.c (omp_max_predefined_alloc): Update.
(omp_aligned_alloc): Don't fallback ompx_host_mem_alloc.
(omp_aligned_calloc): Likewise.
(omp_realloc): Likewise.
* config/linux/allocator.c (linux_memspace_alloc): Handle USM.
(linux_memspace_calloc): Handle USM.
(linux_memspace_free): Handle USM.
(linux_memspace_realloc): Handle USM.
* config/nvptx/allocator.c (nvptx_memspace_alloc): Reject
ompx_host_mem_alloc.
(nvptx_memspace_calloc): Likewise.
(nvptx_memspace_realloc): Likewise.
* libgomp-plugin.h (GOMP_OFFLOAD_usm_alloc): New prototype.
(GOMP_OFFLOAD_usm_free): New prototype.
(GOMP_OFFLOAD_is_usm_ptr): New prototype.
* libgomp.h (gomp_usm_alloc): New prototype.
(gomp_usm_free): New prototype.
(gomp_is_usm_ptr): New prototype.
(struct gomp_device_descr): Add USM functions.
* omp.h.in (omp_memspace_handle_t): Add ompx_unified_shared_mem_space
and ompx_host_mem_space.
(omp_allocator_handle_t): Add ompx_unified_shared_mem_alloc and
ompx_host_mem_alloc.
* omp_lib.f90.in: Likewise.
* plugin/plugin-nvptx.c (nvptx_alloc): Add "usm" parameter.
Call cuMemAllocManaged as appropriate.
(GOMP_OFFLOAD_alloc): Move internals to ...
(GOMP_OFFLOAD_alloc_1): ... this, and add usm parameter.
(GOMP_OFFLOAD_usm_alloc): New function.
(GOMP_OFFLOAD_usm_free): New function.
(GOMP_OFFLOAD_is_usm_ptr): New function.
* target.c (gomp_map_vars_internal): Add USM support.
(gomp_usm_alloc): New function.
(gomp_usm_free): New function.
(gomp_load_plugin_for_device): New function.
* testsuite/libgomp.c/usm-1.c: New test.
* testsuite/libgomp.c/usm-2.c: New test.
* testsuite/libgomp.c/usm-3.c: New test.
* testsuite/libgomp.c/usm-4.c: New test.
* testsuite/libgomp.c/usm-5.c: New test.
---
 libgomp/allocator.c | 13 --
 libgomp/config/linux/allocator.c| 48 
 libgomp/config/nvptx/allocator.c|  6 +++
 libgomp/libgomp-plugin.h|  3 ++
 libgomp/libgomp.h   |  6 +++
 libgomp/omp.h.in|  4 ++
 libgomp/omp_lib.f90.in  |  8 
 libgomp/plugin/plugin-nvptx.c   | 45 ---
 libgomp/target.c| 70 +
 libgomp/testsuite/libgomp.c/usm-1.c | 24 ++
 libgomp/testsuite/libgomp.c/usm-2.c | 32 +
 libgomp/testsuite/libgomp.c/usm-3.c | 35 +++
 libgomp/testsuite/libgomp.c/usm-4.c | 36 +++
 libgomp/testsuite/libgomp.c/usm-5.c | 28 
 14 files changed, 330 insertions(+), 28 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.c/usm-1.c
 create mode 100644 libgomp/testsuite/libgomp.c/usm-2.c
 create mode 100644 libgomp/testsuite/libgomp.c/usm-3.c
 create mode 100644 libgomp/testsuite/libgomp.c/usm-4.c
 create mode 100644 libgomp/testsuite/libgomp.c/usm-5.c

diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index 000ccc2dd9c..18045dbe0c4 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -32,7 +32,7 @@
 #include 
 #include 
 
-#define omp_max_predefined_alloc ompx_pinned_mem_alloc
+#define omp_max_predefined_alloc ompx_host_mem_alloc
 
 /* These macros may be overridden in config//allocator.c.  */
 #ifndef MEMSPACE_ALLOC
@@ -68,6 +68,8 @@ static const omp_memspace_handle_t predefined_alloc_mapping[] 
= {
   omp_low_lat_mem_space,   /* omp_pteam_mem_alloc. */
   omp_low_lat_mem_space,   /* omp_thread_mem_alloc. */
   omp_default_mem_space,   /* ompx_pinned_mem_alloc. */
+  ompx_unified_shared_mem_space,  /* ompx_unified_shared_mem_alloc. */
+  ompx_host_mem_space, /* ompx_host_mem_alloc.  */
 };
 
 struct omp_allocator_data
@@ -367,7 +369,8 @@ fail:
   int fallback = (allocator_data
  ? allocator_data->fallback
  : (allocator == omp_default_mem_alloc
-|| allocator == ompx_pinned_mem_alloc)
+|| allocator == ompx_pinned_mem_alloc
+|| allocator == ompx_host_mem_alloc)
  ? omp_atv_null_fb
  : omp_atv_default_mem_fb);
   switch (fallback)
@@ -597,7 +600,8 @@ fail:
   

[PATCH 4/5] openmp: Use libgomp memory allocation functions with unified shared memory.

2022-03-08 Thread Hafiz Abid Qadeer
This patch changes calls to malloc/free/calloc/realloc and operator new to
memory allocation functions in libgomp with
allocator=ompx_unified_shared_mem_alloc.  This helps existing code to benefit
from the unified shared memory.  Libgomp does the correct thing with all
the mapping constructs and there are no memory copies if the pointer is pointing
to unified shared memory.

We only replace the replaceable operator new, not class-member or placement
new.

gcc/ChangeLog:

* omp-low.cc (usm_transform): New function.
(make_pass_usm_transform): Likewise.
(class pass_usm_transform): New.
* passes.def: Add pass_usm_transform.
* tree-pass.h (make_pass_usm_transform): New declaration.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/usm-2.c: New test.
* c-c++-common/gomp/usm-3.c: New test.
* g++.dg/gomp/usm-1.C: New test.
* g++.dg/gomp/usm-2.C: New test.
* g++.dg/gomp/usm-3.C: New test.
* gfortran.dg/gomp/usm-2.f90: New test.
* gfortran.dg/gomp/usm-3.f90: New test.

libgomp/ChangeLog:

* testsuite/libgomp.c/usm-6.c: New test.
* testsuite/libgomp.c++/usm-1.C: Likewise.
---
 gcc/omp-low.cc   | 152 +++
 gcc/passes.def   |   1 +
 gcc/testsuite/c-c++-common/gomp/usm-2.c  |  34 +
 gcc/testsuite/c-c++-common/gomp/usm-3.c  |  32 +
 gcc/testsuite/g++.dg/gomp/usm-1.C|  32 +
 gcc/testsuite/g++.dg/gomp/usm-2.C|  30 +
 gcc/testsuite/g++.dg/gomp/usm-3.C|  38 ++
 gcc/testsuite/gfortran.dg/gomp/usm-2.f90 |  16 +++
 gcc/testsuite/gfortran.dg/gomp/usm-3.f90 |  13 ++
 gcc/tree-pass.h  |   1 +
 libgomp/testsuite/libgomp.c++/usm-1.C|  54 
 libgomp/testsuite/libgomp.c/usm-6.c  |  70 +++
 12 files changed, 473 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-2.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-3.c
 create mode 100644 gcc/testsuite/g++.dg/gomp/usm-1.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/usm-2.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/usm-3.C
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-3.f90
 create mode 100644 libgomp/testsuite/libgomp.c++/usm-1.C
 create mode 100644 libgomp/testsuite/libgomp.c/usm-6.c

diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 5ce3a50709a..ec08d59f676 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -14849,6 +14849,158 @@ make_pass_diagnose_omp_blocks (gcc::context *ctxt)
 {
   return new pass_diagnose_omp_blocks (ctxt);
 }
+
+/* Provide the transformation required for using unified shared memory
+   by replacing calls to standard memory allocation functions with
+   the corresponding functions provided by libgomp.  */
+
+static tree
+usm_transform (gimple_stmt_iterator *gsi_p, bool *,
+  struct walk_stmt_info *wi)
+{
+  gimple *stmt = gsi_stmt (*gsi_p);
+  /* ompx_unified_shared_mem_alloc is 10.  */
+  const unsigned int unified_shared_mem_alloc = 10;
+
+  switch (gimple_code (stmt))
+{
+case GIMPLE_CALL:
+  {
+   gcall *gs = as_a  (stmt);
+   tree fndecl = gimple_call_fndecl (gs);
+   if (fndecl)
+ {
+   tree allocator = build_int_cst (pointer_sized_int_node,
+   unified_shared_mem_alloc);
+   const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
+   if ((strcmp (name, "malloc") == 0)
+|| (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+&& DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MALLOC)
+|| DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl))
+ {
+ tree omp_alloc_type
+   = build_function_type_list (ptr_type_node, size_type_node,
+   pointer_sized_int_node,
+   NULL_TREE);
+   tree repl = build_fn_decl ("omp_alloc", omp_alloc_type);
+   tree size = gimple_call_arg (gs, 0);
+   gimple *g = gimple_build_call (repl, 2, size, allocator);
+   gimple_call_set_lhs (g, gimple_call_lhs (gs));
+   gimple_set_location (g, gimple_location (stmt));
+   gsi_replace (gsi_p, g, true);
+ }
+   else if ((strcmp (name, "calloc") == 0)
+ || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CALLOC))
+ {
+   tree omp_calloc_type
+ = build_function_type_list (ptr_type_node, size_type_node,
+ size_type_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+   tree repl = build_fn_decl ("omp_calloc", omp_calloc_type);
+

[PATCH 5/5] openmp: -foffload-memory=pinned

2022-03-08 Thread Hafiz Abid Qadeer
From: Andrew Stubbs 

Implement the -foffload-memory=pinned option such that libgomp is
instructed to enable fully-pinned memory at start-up.  The option is
intended to provide a performance boost to certain offload programs without
modifying the code.

This feature only works on Linux, at present, and simply calls mlockall to
enable always-on memory pinning.  It requires that the locked-memory limit
(ulimit -l) is set high enough to accommodate all the program's memory usage.

In this mode the ompx_pinned_mem_alloc feature is disabled as it is not
needed and may conflict.

gcc/ChangeLog:

* omp-low.cc (omp_enable_pinned_mode): New function.
(execute_lower_omp): Call omp_enable_pinned_mode.

libgomp/ChangeLog:

* config/linux/allocator.c (always_pinned_mode): New variable.
(GOMP_enable_pinned_mode): New function.
(linux_memspace_alloc): Disable pinning when always_pinned_mode set.
(linux_memspace_calloc): Likewise.
(linux_memspace_free): Likewise.
(linux_memspace_realloc): Likewise.
* libgomp.map (GOMP_5.1.1): New version space with
GOMP_enable_pinned_mode.
* testsuite/libgomp.c/alloc-pinned-7.c: New test.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/alloc-pinned-1.c: New test.
---
 gcc/omp-low.cc| 68 +++
 .../c-c++-common/gomp/alloc-pinned-1.c| 28 
 libgomp/config/linux/allocator.c  | 26 +++
 libgomp/libgomp.map   |  5 ++
 libgomp/testsuite/libgomp.c/alloc-pinned-7.c  | 66 ++
 5 files changed, 193 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/alloc-pinned-1.c
 create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-7.c

diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index ec08d59f676..ce21b3bd6f8 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -14441,6 +14441,70 @@ lower_omp (gimple_seq *body, omp_context *ctx)
   input_location = saved_location;
 }
 
+/* Emit a constructor function to enable -foffload-memory=pinned
+   at runtime.  Libgomp handles the OS mode setting, but we need to trigger
+   it by calling GOMP_enable_pinned_mode before the program proper runs.  */
+
+static void
+omp_enable_pinned_mode ()
+{
+  static bool visited = false;
+  if (visited)
+return;
+  visited = true;
+
+  /* Create a new function like this:
+
+   static void __attribute__((constructor))
+   __set_pinned_mode ()
+   {
+GOMP_enable_pinned_mode ();
+   }
+  */
+
+  tree name = get_identifier ("__set_pinned_mode");
+  tree voidfntype = build_function_type_list (void_type_node, NULL_TREE);
+  tree decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name, voidfntype);
+
+  TREE_STATIC (decl) = 1;
+  TREE_USED (decl) = 1;
+  DECL_ARTIFICIAL (decl) = 1;
+  DECL_IGNORED_P (decl) = 0;
+  TREE_PUBLIC (decl) = 0;
+  DECL_UNINLINABLE (decl) = 1;
+  DECL_EXTERNAL (decl) = 0;
+  DECL_CONTEXT (decl) = NULL_TREE;
+  DECL_INITIAL (decl) = make_node (BLOCK);
+  BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
+  DECL_STATIC_CONSTRUCTOR (decl) = 1;
+  DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("constructor"),
+ NULL_TREE, NULL_TREE);
+
+  tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE,
+  void_type_node);
+  DECL_ARTIFICIAL (t) = 1;
+  DECL_IGNORED_P (t) = 1;
+  DECL_CONTEXT (t) = decl;
+  DECL_RESULT (decl) = t;
+
+  push_struct_function (decl);
+  init_tree_ssa (cfun);
+
+  tree callname = get_identifier ("GOMP_enable_pinned_mode");
+  tree calldecl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, callname,
+ voidfntype);
+  gcall *call = gimple_build_call (calldecl, 0);
+
+  gimple_seq seq = NULL;
+  gimple_seq_add_stmt (&seq, call);
+  gimple_set_body (decl, gimple_build_bind (NULL_TREE, seq, NULL));
+
+  cfun->function_end_locus = UNKNOWN_LOCATION;
+  cfun->curr_properties |= PROP_gimple_any;
+  pop_cfun ();
+  cgraph_node::add_new_function (decl, true);
+}
+
 /* Main entry point.  */
 
 static unsigned int
@@ -14497,6 +14561,10 @@ execute_lower_omp (void)
   for (auto task_stmt : task_cpyfns)
 finalize_task_copyfn (task_stmt);
   task_cpyfns.release ();
+
+  if (flag_offload_memory == OFFLOAD_MEMORY_PINNED)
+omp_enable_pinned_mode ();
+
   return 0;
 }
 
diff --git a/gcc/testsuite/c-c++-common/gomp/alloc-pinned-1.c 
b/gcc/testsuite/c-c++-common/gomp/alloc-pinned-1.c
new file mode 100644
index 000..e0e08019bff
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/alloc-pinned-1.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-additional-options "-foffload-memory=pinned" } */
+/* { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu 
} } */
+
+#if __cplusplus
+#define EXTERNC extern "C"
+#else
+#define EXTERNC
+#endif
+
+/* Intercept the libgomp initialization call to check it happens.  */
+
+int good = 0;
+
+EXTERNC void
+GOMP_enable_pinned_m