Hi Andrew,

On 11.12.23 18:04, Andrew Stubbs wrote:
This creates a new predefined allocator as a shortcut for using pinned
memory with OpenMP.  The name uses the OpenMP extension space and is
intended to be consistent with other OpenMP implementations currently in
development.

Discussed this with Jakub - and 9 does not permit to have a contiguous
range of numbers if OpenMP ever extends this,

Thus, maybe start the ompx_ with 100.

We were also pondering whether it should be ompx_gnu_pinned_mem_alloc or
ompx_pinned_mem_alloc.

The only other compiler supporting this flag seems to be IBM; their
compiler uses ompx_pinned_mem_alloc with the same meaning:
https://www.ibm.com/support/pages/system/files/inline-files/OMP5_User_Reference.pdf
(page 5)

As the obvious meaning is what both compilers have, I am fine without
the "gnu" infix, which Jakub accepted.

* * *

And you have not updated the compiler itself to support more this new
allocator. Cf.

https://github.com/gcc-mirror/gcc/blob/master/gcc/testsuite/c-c++-common/gomp/allocate-9.c#L23-L28

That file gives an overview what needs to be changed:

* The check functions mentioned there (seemingly for two ranges now)

* Update the OMP_ALLOCATOR env var parser in env.c

* That linked testcase (and possibly some some more) should be updated,
also to ensure that the new allocator is accepted + to check for new
unsupported values (99, 101 ?)

If we now leave gaps, the const_assert in libgomp/allocator.c probably
needs to be updated as well.

* * *

Glancing through the patches, for test cases, I think you should
'abort()' in CHECK_SIZE if it fails (rlimit issue or not supported
system). Or do you think that the results are still could make sense
when continuing and possibly failing later?

Tobias

The allocator is equivalent to using a custom allocator with the pinned
trait and the null fallback trait.

libgomp/ChangeLog:

      * allocator.c (omp_max_predefined_alloc): Update.
      (predefined_alloc_mapping): Add ompx_pinned_mem_alloc entry.
      (omp_aligned_alloc): Support ompx_pinned_mem_alloc.
      (omp_free): Likewise.
      (omp_aligned_calloc): Likewise.
      (omp_realloc): Likewise.
      * libgomp.texi: Document ompx_pinned_mem_alloc.
      * omp.h.in (omp_allocator_handle_t): Add ompx_pinned_mem_alloc.
      * omp_lib.f90.in: Add ompx_pinned_mem_alloc.
      * testsuite/libgomp.c/alloc-pinned-5.c: New test.
      * testsuite/libgomp.c/alloc-pinned-6.c: New test.
      * testsuite/libgomp.fortran/alloc-pinned-1.f90: New test.

Co-Authored-By: Thomas Schwinge<tho...@codesourcery.com>
---
  libgomp/allocator.c                           |  58 ++++++----
  libgomp/libgomp.texi                          |   7 +-
  libgomp/omp.h.in                              |   1 +
  libgomp/omp_lib.f90.in                        |   2 +
  libgomp/testsuite/libgomp.c/alloc-pinned-5.c  | 103 ++++++++++++++++++
  libgomp/testsuite/libgomp.c/alloc-pinned-6.c  | 101 +++++++++++++++++
  .../libgomp.fortran/alloc-pinned-1.f90        |  16 +++
  7 files changed, 268 insertions(+), 20 deletions(-)
  create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-5.c
  create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-6.c
  create mode 100644 libgomp/testsuite/libgomp.fortran/alloc-pinned-1.f90


v3-0002-libgomp-openmp-Add-ompx_pinned_mem_alloc.patch

diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index 666adf9a3a9..6c69c4f008f 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -35,7 +35,7 @@
  #include <dlfcn.h>
  #endif

-#define omp_max_predefined_alloc omp_thread_mem_alloc
+#define omp_max_predefined_alloc ompx_pinned_mem_alloc

  /* These macros may be overridden in config/<target>/allocator.c.
     The defaults (no override) are to return NULL for pinned memory requests
@@ -78,6 +78,7 @@ static const omp_memspace_handle_t predefined_alloc_mapping[] 
= {
    omp_low_lat_mem_space,   /* omp_cgroup_mem_alloc (implementation defined). 
*/
    omp_low_lat_mem_space,   /* omp_pteam_mem_alloc (implementation defined). */
    omp_low_lat_mem_space,   /* omp_thread_mem_alloc (implementation defined). 
*/
+  omp_default_mem_space,   /* ompx_pinned_mem_alloc. */
  };

  #define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
@@ -623,8 +624,10 @@ retry:
        memspace = (allocator_data
                    ? allocator_data->memspace
                    : predefined_alloc_mapping[allocator]);
-       ptr = MEMSPACE_ALLOC (memspace, new_size,
-                             allocator_data && allocator_data->pinned);
+       int pinned = (allocator_data
+                     ? allocator_data->pinned
+                     : allocator == ompx_pinned_mem_alloc);
+       ptr = MEMSPACE_ALLOC (memspace, new_size, pinned);
      }
        if (ptr == NULL)
      goto fail;
@@ -645,7 +648,8 @@ retry:
  fail:;
    int fallback = (allocator_data
                ? allocator_data->fallback
-               : allocator == omp_default_mem_alloc
+               : (allocator == omp_default_mem_alloc
+                  || allocator == ompx_pinned_mem_alloc)
                ? omp_atv_null_fb
                : omp_atv_default_mem_fb);
    switch (fallback)
@@ -760,6 +764,7 @@ omp_free (void *ptr, omp_allocator_handle_t allocator)
  #endif

        memspace = predefined_alloc_mapping[data->allocator];
+      pinned = (data->allocator == ompx_pinned_mem_alloc);
      }

    MEMSPACE_FREE (memspace, data->ptr, data->size, pinned);
@@ -933,8 +938,10 @@ retry:
        memspace = (allocator_data
                    ? allocator_data->memspace
                    : predefined_alloc_mapping[allocator]);
-       ptr = MEMSPACE_CALLOC (memspace, new_size,
-                              allocator_data && allocator_data->pinned);
+       int pinned = (allocator_data
+                     ? allocator_data->pinned
+                     : allocator == ompx_pinned_mem_alloc);
+       ptr = MEMSPACE_CALLOC (memspace, new_size, pinned);
      }
        if (ptr == NULL)
      goto fail;
@@ -955,7 +962,8 @@ retry:
  fail:;
    int fallback = (allocator_data
                ? allocator_data->fallback
-               : allocator == omp_default_mem_alloc
+               : (allocator == omp_default_mem_alloc
+                  || allocator == ompx_pinned_mem_alloc)
                ? omp_atv_null_fb
                : omp_atv_default_mem_fb);
    switch (fallback)
@@ -1165,11 +1173,14 @@ retry:
        else
  #endif
        if (prev_size)
-     new_ptr = MEMSPACE_REALLOC (allocator_data->memspace, data->ptr,
-                                 data->size, new_size,
-                                 (free_allocator_data
-                                  && free_allocator_data->pinned),
-                                 allocator_data->pinned);
+     {
+       int was_pinned = (free_allocator_data
+                         ? free_allocator_data->pinned
+                         : free_allocator == ompx_pinned_mem_alloc);
+       new_ptr = MEMSPACE_REALLOC (allocator_data->memspace, data->ptr,
+                                   data->size, new_size, was_pinned,
+                                   allocator_data->pinned);
+     }
        else
      new_ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size,
                                allocator_data->pinned);
@@ -1225,10 +1236,14 @@ retry:
        memspace = (allocator_data
                    ? allocator_data->memspace
                    : predefined_alloc_mapping[allocator]);
+       int was_pinned = (free_allocator_data
+                         ? free_allocator_data->pinned
+                         : free_allocator == ompx_pinned_mem_alloc);
+       int pinned = (allocator_data
+                     ? allocator_data->pinned
+                     : allocator == ompx_pinned_mem_alloc);
        new_ptr = MEMSPACE_REALLOC (memspace, data->ptr, data->size, new_size,
-                                   (free_allocator_data
-                                    && free_allocator_data->pinned),
-                                   allocator_data && allocator_data->pinned);
+                                   was_pinned, pinned);
      }
        if (new_ptr == NULL)
      goto fail;
@@ -1262,8 +1277,10 @@ retry:
        memspace = (allocator_data
                    ? allocator_data->memspace
                    : predefined_alloc_mapping[allocator]);
-       new_ptr = MEMSPACE_ALLOC (memspace, new_size,
-                                 allocator_data && allocator_data->pinned);
+       int pinned = (allocator_data
+                     ? allocator_data->pinned
+                     : allocator == ompx_pinned_mem_alloc);
+       new_ptr = MEMSPACE_ALLOC (memspace, new_size, pinned);
      }
        if (new_ptr == NULL)
      goto fail;
@@ -1318,7 +1335,9 @@ retry:
      was_memspace = (free_allocator_data
                  ? free_allocator_data->memspace
                  : predefined_alloc_mapping[free_allocator]);
-    int was_pinned = (free_allocator_data && free_allocator_data->pinned);
+    int was_pinned = (free_allocator_data
+                   ? free_allocator_data->pinned
+                   : free_allocator == ompx_pinned_mem_alloc);
      MEMSPACE_FREE (was_memspace, data->ptr, data->size, was_pinned);
    }
    return ret;
@@ -1326,7 +1345,8 @@ retry:
  fail:;
    int fallback = (allocator_data
                ? allocator_data->fallback
-               : allocator == omp_default_mem_alloc
+               : (allocator == omp_default_mem_alloc
+                  || allocator == ompx_pinned_mem_alloc)
                ? omp_atv_null_fb
                : omp_atv_default_mem_fb);
    switch (fallback)
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 5838311b7d8..1f73c49c9b8 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -3015,6 +3015,7 @@ value.
  @item omp_cgroup_mem_alloc      @tab omp_low_lat_mem_space (implementation 
defined)
  @item omp_pteam_mem_alloc       @tab omp_low_lat_mem_space (implementation 
defined)
  @item omp_thread_mem_alloc      @tab omp_low_lat_mem_space (implementation 
defined)
+@item ompx_pinned_mem_alloc     @tab omp_default_mem_space (GNU extension)
  @end multitable

  The predefined allocators use the default values for the traits,
@@ -3040,7 +3041,7 @@ as listed below.  Except that the last three allocators 
have the
  @item @code{fb_data}   @tab @emph{unsupported as it needs an allocator handle}
                         @tab (none)
  @item @code{pinned}    @tab @code{true}, @code{false}
-                       @tab @code{false}
+                       @tab See below
  @item @code{partition} @tab @code{environment}, @code{nearest},
                              @code{blocked}, @code{interleaved}
                         @tab @code{environment}
@@ -3051,6 +3052,10 @@ For the @code{fallback} trait, the default value is 
@code{null_fb} for the
  with device memory; for all other other allocators, it is 
@code{default_mem_fb}
  by default.

+For the @code{pinned} trait, the default value is @code{true} for
+predefined allocator @code{ompx_pinned_mem_alloc} (a GNU extension), and
+@code{false} for all others.
+
  Examples:
  @smallexample
  OMP_ALLOCATOR=omp_high_bw_mem_alloc
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index bd1286c2a3f..66989e693b7 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -134,6 +134,7 @@ typedef enum omp_allocator_handle_t __GOMP_UINTPTR_T_ENUM
    omp_cgroup_mem_alloc = 6,
    omp_pteam_mem_alloc = 7,
    omp_thread_mem_alloc = 8,
+  ompx_pinned_mem_alloc = 9,
    __omp_allocator_handle_t_max__ = __UINTPTR_MAX__
  } omp_allocator_handle_t;

diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index 755c0bf16ec..e27e4aacbdf 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -158,6 +158,8 @@
                   parameter :: omp_pteam_mem_alloc = 7
          integer (kind=omp_allocator_handle_kind), &
                   parameter :: omp_thread_mem_alloc = 8
+        integer (kind=omp_allocator_handle_kind), &
+                 parameter :: ompx_pinned_mem_alloc = 9
          integer (omp_memspace_handle_kind), &
                   parameter :: omp_default_mem_space = 0
          integer (omp_memspace_handle_kind), &
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-5.c 
b/libgomp/testsuite/libgomp.c/alloc-pinned-5.c
new file mode 100644
index 00000000000..18e6d20ca5b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-5.c
@@ -0,0 +1,103 @@
+/* { dg-do run } */
+
+/* { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu 
} } */
+
+/* Test that ompx_pinned_mem_alloc works.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/resource.h>
+
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+#define CHECK_SIZE(SIZE) { \
+  struct rlimit limit; \
+  if (getrlimit (RLIMIT_MEMLOCK, &limit) \
+      || limit.rlim_cur <= SIZE) \
+    fprintf (stderr, "unsufficient lockable memory; please increase 
ulimit\n"); \
+  }
+
+int
+get_pinned_mem ()
+{
+  int pid = getpid ();
+  char buf[100];
+  sprintf (buf, "/proc/%d/status", pid);
+
+  FILE *proc = fopen (buf, "r");
+  if (!proc)
+    abort ();
+  while (fgets (buf, 100, proc))
+    {
+      int val;
+      if (sscanf (buf, "VmLck: %d", &val))
+     {
+       fclose (proc);
+       return val;
+     }
+    }
+  abort ();
+}
+#else
+#define PAGE_SIZE 10000 * 1024 /* unknown */
+#define CHECK_SIZE(SIZE) fprintf (stderr, "OS unsupported\n");
+
+int
+get_pinned_mem ()
+{
+  return 0;
+}
+#endif
+
+static void
+verify0 (char *p, size_t s)
+{
+  for (size_t i = 0; i < s; ++i)
+    if (p[i] != 0)
+      abort ();
+}
+
+#include <omp.h>
+
+int
+main ()
+{
+  /* Allocate at least a page each time, allowing space for overhead,
+     but stay within the ulimit.  */
+  const int SIZE = PAGE_SIZE - 128;
+  CHECK_SIZE (SIZE * 5);
+
+  // Sanity check
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  void *p = omp_alloc (SIZE, ompx_pinned_mem_alloc);
+  if (!p)
+    abort ();
+
+  int amount = get_pinned_mem ();
+  if (amount == 0)
+    abort ();
+
+  p = omp_realloc (p, SIZE * 2, ompx_pinned_mem_alloc, ompx_pinned_mem_alloc);
+
+  int amount2 = get_pinned_mem ();
+  if (amount2 <= amount)
+    abort ();
+
+  /* SIZE*2 ensures that it doesn't slot into the space possibly
+     vacated by realloc.  */
+  p = omp_calloc (1, SIZE * 2, ompx_pinned_mem_alloc);
+
+  if (get_pinned_mem () <= amount2)
+    abort ();
+
+  verify0 (p, SIZE * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-6.c 
b/libgomp/testsuite/libgomp.c/alloc-pinned-6.c
new file mode 100644
index 00000000000..f80a0264f97
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-6.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+
+/* Test that ompx_pinned_mem_alloc fails correctly.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/resource.h>
+
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+
+int
+get_pinned_mem ()
+{
+  int pid = getpid ();
+  char buf[100];
+  sprintf (buf, "/proc/%d/status", pid);
+
+  FILE *proc = fopen (buf, "r");
+  if (!proc)
+    abort ();
+  while (fgets (buf, 100, proc))
+    {
+      int val;
+      if (sscanf (buf, "VmLck: %d", &val))
+     {
+       fclose (proc);
+       return val;
+     }
+    }
+  abort ();
+}
+
+void
+set_pin_limit (int size)
+{
+  struct rlimit limit;
+  if (getrlimit (RLIMIT_MEMLOCK, &limit))
+    abort ();
+  limit.rlim_cur = (limit.rlim_max < size ? limit.rlim_max : size);
+  if (setrlimit (RLIMIT_MEMLOCK, &limit))
+    abort ();
+}
+#else
+#define PAGE_SIZE 10000 * 1024 /* unknown */
+
+int
+get_pinned_mem ()
+{
+  return 0;
+}
+
+void
+set_pin_limit ()
+{
+}
+#endif
+
+#include <omp.h>
+
+int
+main ()
+{
+  /* Allocate at least a page each time, but stay within the ulimit.  */
+  const int SIZE = PAGE_SIZE * 4;
+
+  /* Ensure that the limit is smaller than the allocation.  */
+  set_pin_limit (SIZE / 2);
+
+  // Sanity check
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  // Should fail
+  void *p = omp_alloc (SIZE, ompx_pinned_mem_alloc);
+  if (p)
+    abort ();
+
+  // Should fail
+  p = omp_calloc (1, SIZE, ompx_pinned_mem_alloc);
+  if (p)
+    abort ();
+
+  // Should fail to realloc
+  void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc);
+  p = omp_realloc (notpinned, SIZE, ompx_pinned_mem_alloc, 
omp_default_mem_alloc);
+  if (!notpinned || p)
+    abort ();
+
+  // No memory should have been pinned
+  int amount = get_pinned_mem ();
+  if (amount != 0)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/alloc-pinned-1.f90 
b/libgomp/testsuite/libgomp.fortran/alloc-pinned-1.f90
new file mode 100644
index 00000000000..798dc3d5a12
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/alloc-pinned-1.f90
@@ -0,0 +1,16 @@
+! Ensure that the ompx_pinned_mem_alloc predefined allocator is present and
+! accepted.  The majority of the functionality testing lives in the C tests.
+!
+! { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu } 
}
+
+program main
+  use omp_lib
+  use ISO_C_Binding
+  implicit none (external, type)
+
+  type(c_ptr) :: p
+
+  p = omp_alloc (10_c_size_t, ompx_pinned_mem_alloc);
+  if (.not. c_associated (p)) stop 1
+  call omp_free (p, ompx_pinned_mem_alloc);
+end program main
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Reply via email to