This patch implements the OpenMP pinned memory trait for Linux hosts. On
other hosts and on devices the trait becomes a no-op (instead of being
rejected).
The memory is locked via the mlock syscall, which is both the "correct"
way to do it on Linux and a problem, because the default ulimit for
locked memory (ulimit -l) is very small, and most users probably don't
have permission to raise it by much. Therefore the code emits a
non-fatal warning message if locking fails.
Another approach might be to use cudaHostAlloc to allocate the memory in
the first place, which bypasses the ulimit somehow, but this would not
help non-NVidia users.
The tests work on Linux and will xfail on other hosts; neither libgomp
nor the test knows how to allocate or query pinned memory elsewhere.
The patch applies on top of the text of my previously submitted patches,
but does not actually depend on the functionality of those patches.
OK for stage 1?
I'll commit a backport to OG11 shortly.
Andrew
libgomp: pinned memory
Implement the OpenMP pinned memory trait on Linux hosts using the mlock
syscall.
libgomp/ChangeLog:
* allocator.c (MEMSPACE_PIN): New macro.
(xmlock): New function.
(omp_init_allocator): Don't disallow the pinned trait.
(omp_aligned_alloc): Add pinning via MEMSPACE_PIN.
(omp_aligned_calloc): Likewise.
(omp_realloc): Likewise.
* testsuite/libgomp.c/alloc-pinned-1.c: New test.
* testsuite/libgomp.c/alloc-pinned-2.c: New test.
diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index b1f5fe0a5e2..671b91e7ff8 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -51,6 +51,25 @@
#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE) \
((void)MEMSPACE, (void)SIZE, free (ADDR))
#endif
+#ifndef MEMSPACE_PIN
+/* MEMSPACE_PIN (MEMSPACE, ADDR, SIZE) is invoked on freshly obtained
+   memory when the allocator has the pinned trait set.  The #ifndef guard
+   lets an earlier definition take precedence.  On Linux hosts the
+   request is forwarded to xmlock; MEMSPACE is evaluated and discarded.  */
+#ifdef __linux__
+#define MEMSPACE_PIN(MEMSPACE, ADDR, SIZE) \
+ ((void)MEMSPACE, xmlock (ADDR, SIZE))
+
+#include <sys/mman.h>
+#include <stdio.h>
+/* Best-effort helper behind MEMSPACE_PIN: ask the kernel to lock the SIZE
+   bytes starting at ADDR into RAM using mlock.  A failure is reported on
+   stderr via perror and otherwise ignored, so the caller carries on with
+   usable (but unpinned) memory.  Declared static because it is only
+   reached through the MEMSPACE_PIN macro in this file.  */
+static void
+xmlock (void *addr, size_t size)
+{
+  if (mlock (addr, size))
+    perror ("libgomp: failed to pin memory (ulimit too low?)");
+}
+#else
+/* No pinning implementation for this host: the arguments are evaluated
+   and discarded, so the pinned trait is accepted but has no effect.  */
+#define MEMSPACE_PIN(MEMSPACE, ADDR, SIZE) \
+ ((void)MEMSPACE, (void)ADDR, (void)SIZE)
+#endif /* __linux__ */
+#endif /* !MEMSPACE_PIN */
/* Map the predefined allocators to the correct memory space.
The index to this table is the omp_allocator_handle_t enum value. */
@@ -212,7 +231,7 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits,
data.alignment = sizeof (void *);
/* No support for these so far (for hbw will use memkind). */
- if (data.pinned || data.memspace == omp_high_bw_mem_space)
+ if (data.memspace == omp_high_bw_mem_space)
return omp_null_allocator;
ret = gomp_malloc (sizeof (struct omp_allocator_data));
@@ -326,6 +345,9 @@ retry:
#endif
goto fail;
}
+
+ if (allocator_data->pinned)
+ MEMSPACE_PIN (allocator_data->memspace, ptr, new_size);
}
else
{
@@ -335,6 +357,9 @@ retry:
ptr = MEMSPACE_ALLOC (memspace, new_size);
if (ptr == NULL)
goto fail;
+
+ if (allocator_data && allocator_data->pinned)
+ MEMSPACE_PIN (allocator_data->memspace, ptr, new_size);
}
if (new_alignment > sizeof (void *))
@@ -539,6 +564,9 @@ retry:
#endif
goto fail;
}
+
+ if (allocator_data->pinned)
+ MEMSPACE_PIN (allocator_data->memspace, ptr, new_size);
}
else
{
@@ -548,6 +576,9 @@ retry:
ptr = MEMSPACE_CALLOC (memspace, new_size);
if (ptr == NULL)
goto fail;
+
+ if (allocator_data && allocator_data->pinned)
+ MEMSPACE_PIN (allocator_data->memspace, ptr, new_size);
}
if (new_alignment > sizeof (void *))
@@ -727,7 +758,11 @@ retry:
#endif
goto fail;
}
- else if (prev_size)
+
+ if (allocator_data->pinned)
+ MEMSPACE_PIN (allocator_data->memspace, new_ptr, new_size);
+
+ if (prev_size)
{
ret = (char *) new_ptr + sizeof (struct omp_mem_header);
((struct omp_mem_header *) ret)[-1].ptr = new_ptr;
@@ -747,6 +782,10 @@ retry:
new_ptr = MEMSPACE_REALLOC (memspace, data->ptr, data->size, new_size);
if (new_ptr == NULL)
goto fail;
+
+ /* Pin the block that MEMSPACE_REALLOC just returned (new_ptr), as the
+    other omp_realloc path does; ptr still names the old block.  */
+ if (allocator_data && allocator_data->pinned)
+ MEMSPACE_PIN (allocator_data->memspace, new_ptr, new_size);
+
ret = (char *) new_ptr + sizeof (struct omp_mem_header);
((struct omp_mem_header *) ret)[-1].ptr = new_ptr;
((struct omp_mem_header *) ret)[-1].size = new_size;
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-1.c b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
new file mode 100644
index 00000000000..0a6360cda29
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
@@ -0,0 +1,81 @@
+/* { dg-do run } */
+
+/* { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu } } */
+
+/* Test that pinned memory works. */
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/mman.h>
+
+/* Return the integer that follows "VmLck:" in /proc/<pid>/status for the
+   current process (the kernel reports this figure in kB; see proc(5)).
+   Aborts if the file cannot be opened or no such line is found.  */
+int
+get_pinned_mem ()
+{
+  int pid = getpid ();
+  char buf[100];
+  snprintf (buf, sizeof buf, "/proc/%d/status", pid);
+
+  FILE *proc = fopen (buf, "r");
+  if (!proc)
+    abort ();
+  while (fgets (buf, sizeof buf, proc))
+    {
+      int val;
+      /* sscanf can return EOF, which is non-zero, so insist on exactly
+         one successful conversion before trusting VAL.  */
+      if (sscanf (buf, "VmLck: %d", &val) == 1)
+        {
+          fclose (proc);
+          return val;
+        }
+    }
+  abort ();
+}
+#else
+/* main calls abort () on every host, so its declaration is needed in
+   this branch as well as in the Linux one.  */
+#include <stdlib.h>
+
+/* Fallback for hosts where this test has no way to query locked memory:
+   always report zero, which makes main's "amount == 0" check abort.  */
+int
+get_pinned_mem ()
+{
+  return 0;
+}
+#endif
+
+#include <omp.h>
+
+/* Allocate more than a page each time, but stay within the ulimit.  The
+   expansion is parenthesized so SIZE behaves as a single operand in any
+   surrounding expression.  */
+#define SIZE (10*1024)
+
+/* Allocate, reallocate and calloc from an allocator with the pinned trait,
+   checking after each step that the value reported by get_pinned_mem has
+   grown.  Any failed allocation or missing growth aborts the test.  */
+int
+main ()
+{
+  const omp_alloctrait_t traits[] = {
+    { omp_atk_pinned, 1 }
+  };
+  omp_allocator_handle_t allocator
+    = omp_init_allocator (omp_default_mem_space, 1, traits);
+
+  // Sanity check
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  void *p = omp_alloc (SIZE, allocator);
+  if (!p)
+    abort ();
+
+  int amount = get_pinned_mem ();
+  if (amount == 0)
+    abort ();
+
+  /* Check every allocation result, as alloc-pinned-2.c does, so that an
+     allocation failure is not mistaken for a pinning failure.  */
+  p = omp_realloc (p, SIZE*2, allocator, allocator);
+  if (!p)
+    abort ();
+
+  int amount2 = get_pinned_mem ();
+  if (amount2 <= amount)
+    abort ();
+
+  p = omp_calloc (1, SIZE, allocator);
+  if (!p)
+    abort ();
+
+  if (get_pinned_mem () <= amount2)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-2.c b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
new file mode 100644
index 00000000000..8fdb4ff5cfd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
@@ -0,0 +1,87 @@
+/* { dg-do run } */
+
+/* { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu } } */
+
+/* Test that pinned memory works (pool_size code path). */
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/mman.h>
+
+/* Return the integer that follows "VmLck:" in /proc/<pid>/status for the
+   current process (the kernel reports this figure in kB; see proc(5)).
+   Aborts if the file cannot be opened or no such line is found.  */
+int
+get_pinned_mem ()
+{
+  int pid = getpid ();
+  char buf[100];
+  snprintf (buf, sizeof buf, "/proc/%d/status", pid);
+
+  FILE *proc = fopen (buf, "r");
+  if (!proc)
+    abort ();
+  while (fgets (buf, sizeof buf, proc))
+    {
+      int val;
+      /* sscanf can return EOF, which is non-zero, so insist on exactly
+         one successful conversion before trusting VAL.  */
+      if (sscanf (buf, "VmLck: %d", &val) == 1)
+        {
+          fclose (proc);
+          return val;
+        }
+    }
+  abort ();
+}
+#else
+/* main calls abort () on every host, so its declaration is needed in
+   this branch as well as in the Linux one.  */
+#include <stdlib.h>
+
+/* Fallback for hosts where this test has no way to query locked memory:
+   always report zero, which makes main's "amount == 0" check abort.  */
+int
+get_pinned_mem ()
+{
+  return 0;
+}
+#endif
+
+#include <omp.h>
+
+/* Allocate more than a page each time, but stay within the ulimit.  The
+   expansion is parenthesized so SIZE behaves as a single operand in any
+   surrounding expression.  */
+#define SIZE (10*1024)
+
+/* Drive omp_alloc, omp_realloc and omp_calloc through an allocator that
+   has both the pinned trait and a pool_size limit.  After each step the
+   figure returned by get_pinned_mem must be larger than before; any
+   failed allocation or missing growth aborts.  */
+int
+main ()
+{
+  const omp_alloctrait_t pinned_pool_traits[] = {
+    { omp_atk_pinned, 1 },
+    { omp_atk_pool_size, SIZE*8 }
+  };
+  omp_allocator_handle_t pinned_alloc
+    = omp_init_allocator (omp_default_mem_space, 2, pinned_pool_traits);
+  void *block;
+  int locked_prev, locked_now;
+
+  /* Nothing may be locked before the first allocation.  */
+  if (get_pinned_mem ())
+    abort ();
+
+  /* Step 1: a plain allocation must lock something.  */
+  block = omp_alloc (SIZE, pinned_alloc);
+  if (!block)
+    abort ();
+  locked_prev = get_pinned_mem ();
+  if (!locked_prev)
+    abort ();
+
+  /* Step 2: growing the block must raise the locked amount.  */
+  block = omp_realloc (block, SIZE*2, pinned_alloc, pinned_alloc);
+  if (!block)
+    abort ();
+  locked_now = get_pinned_mem ();
+  if (!(locked_now > locked_prev))
+    abort ();
+
+  /* Step 3: a further zeroed allocation must raise it again.  */
+  block = omp_calloc (1, SIZE, pinned_alloc);
+  if (!block)
+    abort ();
+  locked_prev = locked_now;
+  if (!(get_pinned_mem () > locked_prev))
+    abort ();
+
+  return 0;
+}