This helper allows limiting the maximum amount of time to be spent preallocating a block of memory, which is important on systems that might have unpredictable page allocation delays because of possible fragmentation or other reasons specific to the backend.
It also exposes a way to register a callback that is invoked in case the specified timeout is exceeded. The callback is provided with a PreallocStats structure that includes a bunch of statistics about the progress including total & allocated number of pages, as well as page size and number of allocation threads. The win32 implementation is currently a stub that just calls into the old qemu_prealloc_mem api. Signed-off-by: Daniil Tatianin <d-tatia...@yandex-team.ru> --- include/qemu/osdep.h | 19 ++++++++ util/oslib-posix.c | 114 +++++++++++++++++++++++++++++++++++++++---- util/oslib-win32.c | 9 ++++ 3 files changed, 133 insertions(+), 9 deletions(-) diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index bd23a08595..21757e5144 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -595,6 +595,25 @@ typedef struct ThreadContext ThreadContext; void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, ThreadContext *tc, Error **errp); +typedef struct PreallocStats { + size_t page_size; + size_t total_pages; + size_t allocated_pages; + int threads; + time_t seconds_elapsed; +} PreallocStats; + +typedef struct PreallocTimeout { + time_t seconds; + void *user; + void (*on_timeout)(void *user, const PreallocStats *stats); +} PreallocTimeout; + +void qemu_prealloc_mem_with_timeout(int fd, char *area, size_t sz, + int max_threads, ThreadContext *tc, + const PreallocTimeout *timeout, + Error **errp); + /** * qemu_get_pid_name: * @pid: pid of a process diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 59a891b6a8..570fca601f 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -74,6 +74,7 @@ typedef struct MemsetContext { bool any_thread_failed; struct MemsetThread *threads; int num_threads; + PreallocStats stats; } MemsetContext; struct MemsetThread { @@ -83,6 +84,7 @@ struct MemsetThread { QemuThread pgthread; sigjmp_buf env; MemsetContext *context; + size_t touched_pages; }; typedef struct MemsetThread MemsetThread; @@ -373,6 
+375,7 @@ static void *do_touch_pages(void *arg) */ *(volatile char *)addr = *addr; addr += hpagesize; + qatomic_inc(&memset_args->touched_pages); } } pthread_sigmask(SIG_SETMASK, &oldset, NULL); @@ -396,6 +399,11 @@ static void *do_madv_populate_write_pages(void *arg) if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) { ret = -errno; } + + if (!ret) { + qatomic_set(&memset_args->touched_pages, memset_args->numpages); + } + return (void *)(uintptr_t)ret; } @@ -418,8 +426,68 @@ static inline int get_memset_num_threads(size_t hpagesize, size_t numpages, return ret; } +static int do_join_memset_threads_with_timeout(MemsetContext *context, + time_t seconds) +{ + struct timespec ts; + int i = 0; + + if (clock_gettime(CLOCK_REALTIME, &ts) < 0) { + return i; + } + ts.tv_sec += seconds; + + for (; i < context->num_threads; ++i) { + if (pthread_timedjoin_np(context->threads[i].pgthread.thread, + NULL, &ts)) { + break; + } + } + + return i; +} + +static void memset_stats_count_pages(MemsetContext *context) +{ + int i; + + for (i = 0; i < context->num_threads; ++i) { + size_t pages = qatomic_load_acquire( + &context->threads[i].touched_pages); + context->stats.allocated_pages += pages; + } +} + +static int timed_join_memset_threads(MemsetContext *context, + const PreallocTimeout *timeout) +{ + int i, off; + PreallocStats *stats = &context->stats; + off = do_join_memset_threads_with_timeout(context, timeout->seconds); + + if (off != context->num_threads && timeout->on_timeout) { + memset_stats_count_pages(context); + + /* + * Guard against possible races if preallocation finishes right + * after the timeout is exceeded. 
+ */ + if (stats->allocated_pages < stats->total_pages) { + stats->seconds_elapsed = timeout->seconds; + timeout->on_timeout(timeout->user, stats); + } + } + + for (i = off; i < context->num_threads; ++i) { + pthread_cancel(context->threads[i].pgthread.thread); + } + + return off; +} + static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, int max_threads, ThreadContext *tc, + const PreallocTimeout *timeout, bool use_madv_populate_write) { static gsize initialized = 0; @@ -452,6 +520,9 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, } context.threads = g_new0(MemsetThread, context.num_threads); + context.stats.page_size = hpagesize; + context.stats.total_pages = numpages; + context.stats.threads = context.num_threads; numpages_per_thread = numpages / context.num_threads; leftover = numpages % context.num_threads; for (i = 0; i < context.num_threads; i++) { @@ -481,11 +552,20 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, qemu_cond_broadcast(&page_cond); qemu_mutex_unlock(&page_mutex); - for (i = 0; i < context.num_threads; i++) { - int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread); + if (timeout) { + i = timed_join_memset_threads(&context, timeout); + + if (i != context.num_threads && + context.stats.allocated_pages != context.stats.total_pages) { + ret = -ETIMEDOUT; + } + } else { i = 0; } + + for (; i < context.num_threads; i++) { + void *thread_ret = qemu_thread_join(&context.threads[i].pgthread); - if (tmp) { - ret = tmp; + if (thread_ret && thread_ret != PTHREAD_CANCELED) { + ret = (uintptr_t)thread_ret; } } @@ -503,8 +583,10 @@ static bool madv_populate_write_possible(char *area, size_t pagesize) errno != EINVAL; } -void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, - ThreadContext *tc, Error **errp) +void qemu_prealloc_mem_with_timeout(int fd, char *area, size_t sz, + int max_threads, ThreadContext *tc, + const PreallocTimeout *timeout, + Error **errp) { 
static gsize initialized; int ret; @@ -546,10 +628,18 @@ void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, /* touch pages simultaneously */ ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, - use_madv_populate_write); + timeout, use_madv_populate_write); + if (ret) { - error_setg_errno(errp, -ret, - "qemu_prealloc_mem: preallocating memory failed"); + const char *msg; + + if (timeout && ret == -ETIMEDOUT) { + msg = "preallocation timed out"; + } else { + msg = "preallocating memory failed"; + } + + error_setg_errno(errp, -ret, "qemu_prealloc_mem: %s", msg); } if (!use_madv_populate_write) { @@ -563,6 +653,12 @@ void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, } } +void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, + ThreadContext *tc, Error **errp) +{ + qemu_prealloc_mem_with_timeout(fd, area, sz, max_threads, tc, NULL, errp); +} + char *qemu_get_pid_name(pid_t pid) { char *name = NULL; diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 07ade41800..27f39ef66a 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -276,6 +276,15 @@ void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, } } +void qemu_prealloc_mem_with_timeout(int fd, char *area, size_t sz, + int max_threads, ThreadContext *tc, + const PreallocTimeout *timeout, + Error **errp) +{ + /* FIXME: actually implement timing out here */ + qemu_prealloc_mem(fd, area, sz, max_threads, tc, errp); +} + char *qemu_get_pid_name(pid_t pid) { /* XXX Implement me */ -- 2.25.1