On 1/8/24 9:40 AM, David Hildenbrand wrote:
On 08.01.24 16:10, Mark Kanda wrote:
Refactor the memory prealloc threads support:
- Make memset context a global qlist
- Move the memset thread join/cleanup code to a separate routine

This is functionally equivalent and facilitates multiple memset contexts
(used in a subsequent patch).

Signed-off-by: Mark Kanda <mark.ka...@oracle.com>
---
  util/oslib-posix.c | 104 +++++++++++++++++++++++++++++----------------
  1 file changed, 68 insertions(+), 36 deletions(-)
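
For reference, the hunks quoted below do not include the join/cleanup side that the commit message says is moved into a separate routine. A rough, hypothetical sketch of such a routine, reusing the MemsetContext list and the QEMU thread/queue helpers this patch already relies on (the function name wait_and_free_memset_contexts() and the exact error handling are made up for illustration, not taken from the patch):

static int wait_and_free_memset_contexts(void)
{
    int ret = 0;
    MemsetContext *context, *next_context;

    /* Join every outstanding memset thread, then drop its context. */
    QLIST_FOREACH_SAFE(context, &memset_contexts, next, next_context) {
        for (int i = 0; i < context->num_threads; i++) {
            int tmp = (uintptr_t)qemu_thread_join(&context->threads[i].pgthread);

            if (tmp) {
                ret = tmp;
            }
        }
        QLIST_REMOVE(context, next);
        g_free(context->threads);
        g_free(context);
    }
    return ret;
}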

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index e86fd64e09..293297ac6c 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -63,11 +63,15 @@
 
 struct MemsetThread;
 
+static QLIST_HEAD(, MemsetContext) memset_contexts =
+    QLIST_HEAD_INITIALIZER(memset_contexts);
+
  typedef struct MemsetContext {
      bool all_threads_created;
      bool any_thread_failed;
      struct MemsetThread *threads;
      int num_threads;
+    QLIST_ENTRY(MemsetContext) next;
 } MemsetContext;
 
 struct MemsetThread {
@@ -81,7 +85,7 @@ struct MemsetThread {
 typedef struct MemsetThread MemsetThread;
 
 /* used by sigbus_handler() */
-static MemsetContext *sigbus_memset_context;
+static bool sigbus_memset_context;
  struct sigaction sigbus_oldact;
  static QemuMutex sigbus_mutex;
 
@@ -295,13 +299,16 @@ static void sigbus_handler(int signal)
  #endif /* CONFIG_LINUX */
  {
      int i;
+    MemsetContext *context;
 
     if (sigbus_memset_context) {
-        for (i = 0; i < sigbus_memset_context->num_threads; i++) {
-            MemsetThread *thread = &sigbus_memset_context->threads[i];
+        QLIST_FOREACH(context, &memset_contexts, next) {
+            for (i = 0; i < context->num_threads; i++) {
+                MemsetThread *thread = &context->threads[i];
 
-            if (qemu_thread_is_self(&thread->pgthread)) {
-                siglongjmp(thread->env, 1);
+                if (qemu_thread_is_self(&thread->pgthread)) {
+                    siglongjmp(thread->env, 1);
+                }
              }
          }
      }
@@ -417,14 +424,15 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
                             bool use_madv_populate_write)
  {
      static gsize initialized = 0;
-    MemsetContext context = {
-        .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads),
-    };
+    MemsetContext *context = g_malloc0(sizeof(MemsetContext));
      size_t numpages_per_thread, leftover;
      void *(*touch_fn)(void *);
-    int ret = 0, i = 0;
+    int i = 0;
      char *addr = area;
 
+    context->num_threads =
+        get_memset_num_threads(hpagesize, numpages, max_threads);
+
      if (g_once_init_enter(&initialized)) {
          qemu_mutex_init(&page_mutex);
          qemu_cond_init(&page_cond);
@@ -433,7 +441,7 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
 
     if (use_madv_populate_write) {
          /* Avoid creating a single thread for MADV_POPULATE_WRITE */
-        if (context.num_threads == 1) {
+        if (context->num_threads == 1) {
              if (qemu_madvise(area, hpagesize * numpages,
                               QEMU_MADV_POPULATE_WRITE)) {
                  return -errno;
@@ -445,49 +453,74 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
          touch_fn = do_touch_pages;
      }
 
-    context.threads = g_new0(MemsetThread, context.num_threads);
-    numpages_per_thread = numpages / context.num_threads;
-    leftover = numpages % context.num_threads;
-    for (i = 0; i < context.num_threads; i++) {
-        context.threads[i].addr = addr;
-        context.threads[i].numpages = numpages_per_thread + (i < leftover);
-        context.threads[i].hpagesize = hpagesize;
-        context.threads[i].context = &context;
+    context->threads = g_new0(MemsetThread, context->num_threads);
+    numpages_per_thread = numpages / context->num_threads;
+    leftover = numpages % context->num_threads;
+    for (i = 0; i < context->num_threads; i++) {
+        context->threads[i].addr = addr;
+        context->threads[i].numpages = numpages_per_thread + (i < leftover);
+        context->threads[i].hpagesize = hpagesize;
+        context->threads[i].context = context;
          if (tc) {
-            thread_context_create_thread(tc, &context.threads[i].pgthread,
+            thread_context_create_thread(tc, &context->threads[i].pgthread,
                                          "touch_pages",
-                                         touch_fn, &context.threads[i],
+                                         touch_fn, &context->threads[i],
                                          QEMU_THREAD_JOINABLE);
         } else {
-            qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
-                               touch_fn, &context.threads[i],
+            qemu_thread_create(&context->threads[i].pgthread, "touch_pages",
+                               touch_fn, &context->threads[i],
                                QEMU_THREAD_JOINABLE);
          }
-        addr += context.threads[i].numpages * hpagesize;
+        addr += context->threads[i].numpages * hpagesize;
      }
 
+    QLIST_INSERT_HEAD(&memset_contexts, context, next);
+
      if (!use_madv_populate_write) {
-        sigbus_memset_context = &context;
+        sigbus_memset_context = true;

Thanks David,

Could we just leave the sigbus handling alone and support parallel init only when using MADV_POPULATE_WRITE, where we don't have to mess with signal handlers?


Ideally, we're hoping to support this with earlier kernels which don't support MADV_POPULATE_WRITE. But I will check to see if we really need it.
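
For what it's worth, MADV_POPULATE_WRITE only exists since Linux 5.14, so whether the madvise path is usable can be probed at runtime. A minimal sketch using plain madvise(2) and nothing QEMU-specific (the function name here is made up):

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>

/*
 * Probe whether this kernel understands MADV_POPULATE_WRITE by trying it on
 * one page: EINVAL means the advice is unknown (pre-5.14 kernel or old
 * headers); any other outcome means the flag itself is recognized.
 */
static bool can_use_madv_populate_write(void *area, size_t pagesize)
{
#ifdef MADV_POPULATE_WRITE
    return madvise(area, pagesize, MADV_POPULATE_WRITE) == 0 ||
           errno != EINVAL;
#else
    return false;  /* built against headers that predate the flag */
#endif
}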

Further, how do your changes interact with other callers of qemu_prealloc_mem(), like virtio-mem?


I'm not familiar with the intricacies of virtio-mem, but the basic idea of this series is to *only* allow parallel init during the startup phase (while prealloc_init == false). Once we have parsed all the command line args, we set prealloc_init = true (in wait_mem_prealloc_init()), which causes all subsequent calls to qemu_prealloc_mem() to perform initialization synchronously. So, I *think* this covers the virtio-mem use case. Am I missing something?
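
To make the gating concrete, here is a very rough, self-contained sketch of the scheme described above; only the prealloc_init flag and wait_mem_prealloc_init() come from this discussion, every other name is invented for illustration:

#include <stdbool.h>
#include <stddef.h>

static bool prealloc_init;  /* false while command-line objects are created */

/* Placeholders for the real spawn/join/sync paths. */
static void spawn_prealloc_threads(char *area, size_t sz) { (void)area; (void)sz; }
static void join_outstanding_prealloc_threads(void) { }
static void prealloc_sync(char *area, size_t sz) { (void)area; (void)sz; }

static void prealloc_mem_sketch(char *area, size_t sz)
{
    if (!prealloc_init) {
        /* Startup phase: kick off memset threads and return immediately,
         * so multiple backends can be populated in parallel. */
        spawn_prealloc_threads(area, sz);
        return;
    }
    /* Any later caller (e.g. virtio-mem): fully synchronous, as today. */
    prealloc_sync(area, sz);
}

void wait_mem_prealloc_init(void)
{
    /* Called once all command-line args are parsed: join the outstanding
     * threads and switch every subsequent preallocation to sync mode. */
    join_outstanding_prealloc_threads();
    prealloc_init = true;
}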

Thanks/regards,
-Mark
