On Wed, Jan 31, 2024 at 12:27:51PM -0300, Fabiano Rosas wrote:
> > +/* Reset a MultiFDPages_t* object for the next use */
> > +static void multifd_pages_reset(MultiFDPages_t *pages)
> > +{
> > +    /*
> > +     * We don't need to touch offset[] array, because it will be
> > +     * overwritten later when reused.
> > +     */
> > +    pages->num = 0;
> > +    pages->block = NULL;
> 
> Having to do this at all is a huge overloading of this field. This not
> only resets it, but it also communicates to multifd_queue_page() that
> the previous payload has been sent. Otherwise, multifd_queue_page()
> wouldn't know whether the previous call to multifd_queue_page() has
> called multifd_send_pages() or if it has exited early. So this basically
> means "the block that was previously here has been sent".
> 
> That's why we need the changed=true logic. A
> multifd_send_state->pages->block still has a few pages left to send, but
> because it's less than pages->allocated, it skips
> multifd_send_pages(). The next call to multifd_queue_page() already has
> the next ramblock. So we set changed=true, call multifd_send_pages() to
> send the remaining pages of that block and recurse into
> multifd_queue_page() once more to send the new block.

I agree, the queue page routines are not easy to follow as well.

How do you like a rewrite of the queue logic, like this?

=====
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages;

retry:
    pages = multifd_send_state->pages;

    /* If the queue is empty, we can already enqueue now */
    if (multifd_queue_empty(pages)) {
        pages->block = block;
        multifd_enqueue(pages, offset);
        return true;
    }

    /*
     * Not empty, meanwhile we need a flush.  It can because of either:
     *
     * (1) The page is not on the same ramblock of previous ones, or,
     * (2) The queue is full.
     *
     * After flush, always retry.
     */
    if (pages->block != block || multifd_queue_full(pages)) {
        if (!multifd_send_pages()) {
            return false;
        }
        goto retry;
    }

    /* Not empty, and we still have space, do it! */
    multifd_enqueue(pages, offset);
    return true;
}
=====

Would this be clearer?  With above, we can drop the ->ramblock reset,
afaict.

I attached three patches if you agree it's better, then I'll include them
in v2.

-- 
Peter Xu
>From c5dc2052794efd6da6a1e6f4b49f25d5b32879f7 Mon Sep 17 00:00:00 2001
From: Peter Xu <pet...@redhat.com>
Date: Thu, 1 Feb 2024 17:50:21 +0800
Subject: [PATCH 1/3] migration/multifd: Change retval of multifd_queue_page()

Using int is an overkill when there're only two options.  Change it to a
boolean.

Signed-off-by: Peter Xu <pet...@redhat.com>
---
 migration/multifd.h | 2 +-
 migration/multifd.c | 9 +++++----
 migration/ram.c     | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/migration/multifd.h b/migration/multifd.h
index 34a2ecb9f4..a320c53a6f 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -22,7 +22,7 @@ bool multifd_recv_all_channels_created(void);
 void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
 void multifd_recv_sync_main(void);
 int multifd_send_sync_main(void);
-int multifd_queue_page(RAMBlock *block, ram_addr_t offset);
+bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
 
 /* Multifd Compression flags */
 #define MULTIFD_FLAG_SYNC (1 << 0)
diff --git a/migration/multifd.c b/migration/multifd.c
index 91be6d2fc4..d0a3b4e062 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -505,7 +505,8 @@ static int multifd_send_pages(void)
     return 1;
 }
 
-int multifd_queue_page(RAMBlock *block, ram_addr_t offset)
+/* Returns true if enqueue successful, false otherwise */
+bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
 {
     MultiFDPages_t *pages = multifd_send_state->pages;
     bool changed = false;
@@ -519,21 +520,21 @@ int multifd_queue_page(RAMBlock *block, ram_addr_t offset)
         pages->num++;
 
         if (pages->num < pages->allocated) {
-            return 1;
+            return true;
         }
     } else {
         changed = true;
     }
 
     if (multifd_send_pages() < 0) {
-        return -1;
+        return false;
     }
 
     if (changed) {
         return multifd_queue_page(block, offset);
     }
 
-    return 1;
+    return true;
 }
 
 /* Multifd send side hit an error; remember it and prepare to quit */
diff --git a/migration/ram.c b/migration/ram.c
index d5b7cd5ac2..4649a81204 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1252,7 +1252,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus 
*pss)
 
 static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
 {
-    if (multifd_queue_page(block, offset) < 0) {
+    if (!multifd_queue_page(block, offset)) {
         return -1;
     }
     stat64_add(&mig_stats.normal_pages, 1);
-- 
2.43.0

>From f393f1cfe95d79bed72e6043903ee4c4cb298c21 Mon Sep 17 00:00:00 2001
From: Peter Xu <pet...@redhat.com>
Date: Thu, 1 Feb 2024 17:51:38 +0800
Subject: [PATCH 2/3] migration/multifd: Change retval of multifd_send_pages()

Using int is an overkill when there're only two options.  Change it to a
boolean.

Signed-off-by: Peter Xu <pet...@redhat.com>
---
 migration/multifd.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index d0a3b4e062..d2b0f0eda9 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -449,9 +449,10 @@ static void multifd_send_kick_main(MultiFDSendParams *p)
  * thread is using the channel mutex when changing it, and the channel
  * have to had finish with its own, otherwise pending_job can't be
  * false.
+ *
+ * Returns true if succeed, false otherwise.
  */
-
-static int multifd_send_pages(void)
+static bool multifd_send_pages(void)
 {
     int i;
     static int next_channel;
@@ -459,7 +460,7 @@ static int multifd_send_pages(void)
     MultiFDPages_t *pages = multifd_send_state->pages;
 
     if (multifd_send_should_exit()) {
-        return -1;
+        return false;
     }
 
     /* We wait here, until at least one channel is ready */
@@ -473,7 +474,7 @@ static int multifd_send_pages(void)
     next_channel %= migrate_multifd_channels();
     for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
         if (multifd_send_should_exit()) {
-            return -1;
+            return false;
         }
         p = &multifd_send_state->params[i];
         /*
@@ -502,7 +503,7 @@ static int multifd_send_pages(void)
     qemu_mutex_unlock(&p->mutex);
     qemu_sem_post(&p->sem);
 
-    return 1;
+    return true;
 }
 
 /* Returns true if enqueue successful, false otherwise */
@@ -526,7 +527,7 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
         changed = true;
     }
 
-    if (multifd_send_pages() < 0) {
+    if (!multifd_send_pages()) {
         return false;
     }
 
@@ -666,7 +667,7 @@ int multifd_send_sync_main(void)
         return 0;
     }
     if (multifd_send_state->pages->num) {
-        if (multifd_send_pages() < 0) {
+        if (!multifd_send_pages()) {
             error_report("%s: multifd_send_pages fail", __func__);
             return -1;
         }
-- 
2.43.0

>From fcddc942cb31bc9d395d67a555d9a2281da452b1 Mon Sep 17 00:00:00 2001
From: Peter Xu <pet...@redhat.com>
Date: Thu, 1 Feb 2024 17:55:42 +0800
Subject: [PATCH 3/3] migration/multifd: Rewrite multifd_queue_page()

The current multifd_queue_page() is not easy to read and follow.  It is not
good with a few reasons:

  - No helper at all to show what exactly does a condition mean; in short,
  readability is low.

  - Rely on pages->ramblock being cleared to detect an empty queue.  It's
  slightly an overload of the ramblock pointer, per Fabiano [1], which I
  also agree.

  - Contains a self recursion, even if not necessary..

Rewrite this function.  We add some comments to make it even clearer on
what it does.

[1] https://lore.kernel.org/r/87wmrpjzew....@suse.de

Signed-off-by: Peter Xu <pet...@redhat.com>
---
 migration/multifd.c | 56 ++++++++++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 19 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index d2b0f0eda9..5a64a9c2e2 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -506,35 +506,53 @@ static bool multifd_send_pages(void)
     return true;
 }
 
+static inline bool multifd_queue_empty(MultiFDPages_t *pages)
+{
+    return pages->num == 0;
+}
+
+static inline bool multifd_queue_full(MultiFDPages_t *pages)
+{
+    return pages->num == pages->allocated;
+}
+
+static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
+{
+    pages->offset[pages->num++] = offset;
+}
+
 /* Returns true if enqueue successful, false otherwise */
 bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
 {
-    MultiFDPages_t *pages = multifd_send_state->pages;
-    bool changed = false;
+    MultiFDPages_t *pages;
+
+retry:
+    pages = multifd_send_state->pages;
 
-    if (!pages->block) {
+    /* If the queue is empty, we can already enqueue now */
+    if (multifd_queue_empty(pages)) {
         pages->block = block;
+        multifd_enqueue(pages, offset);
+        return true;
     }
 
-    if (pages->block == block) {
-        pages->offset[pages->num] = offset;
-        pages->num++;
-
-        if (pages->num < pages->allocated) {
-            return true;
+    /*
+     * Not empty, meanwhile we need a flush.  It can because of either:
+     *
+     * (1) The page is not on the same ramblock of previous ones, or,
+     * (2) The queue is full.
+     *
+     * After flush, always retry.
+     */
+    if (pages->block != block || multifd_queue_full(pages)) {
+        if (!multifd_send_pages()) {
+            return false;
         }
-    } else {
-        changed = true;
-    }
-
-    if (!multifd_send_pages()) {
-        return false;
-    }
-
-    if (changed) {
-        return multifd_queue_page(block, offset);
+        goto retry;
     }
 
+    /* Not empty, and we still have space, do it! */
+    multifd_enqueue(pages, offset);
     return true;
 }
 
-- 
2.43.0

Reply via email to