The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will 
appear at g...@bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.19
------>
commit 0946cf907557e9c22294b0831bf760608e184416
Author: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
Date:   Thu Mar 6 13:21:09 2025 +0800

    dm-qcow2: make merge_backward command asyncronous
    
    This adds merge_backward "start", "complete", "update_eventfd" and
    "cancel" commands. By that we are able to split single merge_backward
    into two stages: start asyncronous merging and completion. That can be
    usefull for restarting qemu process while allowing backward merging to
    run asyncronously in kernel.
    
    The "start" command runs merging preparations in workqueue work. After
    it finishes it sends event to eventfd set on "start", receiving event on
    eventfd the "complete" command can be called to finish the process and
    actually replace the top qcow2 with it's lower. In case work encounters
    any errors or "cancel" request it will also send event to eventfd,
    calling "complete" after that will fail. Basically userspace is
    guaranteed to receive event from eventfd in any case after start.
    
    The "cancel" command forces the work to stop and flushes it. In case we
    are in completion waiting state already and there is no work running,
    the "cancel" command also reverts merging preparations.
    
    The "update_eventfd" command can be used to update eventfd for currently
    running merge, e.g. in case old eventfd was lost for some reason. This
    command on success guarantees that the caller will receive event from
    the new eventfd. If "update_eventfd" fails with -EBUSY, it means that
    there is no currently running merge in progress.
    
    Locking:
    
    Data in tgt->backward_merge is protected by tgt->ctl_mutex. The "start"
    and "complete" commands are fully under this lock, and the "cancel"
    operation takes the lock explicitly and releases it for work flushing.
    The work also takes the lock but only when updating tgt->backward_merge
    data. For checks, if the work was caneled in the middle, we read the
    state without locking as we don't modify the state there, also we would
    re-check the state again before exiting the work function under lock.
    
    Now on target suspend we "cancel" currently running backward merge,
    previously we were just hanging untill backward merge have been
    finished for possibly a long time, cancelling seems cleaner. Though we
    don't really expect hypervisor suspending the target in the middle of
    backward merge that it by itself started.
    
    The backward_merge.eventfd_ctx is also protected from being released by
    tgt->ctl_mutex.
    
    Note: After this patch the backward merge runs in a workqueue and also
    the tgt->ctl_mutex is not held for a long time anymore, so we remove
    interruptible mutex wait, and replace pending signal checks in the
    middle of backward merge with checking "should stop" state.
    
    https://virtuozzo.atlassian.net/browse/VSTOR-100466
    Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
    Reviewed-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
    
    ======
    Patchset description:
    dm-qcow2: make backward merge asyncronous
    
    That can be usefull for restarting qemu process while allowing backward
    merging to run asyncronously in kernel.
    
    Feature: dm-qcow2: block device over QCOW2 files driver
---
 drivers/md/dm-qcow2-cmd.c    | 222 +++++++++++++++++++++++++++++++++++++++----
 drivers/md/dm-qcow2-target.c |   6 ++
 drivers/md/dm-qcow2.h        |  21 ++++
 3 files changed, 233 insertions(+), 16 deletions(-)

diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c
index 7b4b0ee68ad9..cd416ffc1814 100644
--- a/drivers/md/dm-qcow2-cmd.c
+++ b/drivers/md/dm-qcow2-cmd.c
@@ -5,6 +5,8 @@
 #include <linux/device-mapper.h>
 #include <linux/sched/signal.h>
 #include <linux/file.h>
+#include <linux/eventfd.h>
+#include <linux/minmax.h>
 #include <linux/error-injection.h>
 #include "dm-qcow2.h"
 
@@ -17,8 +19,10 @@ static int qcow2_get_errors(struct qcow2_target *tgt, char 
*result,
        unsigned int sz = 0;
        int ret;
 
-       ret = 
DMEMIT("wants_check=%d\nmd_writeback_error=%d\ntruncate_error=%d\n",
-                     wants_check, tgt->md_writeback_error, 
tgt->truncate_error);
+       ret = 
DMEMIT("wants_check=%d\nmd_writeback_error=%d\ntruncate_error=%d\n"
+                    "merge_backward_error=%d\n",
+                     wants_check, tgt->md_writeback_error, tgt->truncate_error,
+                     tgt->backward_merge.error);
 
        return ret ? 1 : 0;
 }
@@ -52,6 +56,8 @@ static void service_qio_endio(struct qcow2_target *tgt, 
struct qio *qio,
        wake_up(&tgt->service_wq);
 }
 
+static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt);
+
 static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2,
                  loff_t end, loff_t step, unsigned int bi_op, u8 qio_flags)
 {
@@ -63,7 +69,7 @@ static int qcow2_service_iter(struct qcow2_target *tgt, 
struct qcow2 *qcow2,
        WRITE_ONCE(service_status, BLK_STS_OK);
 
        for (pos = 0; pos < end; pos += step) {
-               if (fatal_signal_pending(current)) {
+               if (qcow2_backward_merge_should_stop(tgt)) {
                        ret = -EINTR;
                        break;
                }
@@ -161,10 +167,14 @@ static void set_backward_merge_in_process(struct 
qcow2_target *tgt,
        qcow2_submit_embedded_qios(tgt, &list);
 }
 
-static int qcow2_merge_backward(struct qcow2_target *tgt)
+static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt, int efd);
+
+static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd)
 {
        struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
-       int ret, ret2;
+       int ret;
+
+       lockdep_assert_held(&tgt->ctl_mutex);
 
        if (!lower)
                return -ENOENT;
@@ -174,6 +184,43 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
                return -EOPNOTSUPP;
        if (lower->hdr.size < qcow2->hdr.size)
                return -EBADSLT;
+
+       if (tgt->backward_merge.state != BACKWARD_MERGE_STOPPED)
+               return -EBUSY;
+
+       ret = qcow2_merge_backward_set_eventfd(tgt, efd);
+       if (ret)
+               return ret;
+
+       tgt->backward_merge.state = BACKWARD_MERGE_START;
+       tgt->backward_merge.error = 0;
+
+       schedule_work(&tgt->backward_merge.work);
+       return 0;
+}
+ALLOW_ERROR_INJECTION(qcow2_merge_backward_start, ERRNO);
+
+void qcow2_merge_backward_work(struct work_struct *work)
+{
+       struct qcow2_target *tgt = container_of(work, struct qcow2_target,
+                                               backward_merge.work);
+       struct qcow2 *qcow2, *lower;
+       int ret, ret2;
+
+       mutex_lock(&tgt->ctl_mutex);
+       if (tgt->backward_merge.state != BACKWARD_MERGE_START) {
+               if (tgt->backward_merge.eventfd_ctx)
+                       eventfd_signal(tgt->backward_merge.eventfd_ctx, 1);
+               qcow2_merge_backward_set_eventfd(tgt, -1);
+               mutex_unlock(&tgt->ctl_mutex);
+               return;
+       }
+       tgt->backward_merge.state = BACKWARD_MERGE_RUN;
+       mutex_unlock(&tgt->ctl_mutex);
+
+       qcow2 = tgt->top;
+       lower = qcow2->lower;
+
        /*
         * Break all COW clus at L1 level. Otherwise, later
         * there would be problems with unusing them:
@@ -183,13 +230,13 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
        ret = qcow2_break_l1cow(tgt);
        if (ret) {
                QC_ERR(tgt->ti, "Can't break L1 COW");
-               return ret;
+               goto out_err;
        }
 
        ret = qcow2_set_image_file_features(lower, true);
        if (ret) {
                QC_ERR(tgt->ti, "Can't set dirty bit");
-               return ret;
+               goto out_err;
        }
        set_backward_merge_in_process(tgt, qcow2, true);
 
@@ -200,22 +247,129 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
                ret2 = qcow2_set_image_file_features(lower, false);
                if (ret2 < 0)
                        QC_ERR(tgt->ti, "Can't unuse lower (%d)", ret2);
-               return ret;
        }
+
+out_err:
+       mutex_lock(&tgt->ctl_mutex);
+       if (ret) {
+               /* Error */
+               tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
+               tgt->backward_merge.error = ret;
+       } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
+               /* Merge is canceled */
+               set_backward_merge_in_process(tgt, qcow2, false);
+               tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
+               tgt->backward_merge.error = -EINTR;
+       } else {
+               /* Finish merge */
+               tgt->backward_merge.state = BACKWARD_MERGE_WAIT_COMPLETION;
+       }
+       if (tgt->backward_merge.eventfd_ctx)
+               eventfd_signal(tgt->backward_merge.eventfd_ctx, 1);
+       qcow2_merge_backward_set_eventfd(tgt, -1);
+       mutex_unlock(&tgt->ctl_mutex);
+}
+
+static int qcow2_merge_backward_complete(struct qcow2_target *tgt)
+{
+       struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+       int ret;
+
+       lockdep_assert_held(&tgt->ctl_mutex);
+
+       if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION)
+               return -EBUSY;
+
        tgt->top = lower;
        smp_wmb(); /* Pairs with qcow2_ref_inc() */
        qcow2_inflight_ref_switch(tgt); /* Pending qios */
        qcow2_flush_deferred_activity(tgt, qcow2); /* Delayed md pages */
        qcow2->lower = NULL;
 
-       ret2 = qcow2_set_image_file_features(qcow2, false);
-       if (ret2 < 0)
-               QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret2);
+       ret = qcow2_set_image_file_features(qcow2, false);
+       if (ret < 0)
+               QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret);
        qcow2_destroy(qcow2);
 
+       tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
+
+       return 0;
+}
+ALLOW_ERROR_INJECTION(qcow2_merge_backward_complete, ERRNO);
+
+void qcow2_merge_backward_cancel(struct qcow2_target *tgt)
+{
+       bool flush = false;
+
+       mutex_lock(&tgt->ctl_mutex);
+       if (tgt->backward_merge.state == BACKWARD_MERGE_STOPPED) {
+               mutex_unlock(&tgt->ctl_mutex);
+               return;
+       }
+
+       if (tgt->backward_merge.state == BACKWARD_MERGE_START) {
+               tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
+               flush = true;
+       } else if (tgt->backward_merge.state == BACKWARD_MERGE_RUN) {
+               tgt->backward_merge.state = BACKWARD_MERGE_STOP;
+               flush = true;
+       } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
+               flush = true;
+       } else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) 
{
+               set_backward_merge_in_process(tgt, tgt->top, false);
+               tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
+       }
+       mutex_unlock(&tgt->ctl_mutex);
+
+       if (flush)
+               flush_work(&tgt->backward_merge.work);
+}
+
+static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt)
+{
+       return READ_ONCE(tgt->backward_merge.state) == BACKWARD_MERGE_STOP;
+}
+
+#define QCOW2_FILE_UNBIND -1
+
+static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt, int efd)
+{
+       struct eventfd_ctx *ctx = NULL;
+
+       lockdep_assert_held(&tgt->ctl_mutex);
+
+       ctx = efd == QCOW2_FILE_UNBIND ? NULL : eventfd_ctx_fdget(efd);
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
+
+       swap(ctx, tgt->backward_merge.eventfd_ctx);
+       if (ctx)
+               eventfd_ctx_put(ctx);
+
+       return 0;
+}
+
+static int qcow2_merge_backward_update_eventfd(struct qcow2_target *tgt, int 
efd)
+{
+       int ret;
+
+       mutex_lock(&tgt->ctl_mutex);
+       if (efd != QCOW2_FILE_UNBIND &&
+           (tgt->backward_merge.state != BACKWARD_MERGE_START &&
+            tgt->backward_merge.state != BACKWARD_MERGE_RUN)) {
+               mutex_unlock(&tgt->ctl_mutex);
+               return -EBUSY;
+       }
+
+       ret = qcow2_merge_backward_set_eventfd(tgt, efd);
+       if (ret) {
+               mutex_unlock(&tgt->ctl_mutex);
+               return ret;
+       }
+
+       mutex_unlock(&tgt->ctl_mutex);
        return 0;
 }
-ALLOW_ERROR_INJECTION(qcow2_merge_backward, ERRNO);
 
 static struct qcow2 *qcow2_get_img(struct qcow2_target *tgt, u32 img_id, u8 
*ref_index)
 {
@@ -337,6 +491,7 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, 
char **argv,
        struct qcow2_target *tgt = to_qcow2_target(ti);
        int ret = -EPERM;
        u32 val, val2;
+       int efd;
 
        if (!capable(CAP_SYS_ADMIN))
                goto out;
@@ -374,11 +529,30 @@ int qcow2_message(struct dm_target *ti, unsigned int 
argc, char **argv,
                }
                ret = qcow2_get_event(tgt, result, maxlen);
                goto out;
+       } else if (!strcmp(argv[0], "merge_backward")) {
+               if (argc < 2) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               if (!strcmp(argv[1], "cancel")) {
+                       if (argc != 2) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       qcow2_merge_backward_cancel(tgt);
+                       ret = 0;
+                       goto out;
+               } else if (!strcmp(argv[1], "update_eventfd")) {
+                       if (argc != 3 || kstrtoint(argv[2], 10, &efd)) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       ret = qcow2_merge_backward_update_eventfd(tgt, efd);
+                       goto out;
+               }
        }
 
-       ret = mutex_lock_killable(&tgt->ctl_mutex);
-       if (ret)
-               goto out;
+       mutex_lock(&tgt->ctl_mutex);
 
        if (!strcmp(argv[0], "get_errors")) {
                ret = qcow2_get_errors(tgt, result, maxlen);
@@ -388,11 +562,27 @@ int qcow2_message(struct dm_target *ti, unsigned int 
argc, char **argv,
        } else if (!strcmp(argv[0], "merge_forward")) {
                ret = qcow2_merge_forward(tgt);
        } else if (!strcmp(argv[0], "merge_backward")) {
-               ret = qcow2_merge_backward(tgt);
+               /* argc >= 2 */
+               if (!strcmp(argv[1], "start")) {
+                       if (argc != 3 || kstrtoint(argv[2], 10, &efd) || efd < 
0) {
+                               ret = -EINVAL;
+                               goto out_unlock;
+                       }
+                       ret = qcow2_merge_backward_start(tgt, efd);
+               } else if (!strcmp(argv[1], "complete")) {
+                       if (argc != 2) {
+                               ret = -EINVAL;
+                               goto out_unlock;
+                       }
+                       ret = qcow2_merge_backward_complete(tgt);
+               } else {
+                       ret = -ENOTTY;
+               }
        } else {
                ret = -ENOTTY;
        }
 
+out_unlock:
        mutex_unlock(&tgt->ctl_mutex);
 out:
        return ret;
diff --git a/drivers/md/dm-qcow2-target.c b/drivers/md/dm-qcow2-target.c
index 540c03cb3c44..6e2e583ba0b8 100644
--- a/drivers/md/dm-qcow2-target.c
+++ b/drivers/md/dm-qcow2-target.c
@@ -25,6 +25,8 @@ static void qcow2_set_service_operations(struct dm_target 
*ti, bool allowed)
        mutex_lock(&tgt->ctl_mutex);
        tgt->service_operations_allowed = allowed;
        mutex_unlock(&tgt->ctl_mutex);
+       if (!allowed)
+               qcow2_merge_backward_cancel(tgt);
 }
 static void qcow2_set_wants_suspend(struct dm_target *ti, bool wants)
 {
@@ -251,6 +253,7 @@ static void qcow2_tgt_destroy(struct qcow2_target *tgt)
                /* Now kill the queue */
                destroy_workqueue(tgt->wq);
        }
+       qcow2_merge_backward_cancel(tgt);
 
        mempool_destroy(tgt->qio_pool);
        mempool_destroy(tgt->qrq_pool);
@@ -494,6 +497,9 @@ static struct qcow2_target *alloc_qcow2_target(struct 
dm_target *ti)
        timer_setup(&tgt->enospc_timer, qcow2_enospc_timer, 0);
        ti->private = tgt;
        tgt->ti = ti;
+
+       INIT_WORK(&tgt->backward_merge.work, qcow2_merge_backward_work);
+
        qcow2_set_service_operations(ti, false);
 
        return tgt;
diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h
index a89fe3db2196..ca43e13d35c3 100644
--- a/drivers/md/dm-qcow2.h
+++ b/drivers/md/dm-qcow2.h
@@ -5,6 +5,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/device-mapper.h>
 #include <linux/fs.h>
+#include <linux/eventfd.h>
 #include "dm-core.h"
 
 #define DM_MSG_PREFIX "qcow2"
@@ -149,6 +150,21 @@ struct md_page {
        struct list_head wpc_readers_wait_list;
 };
 
+enum qcow2_backward_merge_state {
+       BACKWARD_MERGE_STOPPED,
+       BACKWARD_MERGE_START,
+       BACKWARD_MERGE_RUN,
+       BACKWARD_MERGE_WAIT_COMPLETION,
+       BACKWARD_MERGE_STOP,
+};
+
+struct qcow2_backward_merge {
+       struct work_struct work;
+       enum qcow2_backward_merge_state state;
+       int error;
+       struct eventfd_ctx *eventfd_ctx;
+};
+
 struct qcow2_target {
        struct dm_target *ti;
 #define QCOW2_QRQ_POOL_SIZE 512 /* Twice nr_requests from blk_mq_init_sched() 
*/
@@ -180,6 +196,8 @@ struct qcow2_target {
        struct work_struct event_work;
        spinlock_t event_lock;
        struct mutex ctl_mutex;
+
+       struct qcow2_backward_merge backward_merge;
 };
 
 enum {
@@ -375,6 +393,9 @@ int qcow2_inflight_ref_switch(struct qcow2_target *tgt);
 void qcow2_flush_deferred_activity(struct qcow2_target *tgt, struct qcow2 
*qcow2);
 int qcow2_truncate_safe(struct file *file, loff_t new_len);
 
+void qcow2_merge_backward_work(struct work_struct *work);
+void qcow2_merge_backward_cancel(struct qcow2_target *tgt);
+
 static inline struct qcow2_target *to_qcow2_target(struct dm_target *ti)
 {
        return ti->private;
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to