This adds merge_backward "start", "complete", "update_eventfd" and
"cancel" commands. By that we are able to split single merge_backward
into two stages: start asyncronous merging and completion. That can be
usefull for restarting qemu process while allowing backward merging to
run asyncronously in kernel.

The "start" command runs merging preparations in workqueue work. After
it finishes it sends event to eventfd set on "start", receiving event on
eventfd the "complete" command can be called to finish the process and
actually replace the top qcow2 with it's lower. In case work encounters
any errors or "cancel" request it will also send event to eventfd,
calling "complete" after that will fail. Basically userspace is
guaranteed to receive event from eventfd in any case after start.

The "cancel" command forces the work to stop and flushes it. In case we
are in completion waiting state already and there is no work running,
the "cancel" command also reverts merging preparations.

The "update_eventfd" command can be used to update eventfd for currently
running merge, e.g. in case old eventfd was lost for some reason. This
command on success guarantees that the caller will receive event from
the new eventfd. If "update_eventfd" fails with -EBUSY, it means that
there is no currently running merge in progress.

Locking:

Data in tgt->backward_merge is protected by tgt->ctl_mutex. The "start"
and "complete" commands are fully under this lock, and the "cancel"
operation takes the lock explicitly and releases it for work flushing.
The work also takes the lock but only when updating tgt->backward_merge
data. For checks, if the work was caneled in the middle, we read the
state without locking as we don't modify the state there, also we would
re-check the state again before exiting the work function under lock.

Now on target suspend we "cancel" currently running backward merge,
previously we were just hanging untill backward merge have been
finished for possibly a long time, cancelling seems cleaner. Though we
don't really expect hypervisor suspending the target in the middle of
backward merge that it by itself started.

The backward_merge.eventfd_ctx is also protected from being released by
tgt->ctl_mutex.

Note: After this patch the backward merge runs in a workqueue and also
the tgt->ctl_mutex is not held for a long time anymore, so we remove
interruptible mutex wait, and replace pending signal checks in the
middle of backward merge with checking "should stop" state.

https://virtuozzo.atlassian.net/browse/VSTOR-100466
Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>

--
v2: Cancel from BACKWARD_MERGE_START state should not try to stop the
work via BACKWARD_MERGE_STOP state, else we will deadlock in this state.
Always report that work finished, e.g. also on error or then it was
canceled, this should be more consistent from the userspace perspective.
v5: Address Alexander's and Andrey's reveiw: remove excess enum first
element init, add backward merge error to qcow2_get_errors, add note
about signals handling removal, merge eventfd into this patch and rework
it to be easier for userspace to use. Unbind eventfd after sending
event.
---
 drivers/md/dm-qcow2-cmd.c    | 222 ++++++++++++++++++++++++++++++++---
 drivers/md/dm-qcow2-target.c |   6 +
 drivers/md/dm-qcow2.h        |  21 ++++
 3 files changed, 233 insertions(+), 16 deletions(-)

diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c
index 7b4b0ee68ad9f..56f2d3e285cdb 100644
--- a/drivers/md/dm-qcow2-cmd.c
+++ b/drivers/md/dm-qcow2-cmd.c
@@ -5,6 +5,8 @@
 #include <linux/device-mapper.h>
 #include <linux/sched/signal.h>
 #include <linux/file.h>
+#include <linux/eventfd.h>
+#include <linux/minmax.h>
 #include <linux/error-injection.h>
 #include "dm-qcow2.h"
 
@@ -17,8 +19,10 @@ static int qcow2_get_errors(struct qcow2_target *tgt, char 
*result,
        unsigned int sz = 0;
        int ret;
 
-       ret = 
DMEMIT("wants_check=%d\nmd_writeback_error=%d\ntruncate_error=%d\n",
-                     wants_check, tgt->md_writeback_error, 
tgt->truncate_error);
+       ret = 
DMEMIT("wants_check=%d\nmd_writeback_error=%d\ntruncate_error=%d\n"
+                    "merge_backward_error=%d\n",
+                     wants_check, tgt->md_writeback_error, tgt->truncate_error,
+                     tgt->backward_merge.error);
 
        return ret ? 1 : 0;
 }
@@ -52,6 +56,8 @@ static void service_qio_endio(struct qcow2_target *tgt, 
struct qio *qio,
        wake_up(&tgt->service_wq);
 }
 
+static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt);
+
 static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2,
                  loff_t end, loff_t step, unsigned int bi_op, u8 qio_flags)
 {
@@ -63,7 +69,7 @@ static int qcow2_service_iter(struct qcow2_target *tgt, 
struct qcow2 *qcow2,
        WRITE_ONCE(service_status, BLK_STS_OK);
 
        for (pos = 0; pos < end; pos += step) {
-               if (fatal_signal_pending(current)) {
+               if (qcow2_backward_merge_should_stop(tgt)) {
                        ret = -EINTR;
                        break;
                }
@@ -161,10 +167,14 @@ static void set_backward_merge_in_process(struct 
qcow2_target *tgt,
        qcow2_submit_embedded_qios(tgt, &list);
 }
 
-static int qcow2_merge_backward(struct qcow2_target *tgt)
+static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt, int efd);
+
+static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd)
 {
        struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
-       int ret, ret2;
+       int ret;
+
+       lockdep_assert_held(&tgt->ctl_mutex);
 
        if (!lower)
                return -ENOENT;
@@ -174,6 +184,43 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
                return -EOPNOTSUPP;
        if (lower->hdr.size < qcow2->hdr.size)
                return -EBADSLT;
+
+       if (tgt->backward_merge.state != BACKWARD_MERGE_STOPPED)
+               return -EBUSY;
+
+       ret = qcow2_merge_backward_set_eventfd(tgt, efd);
+       if (ret)
+               return ret;
+
+       tgt->backward_merge.state = BACKWARD_MERGE_START;
+       tgt->backward_merge.error = 0;
+
+       schedule_work(&tgt->backward_merge.work);
+       return 0;
+}
+ALLOW_ERROR_INJECTION(qcow2_merge_backward_start, ERRNO);
+
+void qcow2_merge_backward_work(struct work_struct *work)
+{
+       struct qcow2_target *tgt = container_of(work, struct qcow2_target,
+                                               backward_merge.work);
+       struct qcow2 *qcow2, *lower;
+       int ret, ret2;
+
+       mutex_lock(&tgt->ctl_mutex);
+       if (tgt->backward_merge.state != BACKWARD_MERGE_START) {
+               if (tgt->backward_merge.eventfd_ctx)
+                       eventfd_signal(tgt->backward_merge.eventfd_ctx, 1);
+               qcow2_merge_backward_set_eventfd(tgt, -1);
+               mutex_unlock(&tgt->ctl_mutex);
+               return;
+       }
+       tgt->backward_merge.state = BACKWARD_MERGE_RUN;
+       mutex_unlock(&tgt->ctl_mutex);
+
+       qcow2 = tgt->top;
+       lower = qcow2->lower;
+
        /*
         * Break all COW clus at L1 level. Otherwise, later
         * there would be problems with unusing them:
@@ -183,13 +230,13 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
        ret = qcow2_break_l1cow(tgt);
        if (ret) {
                QC_ERR(tgt->ti, "Can't break L1 COW");
-               return ret;
+               goto out_err;
        }
 
        ret = qcow2_set_image_file_features(lower, true);
        if (ret) {
                QC_ERR(tgt->ti, "Can't set dirty bit");
-               return ret;
+               goto out_err;
        }
        set_backward_merge_in_process(tgt, qcow2, true);
 
@@ -200,22 +247,129 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
                ret2 = qcow2_set_image_file_features(lower, false);
                if (ret2 < 0)
                        QC_ERR(tgt->ti, "Can't unuse lower (%d)", ret2);
-               return ret;
        }
+
+out_err:
+       mutex_lock(&tgt->ctl_mutex);
+       if (ret) {
+               /* Error */
+               tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
+               tgt->backward_merge.error = ret;
+       } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
+               /* Merge is canceled */
+               set_backward_merge_in_process(tgt, qcow2, false);
+               tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
+               tgt->backward_merge.error = -EINTR;
+       } else {
+               /* Finish merge */
+               tgt->backward_merge.state = BACKWARD_MERGE_WAIT_COMPLETION;
+       }
+       if (tgt->backward_merge.eventfd_ctx)
+               eventfd_signal(tgt->backward_merge.eventfd_ctx, 1);
+       qcow2_merge_backward_set_eventfd(tgt, -1);
+       mutex_unlock(&tgt->ctl_mutex);
+}
+
+static int qcow2_merge_backward_complete(struct qcow2_target *tgt)
+{
+       struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+       int ret;
+
+       lockdep_assert_held(&tgt->ctl_mutex);
+
+       if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION)
+               return -EBUSY;
+
        tgt->top = lower;
        smp_wmb(); /* Pairs with qcow2_ref_inc() */
        qcow2_inflight_ref_switch(tgt); /* Pending qios */
        qcow2_flush_deferred_activity(tgt, qcow2); /* Delayed md pages */
        qcow2->lower = NULL;
 
-       ret2 = qcow2_set_image_file_features(qcow2, false);
-       if (ret2 < 0)
-               QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret2);
+       ret = qcow2_set_image_file_features(qcow2, false);
+       if (ret < 0)
+               QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret);
        qcow2_destroy(qcow2);
 
+       tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
+
+       return 0;
+}
+ALLOW_ERROR_INJECTION(qcow2_merge_backward_complete, ERRNO);
+
+void qcow2_merge_backward_cancel(struct qcow2_target *tgt)
+{
+       bool flush = false;
+
+       mutex_lock(&tgt->ctl_mutex);
+       if (tgt->backward_merge.state == BACKWARD_MERGE_STOPPED) {
+               mutex_unlock(&tgt->ctl_mutex);
+               return;
+       }
+
+       if (tgt->backward_merge.state == BACKWARD_MERGE_START) {
+               tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
+               flush = true;
+       } else if (tgt->backward_merge.state == BACKWARD_MERGE_RUN) {
+               tgt->backward_merge.state = BACKWARD_MERGE_STOP;
+               flush = true;
+       } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
+               flush = true;
+       } else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) 
{
+               set_backward_merge_in_process(tgt, tgt->top, false);
+               tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
+       }
+       mutex_unlock(&tgt->ctl_mutex);
+
+       if (flush)
+               flush_work(&tgt->backward_merge.work);
+}
+
+static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt)
+{
+       return READ_ONCE(tgt->backward_merge.state) == BACKWARD_MERGE_STOP;
+}
+
+#define QCOW2_FILE_UNBIND -1
+
+static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt, int efd)
+{
+       struct eventfd_ctx *ctx = NULL;
+
+       lockdep_assert_held(&tgt->ctl_mutex);
+
+       ctx = efd == QCOW2_FILE_UNBIND ? NULL : eventfd_ctx_fdget(efd);
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
+
+       swap(ctx, tgt->backward_merge.eventfd_ctx);
+       if (ctx)
+               eventfd_ctx_put(ctx);
+
+       return 0;
+}
+
+static int qcow2_merge_backward_update_eventfd(struct qcow2_target *tgt, int 
efd)
+{
+       int ret;
+
+       mutex_lock(&tgt->ctl_mutex);
+       if (efd != QCOW2_FILE_UNBIND &&
+           (tgt->backward_merge.state != BACKWARD_MERGE_START ||
+            tgt->backward_merge.state != BACKWARD_MERGE_RUN)) {
+               mutex_unlock(&tgt->ctl_mutex);
+               return -EBUSY;
+       }
+
+       ret = qcow2_merge_backward_set_eventfd(tgt, efd);
+       if (ret) {
+               mutex_unlock(&tgt->ctl_mutex);
+               return ret;
+       }
+
+       mutex_unlock(&tgt->ctl_mutex);
        return 0;
 }
-ALLOW_ERROR_INJECTION(qcow2_merge_backward, ERRNO);
 
 static struct qcow2 *qcow2_get_img(struct qcow2_target *tgt, u32 img_id, u8 
*ref_index)
 {
@@ -337,6 +491,7 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, 
char **argv,
        struct qcow2_target *tgt = to_qcow2_target(ti);
        int ret = -EPERM;
        u32 val, val2;
+       int efd;
 
        if (!capable(CAP_SYS_ADMIN))
                goto out;
@@ -374,11 +529,30 @@ int qcow2_message(struct dm_target *ti, unsigned int 
argc, char **argv,
                }
                ret = qcow2_get_event(tgt, result, maxlen);
                goto out;
+       } else if (!strcmp(argv[0], "merge_backward")) {
+               if (argc < 2) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               if (!strcmp(argv[1], "cancel")) {
+                       if (argc != 2) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       qcow2_merge_backward_cancel(tgt);
+                       ret = 0;
+                       goto out;
+               } else if (!strcmp(argv[1], "update_eventfd")) {
+                       if (argc != 3 || kstrtoint(argv[2], 10, &efd)) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       ret = qcow2_merge_backward_update_eventfd(tgt, efd);
+                       goto out;
+               }
        }
 
-       ret = mutex_lock_killable(&tgt->ctl_mutex);
-       if (ret)
-               goto out;
+       mutex_lock(&tgt->ctl_mutex);
 
        if (!strcmp(argv[0], "get_errors")) {
                ret = qcow2_get_errors(tgt, result, maxlen);
@@ -388,11 +562,27 @@ int qcow2_message(struct dm_target *ti, unsigned int 
argc, char **argv,
        } else if (!strcmp(argv[0], "merge_forward")) {
                ret = qcow2_merge_forward(tgt);
        } else if (!strcmp(argv[0], "merge_backward")) {
-               ret = qcow2_merge_backward(tgt);
+               /* argc >= 2 */
+               if (!strcmp(argv[1], "start")) {
+                       if (argc != 3 || kstrtoint(argv[2], 10, &efd) || efd < 
0) {
+                               ret = -EINVAL;
+                               goto out_unlock;
+                       }
+                       ret = qcow2_merge_backward_start(tgt, efd);
+               } else if (!strcmp(argv[1], "complete")) {
+                       if (argc != 2) {
+                               ret = -EINVAL;
+                               goto out_unlock;
+                       }
+                       ret = qcow2_merge_backward_complete(tgt);
+               } else {
+                       ret = -ENOTTY;
+               }
        } else {
                ret = -ENOTTY;
        }
 
+out_unlock:
        mutex_unlock(&tgt->ctl_mutex);
 out:
        return ret;
diff --git a/drivers/md/dm-qcow2-target.c b/drivers/md/dm-qcow2-target.c
index 540c03cb3c44f..6e2e583ba0b8b 100644
--- a/drivers/md/dm-qcow2-target.c
+++ b/drivers/md/dm-qcow2-target.c
@@ -25,6 +25,8 @@ static void qcow2_set_service_operations(struct dm_target 
*ti, bool allowed)
        mutex_lock(&tgt->ctl_mutex);
        tgt->service_operations_allowed = allowed;
        mutex_unlock(&tgt->ctl_mutex);
+       if (!allowed)
+               qcow2_merge_backward_cancel(tgt);
 }
 static void qcow2_set_wants_suspend(struct dm_target *ti, bool wants)
 {
@@ -251,6 +253,7 @@ static void qcow2_tgt_destroy(struct qcow2_target *tgt)
                /* Now kill the queue */
                destroy_workqueue(tgt->wq);
        }
+       qcow2_merge_backward_cancel(tgt);
 
        mempool_destroy(tgt->qio_pool);
        mempool_destroy(tgt->qrq_pool);
@@ -494,6 +497,9 @@ static struct qcow2_target *alloc_qcow2_target(struct 
dm_target *ti)
        timer_setup(&tgt->enospc_timer, qcow2_enospc_timer, 0);
        ti->private = tgt;
        tgt->ti = ti;
+
+       INIT_WORK(&tgt->backward_merge.work, qcow2_merge_backward_work);
+
        qcow2_set_service_operations(ti, false);
 
        return tgt;
diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h
index a89fe3db2196d..ca43e13d35c34 100644
--- a/drivers/md/dm-qcow2.h
+++ b/drivers/md/dm-qcow2.h
@@ -5,6 +5,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/device-mapper.h>
 #include <linux/fs.h>
+#include <linux/eventfd.h>
 #include "dm-core.h"
 
 #define DM_MSG_PREFIX "qcow2"
@@ -149,6 +150,21 @@ struct md_page {
        struct list_head wpc_readers_wait_list;
 };
 
+enum qcow2_backward_merge_state {
+       BACKWARD_MERGE_STOPPED,
+       BACKWARD_MERGE_START,
+       BACKWARD_MERGE_RUN,
+       BACKWARD_MERGE_WAIT_COMPLETION,
+       BACKWARD_MERGE_STOP,
+};
+
+struct qcow2_backward_merge {
+       struct work_struct work;
+       enum qcow2_backward_merge_state state;
+       int error;
+       struct eventfd_ctx *eventfd_ctx;
+};
+
 struct qcow2_target {
        struct dm_target *ti;
 #define QCOW2_QRQ_POOL_SIZE 512 /* Twice nr_requests from blk_mq_init_sched() 
*/
@@ -180,6 +196,8 @@ struct qcow2_target {
        struct work_struct event_work;
        spinlock_t event_lock;
        struct mutex ctl_mutex;
+
+       struct qcow2_backward_merge backward_merge;
 };
 
 enum {
@@ -375,6 +393,9 @@ int qcow2_inflight_ref_switch(struct qcow2_target *tgt);
 void qcow2_flush_deferred_activity(struct qcow2_target *tgt, struct qcow2 
*qcow2);
 int qcow2_truncate_safe(struct file *file, loff_t new_len);
 
+void qcow2_merge_backward_work(struct work_struct *work);
+void qcow2_merge_backward_cancel(struct qcow2_target *tgt);
+
 static inline struct qcow2_target *to_qcow2_target(struct dm_target *ti)
 {
        return ti->private;
-- 
2.48.1

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to