Process pios in runner threads while preserving order.

Metadata writeback requires all prior pios to be processed,
since they can generate updates, so we have to wait before
processing writeback. Fsync is still sequential too.
Both can be improved in a next iteration.

https://virtuozzo.atlassian.net/browse/VSTOR-91821
Signed-off-by: Alexander Atanasov <alexander.atana...@virtuozzo.com>
---
 drivers/md/dm-ploop-map.c    | 93 +++++++++++++++++++++++-------------
 drivers/md/dm-ploop-target.c |  2 +-
 2 files changed, 62 insertions(+), 33 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 248dccd342c0..c88359677ea4 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -1694,7 +1694,8 @@ void ploop_index_wb_submit(struct ploop *ploop, struct 
ploop_index_wb *piwb)
        pio->endio_cb = ploop_bat_write_complete;
        pio->endio_cb_data = piwb;
 
-       ploop_submit_rw_mapped(ploop, pio);
+       pio->queue_list_id = PLOOP_LIST_WRITEBACK;
+       ploop_runners_add_work(ploop, pio);
 }
 
 static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq)
@@ -1891,7 +1892,8 @@ static inline int ploop_runners_have_pending(struct ploop 
*ploop)
 }
 #endif
 
-static int ploop_submit_metadata_writeback(struct ploop *ploop, int force)
+static inline int ploop_submit_metadata_writeback(struct ploop *ploop, int 
force,
+                                                 unsigned long old_flags)
 {
        unsigned long flags;
        LIST_HEAD(ll_skipped);
@@ -1934,11 +1936,11 @@ static int ploop_submit_metadata_writeback(struct ploop 
*ploop, int force)
        return ret;
 }
 
-static void process_ploop_fsync_work(struct ploop *ploop, struct llist_node 
*llflush_pios)
+static int process_ploop_fsync_work(struct ploop *ploop, struct llist_node 
*llflush_pios)
 {
        struct file *file;
        struct pio *pio;
-       int ret;
+       int ret, npios = 0;
        struct llist_node *pos, *t;
 
        file = ploop_top_delta(ploop)->file;
@@ -1955,6 +1957,7 @@ static void process_ploop_fsync_work(struct ploop *ploop, 
struct llist_node *llf
                }
                ploop_pio_endio(pio);
        }
+       return npios;
 }
 
 static inline int ploop_runners_add_work(struct ploop *ploop, struct pio *pio)
@@ -1986,8 +1989,11 @@ static inline int ploop_runners_add_work_list(struct 
ploop *ploop, struct llist_
        return 0;
 }
 
+#endif
+
 void do_ploop_run_work(struct ploop *ploop)
 {
+       struct ploop_worker *wrkr = ploop->kt_worker;
        LLIST_HEAD(deferred_pios);
        struct llist_node *llembedded_pios;
        struct llist_node *lldeferred_pios;
@@ -1996,6 +2002,8 @@ void do_ploop_run_work(struct ploop *ploop)
        struct llist_node *llresubmit;
        struct llist_node *llflush_pios;
        unsigned int old_flags = current->flags;
+       struct llist_node *pos, *t;
+       int npios;
 
        current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
 
@@ -2007,35 +2015,56 @@ void do_ploop_run_work(struct ploop *ploop)
        llcow_pios = llist_del_all(&ploop->pios[PLOOP_LIST_COW]);
 
        /* add old deferred back to the list */
-       if (lldeferred_pios) {
-               struct llist_node *pos, *t;
-               struct pio *pio;
-               /* Add one by one we need last for batch add */
-               llist_for_each_safe(pos, t, lldeferred_pios) {
-                       llist_add(pos, &deferred_pios);
-               }
-       }
+       if (lldeferred_pios)
+               ploop_runners_add_work_list(ploop, lldeferred_pios);
 
-       ploop_prepare_embedded_pios(ploop, llembedded_pios, &deferred_pios);
+       if (llembedded_pios)
+               ploop_prepare_embedded_pios(ploop, 
llist_reverse_order(llembedded_pios),
+                                           &deferred_pios);
 
-       llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
        smp_wmb(); /* */
+       llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
 
        if (llresubmit)
-               ploop_process_resubmit_pios(ploop, 
llist_reverse_order(llresubmit));
+               ploop_runners_add_work_list(ploop, llresubmit);
 
-       ploop_process_deferred_pios(ploop, &deferred_pios);
+       ploop_runners_add_work_list(ploop, deferred_pios.first);
 
        if (lldiscard_pios)
-               ploop_process_discard_pios(ploop, 
llist_reverse_order(lldiscard_pios));
+               ploop_runners_add_work_list(ploop, lldiscard_pios);
 
        if (llcow_pios)
-               ploop_process_delta_cow(ploop, llist_reverse_order(llcow_pios));
+               ploop_runners_add_work_list(ploop, llcow_pios);
 
-       ploop_submit_metadata_writeback(ploop);
+       /* wait for all pios to be executed before metadata updates */
+       current->flags = old_flags;
+       wait_event_interruptible(ploop->dispatcher_wq_data, 
(!ploop_runners_have_pending(ploop)));
+       current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
 
-       if (llflush_pios)
-               process_ploop_fsync_work(ploop, 
llist_reverse_order(llflush_pios));
+       /* if we have a flush we must sync md data too */
+       npios = ploop_submit_metadata_writeback(ploop, !!llflush_pios | 
ploop->force_md_writeback, old_flags);
+
+       if (llflush_pios) {
+               if (npios) {
+                       /* wait for metadata writeback to complete */
+                       current->flags = old_flags;
+                       /* First wait all pios to be processed */
+                       wait_event_interruptible(ploop->dispatcher_wq_data,
+                                                
(!ploop_runners_have_pending(ploop)));
+                       current->flags |= 
PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
+               }
+               /* Now process fsync pios after we have done all other */
+               npios = process_ploop_fsync_work(ploop, llflush_pios);
+               /* Since dispatcher is single thread no other work can be 
queued */
+#ifdef USE_RUNNERS__NOT_READY
+               if (npios) {
+                       current->flags = old_flags;
+                       wait_event_interruptible(ploop->dispatcher_wq_fsync,
+                                                atomic_read(&wrkr->fsync_pios) 
!= 0);
+                       current->flags |= 
PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
+               }
+#endif
+       }
 
        current->flags = old_flags;
 }
@@ -2143,8 +2172,9 @@ int ploop_worker(void *data)
                        ) {
 
                                if (kthread_should_stop()) {
-                                       
wait_event_interruptible(ploop->dispatcher_wq_data,
-                                                       
(!ploop_runners_have_pending(ploop)));
+                                       wait_event_interruptible(
+                                               ploop->dispatcher_wq_data,
+                                               
(!ploop_runners_have_pending(ploop)));
                                        __set_current_state(TASK_RUNNING);
                                        break;
                                }
@@ -2160,7 +2190,7 @@ int ploop_worker(void *data)
 #endif
                if (kthread_should_stop()) {
                        wait_event_interruptible(ploop->dispatcher_wq_data,
-                                               
(!ploop_runners_have_pending(ploop)));
+                                       (!ploop_runners_have_pending(ploop)));
                        __set_current_state(TASK_RUNNING);
                        break;
                }
@@ -2184,7 +2214,7 @@ static void ploop_preflush_endio(struct pio *pio, void 
*orig_pio_ptr,
 static int ploop_prepare_flush(struct ploop *ploop, struct pio *pio)
 {
        struct pio *flush_pio = pio;
-       struct md_page *md, *n;
+       struct md_page *md;
        int md_inflight = 0;
        struct llist_node *pos, *t;
        unsigned long flags;
@@ -2207,6 +2237,7 @@ static int ploop_prepare_flush(struct ploop *ploop, 
struct pio *pio)
        spin_lock_irqsave(&ploop->bat_lock, flags);
        llist_for_each_safe(pos, t, ploop->wb_batch_llist.first) {
                md = list_entry((struct list_head *)pos, typeof(*md), wb_link);
+               WARN_ON(md->piwb->flush_pio);
                md_inflight++;
                md->piwb->flush_pio = flush_pio;
                set_bit(MD_HIGHPRIO, &md->status);
@@ -2225,8 +2256,7 @@ static void ploop_submit_embedded_pio(struct ploop 
*ploop, struct pio *pio)
        bool queue = true;
        LLIST_HEAD(deferred_pios);
        int ret = 0;
-       struct pio *spio, *stmp;
-       struct llist_node *pos, *t;
+       struct pio *spio;
 
        if (blk_rq_bytes(rq)) {
                pio->queue_list_id = PLOOP_LIST_PREPARE;
@@ -2249,12 +2279,12 @@ static void ploop_submit_embedded_pio(struct ploop 
*ploop, struct pio *pio)
                        return;
                if (ret > 0)
                        goto out;
-               /* Will add to prepare list and schedule work */
+               pio->queue_list_id = PLOOP_LIST_FLUSH;
        }
 
        if (pio->queue_list_id == PLOOP_LIST_FLUSH) {
                /* Let the FLUSH go last from the queue , do not run here to 
preserve ordering */
-               llist_add((struct llist_node *)(&pio->list), 
&ploop->pios[PLOOP_LIST_PREPARE]);
+               llist_add((struct llist_node *)(&pio->list), 
&ploop->pios[PLOOP_LIST_FLUSH]);
                goto out;
        }
 
@@ -2270,9 +2300,8 @@ static void ploop_submit_embedded_pio(struct ploop 
*ploop, struct pio *pio)
                        ret = ploop_process_one_deferred_bio(ploop, spio);
                        current->flags = old_flags;
                        if (ret == 1) {
-                               /* not queued add back to deferreed*/
-                               llist_add((struct llist_node *)(&spio->list),
-                                         &ploop->pios[PLOOP_LIST_DEFERRED]);
+                               /* not queued add back to deferred */
+                               llist_add((struct llist_node *)(&spio->list), 
&ploop->pios[PLOOP_LIST_DEFERRED]);
                        } else {
                                queue = false;
                        }
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index 95ed3e26f2b8..d9f79da62315 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -571,7 +571,7 @@ static int ploop_ctr(struct dm_target *ti, unsigned int 
argc, char **argv)
                goto err;
 
        ploop->nkt_runners = PLOOP_PIO_RUNNERS;
-       for (i=0; i < ploop->nkt_runners; i++) {
+       for (i = 0; i < ploop->nkt_runners; i++) {
                ploop->kt_runners[i] = ploop_worker_create(ploop, 
ploop_pio_runner, "r", i+1);
                if (!ploop->kt_runners[i])
                        goto err;
-- 
2.43.0

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to