The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will 
appear at g...@bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.6
------>
commit 8493a7bb51af6c43ab9f4e93e1268ff47abf8b02
Author: Alexander Atanasov <alexander.atana...@virtuozzo.com>
Date:   Fri Jan 24 17:35:44 2025 +0200

    dm-ploop: move from wq to kthread
    
    Move to multithreaded model and remove work queue.
    
    https://virtuozzo.atlassian.net/browse/VSTOR-91821
    Signed-off-by: Alexander Atanasov <alexander.atana...@virtuozzo.com>
    
    ======
    Patchset description:
    ploop: optimizations and scaling
    
    Ploop processes requests in different threads in parallel
    where possible, which results in a significant improvement in
    performance and makes further optimizations possible.
    
    Known bugs:
      - delayed metadata writeback is not working and is missing error handling
         - patch to disable it until fixed
      - fast path is not working - causes rcu lockups - patch to disable it
    
    Further improvements:
      - optimize md pages lookups
    
    Alexander Atanasov (50):
      dm-ploop: md_pages map all pages at creation time
      dm-ploop: Use READ_ONCE/WRITE_ONCE to access md page data
      dm-ploop: fsync after all pios are sent
      dm-ploop: move md status to use proper bitops
      dm-ploop: convert wait_list and wb_batch_llist to use lockless lists
      dm-ploop: convert enospc handling to use lockless lists
      dm-ploop: convert suspended_pios list to use lockless list
      dm-ploop: convert the rest of the lists to use llist variant
      dm-ploop: combine processing of pios thru prepare list and remove
        fsync worker
      dm-ploop: move from wq to kthread
      dm-ploop: move preparations of pios into the caller from worker
      dm-ploop: fast path execution for reads
      dm-ploop: do not use a wrapper for set_bit to make a page writeback
      dm-ploop: BAT use only one list for writeback
      dm-ploop: make md writeback timeout to be per page
      dm-ploop: add interface to disable bat writeback delay
      dm-ploop: convert wb_batch_list to lockless variant
      dm-ploop: convert high_prio to status
      dm-ploop: split cow processing into two functions
      dm-ploop: convert md page rw lock to spin lock
      dm-ploop: convert bat_rwlock to bat_lock spinlock
      dm-ploop: prepare bat updates under bat_lock
      dm-ploop: make ploop_bat_write_complete ready for parallel pio
        completion
      dm-ploop: make ploop_submit_metadata_writeback return number of
        requests sent
      dm-ploop: introduce pio runner threads
      dm-ploop: add pio list ids to be used when passing pios to runners
      dm-ploop: process pios via runners
      dm-ploop: disable metadata writeback delay
      dm-ploop: disable fast path
      dm-ploop: use lockless lists for chained cow updates list
      dm-ploop: use lockless lists for data ready pios
      dm-ploop: give runner threads better name
      dm-ploop: resize operation - add holes bitmap locking
      dm-ploop: remove unnecessary operations
      dm-ploop: use filp per thread
      dm-ploop: catch if we try to advance pio past bio end
      dm-ploop: support REQ_FUA for data pios
      dm-ploop: proplerly access nr_bat_entries
      dm-ploop: fix locking and improve error handling when submitting pios
      dm-ploop: fix how ENOTBLK is handled
      dm-ploop: sync when suspended or stopping
      dm-ploop: rework bat completion logic
      dm-ploop: rework logic in pio processing
      dm-ploop: end fsync pios in parallel
      dm-ploop: make filespace preallocations async
      dm-ploop: resubmit enospc pios from dispatcher thread
      dm-ploop: dm-ploop: simplify discard completion
      dm-ploop: use GFP_ATOMIC instead of GFP_NOIO
      dm-ploop: fix locks used in mixed context
      dm-ploop: fix how current flags are managed inside threads
    
    Andrey Zhadchenko (13):
      dm-ploop: do not flush after metadata writes
      dm-ploop: set IOCB_DSYNC on all FUA requests
      dm-ploop: remove extra ploop_cluster_is_in_top_delta()
      dm-ploop: introduce per-md page locking
      dm-ploop: reduce BAT accesses on discard completion
      dm-ploop: simplify llseek
      dm-ploop: speed up ploop_prepare_bat_update()
      dm-ploop: make new allocations immediately visible in BAT
      dm-ploop: drop ploop_cluster_is_in_top_delta()
      dm-ploop: do not wait for BAT update for non-FUA requests
      dm-ploop: add delay for metadata writeback
      dm-ploop: submit all postponed metadata on REQ_OP_FLUSH
      dm-ploop: handle REQ_PREFLUSH
    
    Feature: dm-ploop: ploop target driver
---
 drivers/md/dm-ploop-map.c    | 54 +++++++++++++++++++++++++++++++++-----
 drivers/md/dm-ploop-target.c | 62 +++++++++++++++++++++++++++++++++++---------
 drivers/md/dm-ploop.h        | 11 +++++++-
 3 files changed, 107 insertions(+), 20 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 93def46f15b4..76ef88d563a3 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -340,6 +340,11 @@ static int ploop_split_pio_to_list(struct ploop *ploop, 
struct pio *pio,
 }
 ALLOW_ERROR_INJECTION(ploop_split_pio_to_list, ERRNO);
 
+static void ploop_schedule_work(struct ploop *ploop)
+{
+       wake_up_process(ploop->kt_worker->task);
+}
+
 static void ploop_dispatch_pio(struct ploop *ploop, struct pio *pio)
 {
        struct llist_head *list = (struct llist_head 
*)&ploop->pios[pio->queue_list_id];
@@ -362,7 +367,7 @@ void ploop_dispatch_pios(struct ploop *ploop, struct pio 
*pio,
                        ploop_dispatch_pio(ploop, pio);
        }
 
-       queue_work(ploop->wq, &ploop->worker);
+       ploop_schedule_work(ploop);
 }
 
 static bool ploop_delay_if_md_busy(struct ploop *ploop, struct md_page *md,
@@ -694,7 +699,7 @@ static void ploop_complete_cow(struct ploop_cow *cow, 
blk_status_t bi_status)
 
        ploop_queue_or_fail(ploop, blk_status_to_errno(bi_status), cow_pio);
 
-       queue_work(ploop->wq, &ploop->worker);
+       ploop_schedule_work(ploop);
        ploop_free_pio_with_pages(ploop, cow->aux_pio);
        kmem_cache_free(cow_cache, cow);
 }
@@ -1147,7 +1152,7 @@ static void ploop_queue_resubmit(struct pio *pio)
 
        llist_add((struct llist_node *)(&pio->list), &ploop->llresubmit_pios);
 
-       queue_work(ploop->wq, &ploop->worker);
+       ploop_schedule_work(ploop);
 }
 
 static void ploop_check_standby_mode(struct ploop *ploop, long res)
@@ -1822,9 +1827,8 @@ static void process_ploop_fsync_work(struct ploop *ploop)
        }
 }
 
-void do_ploop_work(struct work_struct *ws)
+void do_ploop_run_work(struct ploop *ploop)
 {
-       struct ploop *ploop = container_of(ws, struct ploop, worker);
        LIST_HEAD(deferred_pios);
        struct llist_node *llembedded_pios;
        struct llist_node *lldeferred_pios;
@@ -1836,12 +1840,13 @@ void do_ploop_work(struct work_struct *ws)
        current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
 
        llembedded_pios = llist_del_all(&ploop->pios[PLOOP_LIST_PREPARE]);
+
        lldeferred_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DEFERRED]);
        lldiscard_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DISCARD]);
        llcow_pios = llist_del_all(&ploop->pios[PLOOP_LIST_COW]);
        llresubmit = llist_del_all(&ploop->llresubmit_pios);
 
-       /* add old deferred to the list */
+       /* add old deferred back to the list */
        if (lldeferred_pios) {
                struct llist_node *pos, *t;
                struct pio *pio;
@@ -1873,6 +1878,41 @@ void do_ploop_work(struct work_struct *ws)
        current->flags = old_flags;
 }
 
+void do_ploop_work(struct work_struct *ws)
+{
+       struct ploop *ploop = container_of(ws, struct ploop, worker);
+
+       do_ploop_run_work(ploop);
+}
+
+int ploop_worker(void *data)
+{
+       struct ploop_worker *worker = data;
+       struct ploop *ploop = worker->ploop;
+
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               if (kthread_should_stop()) {
+                       __set_current_state(TASK_RUNNING);
+                       break;
+               }
+               if (llist_empty(&ploop->pios[PLOOP_LIST_FLUSH]) &&
+                       llist_empty(&ploop->pios[PLOOP_LIST_PREPARE]) &&
+                       llist_empty(&ploop->pios[PLOOP_LIST_DEFERRED]) &&
+                       llist_empty(&ploop->pios[PLOOP_LIST_DISCARD]) &&
+                       llist_empty(&ploop->pios[PLOOP_LIST_COW]) &&
+                       llist_empty(&ploop->llresubmit_pios)
+                       )
+                       schedule();
+
+               __set_current_state(TASK_RUNNING);
+               do_ploop_run_work(ploop);
+               cond_resched();
+       }
+       return 0;
+}
+
 static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
 {
        struct ploop_rq *prq = pio->endio_cb_data;
@@ -1893,7 +1933,7 @@ static void ploop_submit_embedded_pio(struct ploop 
*ploop, struct pio *pio)
        ploop_inc_nr_inflight(ploop, pio);
        llist_add((struct llist_node *)(&pio->list), 
&ploop->pios[PLOOP_LIST_PREPARE]);
 
-       queue_work(ploop->wq, &ploop->worker);
+       ploop_schedule_work(ploop);
 }
 
 void ploop_submit_embedded_pios(struct ploop *ploop, struct list_head *list)
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index ea9af6b6abe9..589082c8e110 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -161,11 +161,24 @@ static void ploop_destroy(struct ploop *ploop)
 {
        int i;
 
-       if (ploop->wq) {
-               flush_workqueue(ploop->wq);
-               destroy_workqueue(ploop->wq);
-               WARN_ON_ONCE(ploop_has_pending_activity(ploop));
+       if (ploop->kt_worker) {
+               wake_up_process(ploop->kt_worker->task);
+               /* try to send all pending - if we have partial io and enospc 
end bellow */
+               while (!llist_empty(&ploop->pios[PLOOP_LIST_FLUSH]) ||
+                       !llist_empty(&ploop->pios[PLOOP_LIST_PREPARE]) ||
+                       !llist_empty(&ploop->pios[PLOOP_LIST_DEFERRED]) ||
+                       !llist_empty(&ploop->pios[PLOOP_LIST_DISCARD]) ||
+                       !llist_empty(&ploop->pios[PLOOP_LIST_COW])
+                       ) {
+                       schedule();
+               }
+
+               kthread_stop(ploop->kt_worker->task);   /* waits for the thread 
to stop */
+               WARN_ON(!llist_empty(&ploop->pios[PLOOP_LIST_PREPARE]));
+               WARN_ON(!llist_empty(&ploop->llresubmit_pios));
+               kfree(ploop->kt_worker);
        }
+
        for (i = 0; i < 2; i++)
                percpu_ref_exit(&ploop->inflight_bios_ref[i]);
        /* Nobody uses it after destroy_workqueue() */
@@ -173,6 +186,7 @@ static void ploop_destroy(struct ploop *ploop)
                if (ploop->deltas[ploop->nr_deltas].file)
                        fput(ploop->deltas[ploop->nr_deltas].file);
        }
+       WARN_ON(ploop_has_pending_activity(ploop));
        WARN_ON(!ploop_empty_htable(ploop->exclusive_pios));
        WARN_ON(!ploop_empty_htable(ploop->inflight_pios));
        kfree(ploop->inflight_pios);
@@ -330,6 +344,33 @@ ALLOW_ERROR_INJECTION(ploop_add_deltas_stack, ERRNO);
                argc--;                                         \
                argv++;                                         \
        } while (0);
+
+static struct ploop_worker *ploop_worker_create(struct ploop *ploop)
+{
+       struct ploop_worker *worker;
+       struct task_struct *task;
+
+       worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
+       if (!worker)
+               return NULL;
+
+       worker->ploop = ploop;
+       task = kthread_create(ploop_worker, worker, "ploop-%d-0",
+                               current->pid);
+
+       if (IS_ERR(task))
+               goto out_err;
+       worker->task = task;
+
+       wake_up_process(task);
+
+       return worker;
+
+out_err:
+       kfree(worker);
+       return NULL;
+}
+
 /*
  * <data dev>
  */
@@ -337,7 +378,6 @@ static int ploop_ctr(struct dm_target *ti, unsigned int 
argc, char **argv)
 {
        percpu_ref_func_t *release;
        struct ploop *ploop;
-       unsigned int flags;
        int i, ret;
 
        if (argc < 2)
@@ -397,13 +437,6 @@ static int ploop_ctr(struct dm_target *ti, unsigned int 
argc, char **argv)
                }
        }
 
-       flags = WQ_MEM_RECLAIM|WQ_HIGHPRI|WQ_UNBOUND;
-       ploop->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, flags, 0);
-       if (!ploop->wq) {
-               ret = -ENOMEM;
-               goto err;
-       }
-
        ti->private = ploop;
        ploop->ti = ti;
 
@@ -474,6 +507,11 @@ static int ploop_ctr(struct dm_target *ti, unsigned int 
argc, char **argv)
        if (argc <= 0)
                goto err;
 
+
+       ploop->kt_worker = ploop_worker_create(ploop);
+       if (!ploop->kt_worker)
+               goto err;
+
        ret = ploop_add_deltas_stack(ploop, &argv[0], argc);
        if (ret)
                goto err;
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 1ba91cbc4f04..64d8eac4ef84 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -138,6 +138,12 @@ enum {
        PLOOP_LIST_INVALID = PLOOP_LIST_COUNT,
 };
 
+struct ploop_worker {
+       struct ploop            *ploop;
+       struct task_struct      *task;
+       u64                     kcov_handle;
+};
+
 struct ploop {
        struct dm_target *ti;
 #define PLOOP_PRQ_POOL_SIZE 512 /* Twice nr_requests from blk_mq_init_sched() 
*/
@@ -179,10 +185,10 @@ struct ploop {
         */
        struct hlist_head *exclusive_pios;
 
-       struct workqueue_struct *wq;
        struct work_struct worker;
        struct work_struct event_work;
 
+       struct ploop_worker *kt_worker;
        struct completion inflight_bios_ref_comp;
        struct percpu_ref inflight_bios_ref[2];
        bool inflight_ref_comp_pending;
@@ -598,4 +604,7 @@ extern void ploop_call_rw_iter(struct file *file, loff_t 
pos, unsigned rw,
                               struct iov_iter *iter, struct pio *pio);
 extern void ploop_enospc_timer(struct timer_list *timer);
 extern loff_t ploop_llseek_hole(struct dm_target *ti, loff_t offset, int 
whence);
+
+int ploop_worker(void *data);
+
 #endif /* __DM_PLOOP_H */
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to