The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will 
appear at g...@bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.24
------>
commit 1a12832fd8641bf9e34ac98f3602c1b6f1a312d0
Author: Alexander Atanasov <alexander.atana...@virtuozzo.com>
Date:   Wed Apr 2 14:42:07 2025 +0300

    dm-ploop: fix resize and grow to use the new way of updating md pages
    
    We missed the fact that resize/grow touch md0 page to update ploop
    parameters, other pages are not linked so there is no issue.
    
    But in case there is a concurrent update to md0 piwb is not handled
    correctly, also using sync updates in parallel is not okay.
    
    To fix this update code to use the new mechanism with MD_UPDATING
    flag and instead of using sync operations pass the updates to
    runner threads.
    
    https://virtuozzo.atlassian.net/browse/VSTOR-101871
    Signed-off-by: Alexander Atanasov <alexander.atana...@virtuozzo.com>
    Reviewed-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
    
    Feature: dm-ploop: ploop target driver
---
 drivers/md/dm-ploop-cmd.c | 39 ++++++++++++++++++++++++++++++++-------
 drivers/md/dm-ploop-map.c | 36 +++++++++++++++++++++++++++---------
 drivers/md/dm-ploop.h     |  7 ++++++-
 3 files changed, 65 insertions(+), 17 deletions(-)

diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index d2eb4797ab6e..7d79b900eb1a 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -286,6 +286,8 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
        struct md_page *md;
        bool is_locked;
        int ret = 0;
+       int tries = 5;
+       int add_for_wb = 0;
 
        dst_clu = cmd->resize.dst_clu;
 
@@ -308,6 +310,7 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
        }
        spin_unlock_irq(&ploop->bat_lock);
 
+reread:
        /* Read full clu sync */
        ret = ploop_read_cluster_sync(ploop, pio, dst_clu);
        if (ret < 0) {
@@ -316,8 +319,13 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
        }
 
        ret = ploop_prepare_reloc_index_wb(ploop, &md, clu, &new_dst,
-                                          ploop_top_delta(ploop)->file);
+                                          ploop_top_delta(ploop)->file,
+                                          &add_for_wb);
        if (ret < 0) {
+               if (ret == -EBUSY && tries-- > 0) {
+                       PL_ERR("md0 busy, retry:%d\n", tries);
+                       goto reread;
+               }
                PL_ERR("reloc: can't prepare it: %d", ret);
                goto out;
        }
@@ -332,13 +340,16 @@ static int ploop_grow_relocate_cluster(struct ploop 
*ploop,
                goto out;
        }
 
-       set_bit(MD_WRITEBACK, &md->status);
        init_completion(&comp);
        piwb->comp = &comp;
        piwb->comp_bi_status = &bi_status;
        /* Write new index on disk */
-       ploop_index_wb_submit(ploop, piwb);
+       ploop_disable_writeback_delay(ploop);
+       if (add_for_wb)
+               ploop_add_dirty_for_wb(ploop, md);
+       clear_bit(MD_UPDATING, &md->status);
        wait_for_completion(&comp);
+       ploop_enable_writeback_delay(ploop);
 
        ret = blk_status_to_errno(bi_status);
        if (ret) {
@@ -378,12 +389,22 @@ static int ploop_grow_update_header(struct ploop *ploop,
        struct md_page *md;
        u64 sectors;
        int ret;
+       int tries = 5;
+       int add_for_wb = false;
 
+retry:
        /* hdr is in the same page as bat_entries[0] index */
        ret = ploop_prepare_reloc_index_wb(ploop, &md, 0, NULL,
-                                          ploop_top_delta(ploop)->file);
-       if (ret)
+                                          ploop_top_delta(ploop)->file,
+                                          &add_for_wb);
+       if (ret) {
+               if (ret == -EBUSY && tries-- > 0) {
+                       PL_ERR("md0 busy, retry:%d\n", tries);
+                       schedule();
+                       goto retry;
+               }
                return ret;
+       }
        piwb = md->piwb;
 
        size = (PLOOP_MAP_OFFSET + cmd->resize.nr_bat_entries);
@@ -398,12 +419,16 @@ static int ploop_grow_update_header(struct ploop *ploop,
        offset = hdr->m_FirstBlockOffset = cpu_to_le32(first_block_off);
        kunmap_local(hdr);
 
-       set_bit(MD_WRITEBACK, &md->status);
        init_completion(&comp);
        piwb->comp = &comp;
        piwb->comp_bi_status = &bi_status;
-       ploop_index_wb_submit(ploop, piwb);
+
+       ploop_disable_writeback_delay(ploop);
+       if (add_for_wb)
+               ploop_add_dirty_for_wb(ploop, md);
+       clear_bit(MD_UPDATING, &md->status);
        wait_for_completion(&comp);
+       ploop_enable_writeback_delay(ploop);
 
        ret = blk_status_to_errno(bi_status);
        if (!ret) {
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index f0706a03ce61..6dd94af5fd94 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -613,11 +613,6 @@ static void ploop_unlink_completed_pio(struct ploop 
*ploop, struct pio *pio)
                ploop_dispatch_pios(ploop, NULL, &pio_list);
 }
 
-static void ploop_add_dirty_for_wb(struct ploop *ploop, struct md_page *md)
-{
-       llist_add((struct llist_node *)&md->wb_link, &ploop->wb_batch_llist);
-}
-
 static bool ploop_md_make_dirty(struct ploop *ploop, struct md_page *md)
 {
        bool new = false;
@@ -1031,6 +1026,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, 
struct md_page *md,
        struct pio *pio;
        map_index_t *to;
        u8 level;
+       int ret = -ENOMEM;
 
        lockdep_assert_held(&ploop->bat_lock);
 
@@ -1050,6 +1046,9 @@ static int ploop_prepare_bat_update(struct ploop *ploop, 
struct md_page *md,
        if (WARN_ON(md->piwb)) {
                PL_ERR("md %p has piwb: %p type:%d ourtype:%d\n",
                        md, md->piwb, md->piwb->type, type);
+               spin_unlock(&md->md_lock);
+               ret = -EBUSY;
+               goto err;
        }
        md->piwb = piwb;
        piwb->md = md;
@@ -1094,7 +1093,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, 
struct md_page *md,
        return 0;
 err:
        ploop_free_piwb(piwb);
-       return -ENOMEM;
+       return ret;
 }
 
 void ploop_break_bat_update(struct ploop *ploop, struct md_page *md,
@@ -2708,31 +2707,44 @@ static void ploop_handle_cleanup(struct ploop *ploop, 
struct pio *pio)
  * another index instead of existing. If so, management of
  * old bat_entries[@clu] and of related holes_bitmap bit
  * is caller duty.
+ * Caller must clear MD_UPDATING and comply to add_for_wb
  */
 int ploop_prepare_reloc_index_wb(struct ploop *ploop,
                                 struct md_page **ret_md,
                                 u32 clu, u32 *dst_clu,
-                                struct file *file)
+                                struct file *file,
+                                int *add_for_wb)
 {
        enum piwb_type type = PIWB_TYPE_ALLOC;
        u32 page_id = ploop_bat_clu_to_page_nr(clu);
        struct md_page *md = ploop_md_page_find(ploop, page_id);
        struct ploop_index_wb *piwb;
        int err;
+       bool add_to_wblist;
 
        if (dst_clu)
                type = PIWB_TYPE_RELOC;
 
        err = -EIO;
+
        spin_lock_irq(&ploop->bat_lock);
-       if (test_bit(MD_DIRTY, &md->status) || test_bit(MD_WRITEBACK, 
&md->status)) {
-               PL_ERR("Unexpected md status: %lx", md->status);
+       spin_lock(&md->md_lock);
+       /* Keep this test to be the same as in delay_if_md_busy */
+       if (test_bit(MD_WRITEBACK, &md->status) || test_bit(MD_UPDATING, 
&md->status)) {
+               err = -EBUSY;
+               spin_unlock(&md->md_lock);
                goto out_error;
+       } else {
+               set_bit(MD_UPDATING, &md->status);
        }
+       spin_unlock(&md->md_lock);
+
        err = ploop_prepare_bat_update(ploop, md, type);
        if (err)
                goto out_error;
 
+       add_to_wblist = ploop_md_make_dirty(ploop, md);
+
        piwb = md->piwb;
 
        if (dst_clu) {
@@ -2750,12 +2762,18 @@ int ploop_prepare_reloc_index_wb(struct ploop *ploop,
        spin_unlock_irq(&ploop->bat_lock);
 
        *ret_md = md;
+       *add_for_wb = add_to_wblist ? 1 : 0;
+
        return 0;
 
 out_reset:
        ploop_break_bat_update(ploop, md, piwb);
 out_error:
+       if (add_to_wblist)
+               clear_bit(MD_DIRTY, &md->status);
+       clear_bit(MD_UPDATING, &md->status);
        spin_unlock_irq(&ploop->bat_lock);
+
        return err;
 }
 ALLOW_ERROR_INJECTION(ploop_prepare_reloc_index_wb, ERRNO);
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 46450cac8c7d..db4d92c9679a 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -578,6 +578,11 @@ static inline const char *ploop_device_name(struct ploop 
*ploop)
        return ploop->ti->table->md->disk->disk_name;
 }
 
+static inline void ploop_add_dirty_for_wb(struct ploop *ploop, struct md_page 
*md)
+{
+       llist_add((struct llist_node *)&md->wb_link, &ploop->wb_batch_llist);
+}
+
 #define PL_FMT(fmt) "ploop: %s: " fmt "\n"
 #define PL_ERR(fmt, ...) pr_err(PL_FMT(fmt), ploop_device_name(ploop), 
##__VA_ARGS__)
 #define PL_ERR_ONCE(fmt, ...) pr_err_once(PL_FMT(fmt), 
ploop_device_name(ploop), ##__VA_ARGS__)
@@ -612,7 +617,7 @@ extern void ploop_map_and_submit_rw(struct ploop *ploop, 
u32 dst_clu,
                                    struct pio *pio, u8 level);
 extern int ploop_prepare_reloc_index_wb(struct ploop *ploop,
                                        struct md_page **ret_md, u32 clu, u32 
*dst_clu,
-                                       struct file *file);
+                                       struct file *file, int *add_for_wb);
 extern void ploop_break_bat_update(struct ploop *ploop, struct md_page *md,
                                   struct ploop_index_wb *piwb);
 extern void ploop_index_wb_submit(struct ploop *, struct ploop_index_wb *);
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to