Reviewed-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>

On 4/2/25 13:42, Alexander Atanasov wrote:
We missed the fact that resize/grow touch md0 page to update ploop
parameters, other pages are not linked so there is no issue.
But in case there is a concurrent update to md0 piwb is not handled
correctly, also using sync updates in parallel is not okay.
To fix this update code to use the new mechanism with MD_UPDATING
flag and instead of using sync operations pass the updates to
runner threads.

https://virtuozzo.atlassian.net/browse/VSTOR-101871
Signed-off-by: Alexander Atanasov <alexander.atana...@virtuozzo.com>
---
  drivers/md/dm-ploop-cmd.c | 39 ++++++++++++++++++++++++++++++++-------
  drivers/md/dm-ploop-map.c | 36 +++++++++++++++++++++++++++---------
  drivers/md/dm-ploop.h     |  7 ++++++-
  3 files changed, 65 insertions(+), 17 deletions(-)

v1->v2: added missed hunk
v2->v3: remove the check for dirty bit, we want to allow updates
of dirty pages but only when writeback is not started

diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index d2eb4797ab6e..7d79b900eb1a 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -286,6 +286,8 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
        struct md_page *md;
        bool is_locked;
        int ret = 0;
+       int tries = 5;
+       int add_for_wb = 0;
dst_clu = cmd->resize.dst_clu; @@ -308,6 +310,7 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
        }
        spin_unlock_irq(&ploop->bat_lock);
+reread:
        /* Read full clu sync */
        ret = ploop_read_cluster_sync(ploop, pio, dst_clu);
        if (ret < 0) {
@@ -316,8 +319,13 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
        }
ret = ploop_prepare_reloc_index_wb(ploop, &md, clu, &new_dst,
-                                          ploop_top_delta(ploop)->file);
+                                          ploop_top_delta(ploop)->file,
+                                          &add_for_wb);
        if (ret < 0) {
+               if (ret == -EBUSY && tries-- > 0) {
+                       PL_ERR("md0 busy, retry:%d\n", tries);
+                       goto reread;
+               }
                PL_ERR("reloc: can't prepare it: %d", ret);
                goto out;
        }
@@ -332,13 +340,16 @@ static int ploop_grow_relocate_cluster(struct ploop 
*ploop,
                goto out;
        }
- set_bit(MD_WRITEBACK, &md->status);
        init_completion(&comp);
        piwb->comp = &comp;
        piwb->comp_bi_status = &bi_status;
        /* Write new index on disk */
-       ploop_index_wb_submit(ploop, piwb);
+       ploop_disable_writeback_delay(ploop);
+       if (add_for_wb)
+               ploop_add_dirty_for_wb(ploop, md);
+       clear_bit(MD_UPDATING, &md->status);
        wait_for_completion(&comp);
+       ploop_enable_writeback_delay(ploop);
ret = blk_status_to_errno(bi_status);
        if (ret) {
@@ -378,12 +389,22 @@ static int ploop_grow_update_header(struct ploop *ploop,
        struct md_page *md;
        u64 sectors;
        int ret;
+       int tries = 5;
+       int add_for_wb = false;
+retry:
        /* hdr is in the same page as bat_entries[0] index */
        ret = ploop_prepare_reloc_index_wb(ploop, &md, 0, NULL,
-                                          ploop_top_delta(ploop)->file);
-       if (ret)
+                                          ploop_top_delta(ploop)->file,
+                                          &add_for_wb);
+       if (ret) {
+               if (ret == -EBUSY && tries-- > 0) {
+                       PL_ERR("md0 busy, retry:%d\n", tries);
+                       schedule();
+                       goto retry;
+               }
                return ret;
+       }
        piwb = md->piwb;
size = (PLOOP_MAP_OFFSET + cmd->resize.nr_bat_entries);
@@ -398,12 +419,16 @@ static int ploop_grow_update_header(struct ploop *ploop,
        offset = hdr->m_FirstBlockOffset = cpu_to_le32(first_block_off);
        kunmap_local(hdr);
- set_bit(MD_WRITEBACK, &md->status);
        init_completion(&comp);
        piwb->comp = &comp;
        piwb->comp_bi_status = &bi_status;
-       ploop_index_wb_submit(ploop, piwb);
+
+       ploop_disable_writeback_delay(ploop);
+       if (add_for_wb)
+               ploop_add_dirty_for_wb(ploop, md);
+       clear_bit(MD_UPDATING, &md->status);
        wait_for_completion(&comp);
+       ploop_enable_writeback_delay(ploop);
ret = blk_status_to_errno(bi_status);
        if (!ret) {
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index ef280a8b0f90..76544cdb476e 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -613,11 +613,6 @@ static void ploop_unlink_completed_pio(struct ploop 
*ploop, struct pio *pio)
                ploop_dispatch_pios(ploop, NULL, &pio_list);
  }
-static void ploop_add_dirty_for_wb(struct ploop *ploop, struct md_page *md)
-{
-       llist_add((struct llist_node *)&md->wb_link, &ploop->wb_batch_llist);
-}
-
  static bool ploop_md_make_dirty(struct ploop *ploop, struct md_page *md)
  {
        bool new = false;
@@ -1031,6 +1026,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, 
struct md_page *md,
        struct pio *pio;
        map_index_t *to;
        u8 level;
+       int ret = -ENOMEM;
lockdep_assert_held(&ploop->bat_lock); @@ -1050,6 +1046,9 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
        if (WARN_ON(md->piwb)) {
                PL_ERR("md %p has piwb: %p type:%d ourtype:%d\n",
                        md, md->piwb, md->piwb->type, type);
+               spin_unlock(&md->md_lock);
+               ret = -EBUSY;
+               goto err;
        }
        md->piwb = piwb;
        piwb->md = md;
@@ -1094,7 +1093,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, 
struct md_page *md,
        return 0;
  err:
        ploop_free_piwb(piwb);
-       return -ENOMEM;
+       return ret;
  }
void ploop_break_bat_update(struct ploop *ploop, struct md_page *md,
@@ -2692,31 +2691,44 @@ static void ploop_handle_cleanup(struct ploop *ploop, 
struct pio *pio)
   * another index instead of existing. If so, management of
   * old bat_entries[@clu] and of related holes_bitmap bit
   * is caller duty.
+ * Caller must clear MD_UPDATING and comply to add_for_wb
   */
  int ploop_prepare_reloc_index_wb(struct ploop *ploop,
                                 struct md_page **ret_md,
                                 u32 clu, u32 *dst_clu,
-                                struct file *file)
+                                struct file *file,
+                                int *add_for_wb)
  {
        enum piwb_type type = PIWB_TYPE_ALLOC;
        u32 page_id = ploop_bat_clu_to_page_nr(clu);
        struct md_page *md = ploop_md_page_find(ploop, page_id);
        struct ploop_index_wb *piwb;
        int err;
+       bool add_to_wblist;
if (dst_clu)
                type = PIWB_TYPE_RELOC;
err = -EIO;
+
        spin_lock_irq(&ploop->bat_lock);
-       if (test_bit(MD_DIRTY, &md->status) || test_bit(MD_WRITEBACK, 
&md->status)) {
-               PL_ERR("Unexpected md status: %lx", md->status);
+       spin_lock(&md->md_lock);
+       /* Keep this test to be the same as in delay_if_md_busy */
+       if (test_bit(MD_WRITEBACK, &md->status) || test_bit(MD_UPDATING, 
&md->status)) {
+               err = -EBUSY;
+               spin_unlock(&md->md_lock);
                goto out_error;
+       } else {
+               set_bit(MD_UPDATING, &md->status);
        }
+       spin_unlock(&md->md_lock);
+
        err = ploop_prepare_bat_update(ploop, md, type);
        if (err)
                goto out_error;
+ add_to_wblist = ploop_md_make_dirty(ploop, md);
+
        piwb = md->piwb;
if (dst_clu) {
@@ -2734,12 +2746,18 @@ int ploop_prepare_reloc_index_wb(struct ploop *ploop,
        spin_unlock_irq(&ploop->bat_lock);
*ret_md = md;
+       *add_for_wb = add_to_wblist ? 1 : 0;
+
        return 0;
out_reset:
        ploop_break_bat_update(ploop, md, piwb);
  out_error:
+       if (add_to_wblist)
+               clear_bit(MD_DIRTY, &md->status);
+       clear_bit(MD_UPDATING, &md->status);
        spin_unlock_irq(&ploop->bat_lock);
+
        return err;
  }
  ALLOW_ERROR_INJECTION(ploop_prepare_reloc_index_wb, ERRNO);
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 46450cac8c7d..db4d92c9679a 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -578,6 +578,11 @@ static inline const char *ploop_device_name(struct ploop 
*ploop)
        return ploop->ti->table->md->disk->disk_name;
  }
+static inline void ploop_add_dirty_for_wb(struct ploop *ploop, struct md_page *md)
+{
+       llist_add((struct llist_node *)&md->wb_link, &ploop->wb_batch_llist);
+}
+
  #define PL_FMT(fmt) "ploop: %s: " fmt "\n"
  #define PL_ERR(fmt, ...) pr_err(PL_FMT(fmt), ploop_device_name(ploop), 
##__VA_ARGS__)
  #define PL_ERR_ONCE(fmt, ...) pr_err_once(PL_FMT(fmt), 
ploop_device_name(ploop), ##__VA_ARGS__)
@@ -612,7 +617,7 @@ extern void ploop_map_and_submit_rw(struct ploop *ploop, 
u32 dst_clu,
                                    struct pio *pio, u8 level);
  extern int ploop_prepare_reloc_index_wb(struct ploop *ploop,
                                        struct md_page **ret_md, u32 clu, u32 
*dst_clu,
-                                       struct file *file);
+                                       struct file *file, int *add_for_wb);
  extern void ploop_break_bat_update(struct ploop *ploop, struct md_page *md,
                                   struct ploop_index_wb *piwb);
  extern void ploop_index_wb_submit(struct ploop *, struct ploop_index_wb *);

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to