We missed the fact that resize/grow touch md0 page to update ploop parameters, other pages are not linked so there is no issue. But in case there is a concurrent update to md0 piwb is not handled correctly, also using sync updates in parallel is not okay. To fix this update code to use the new mechanism with MD_UPDATING flag and instead of using sync operations pass the updates to runner threads.
https://virtuozzo.atlassian.net/browse/VSTOR-101871 Signed-off-by: Alexander Atanasov <alexander.atana...@virtuozzo.com> --- drivers/md/dm-ploop-cmd.c | 39 ++++++++++++++++++++++++++++++++------- drivers/md/dm-ploop-map.c | 37 ++++++++++++++++++++++++++++--------- drivers/md/dm-ploop.h | 7 ++++++- 3 files changed, 66 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c index d2eb4797ab6e..7d79b900eb1a 100644 --- a/drivers/md/dm-ploop-cmd.c +++ b/drivers/md/dm-ploop-cmd.c @@ -286,6 +286,8 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop, struct md_page *md; bool is_locked; int ret = 0; + int tries = 5; + int add_for_wb = 0; dst_clu = cmd->resize.dst_clu; @@ -308,6 +310,7 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop, } spin_unlock_irq(&ploop->bat_lock); +reread: /* Read full clu sync */ ret = ploop_read_cluster_sync(ploop, pio, dst_clu); if (ret < 0) { @@ -316,8 +319,13 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop, } ret = ploop_prepare_reloc_index_wb(ploop, &md, clu, &new_dst, - ploop_top_delta(ploop)->file); + ploop_top_delta(ploop)->file, + &add_for_wb); if (ret < 0) { + if (ret == -EBUSY && tries-- > 0) { + PL_ERR("md0 busy, retry:%d\n", tries); + goto reread; + } PL_ERR("reloc: can't prepare it: %d", ret); goto out; } @@ -332,13 +340,16 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop, goto out; } - set_bit(MD_WRITEBACK, &md->status); init_completion(&comp); piwb->comp = ∁ piwb->comp_bi_status = &bi_status; /* Write new index on disk */ - ploop_index_wb_submit(ploop, piwb); + ploop_disable_writeback_delay(ploop); + if (add_for_wb) + ploop_add_dirty_for_wb(ploop, md); + clear_bit(MD_UPDATING, &md->status); wait_for_completion(&comp); + ploop_enable_writeback_delay(ploop); ret = blk_status_to_errno(bi_status); if (ret) { @@ -378,12 +389,22 @@ static int ploop_grow_update_header(struct ploop *ploop, struct md_page *md; u64 sectors; int ret; + int tries = 5; + int add_for_wb = false; +retry: /* hdr is in the same page as bat_entries[0] index */ ret = ploop_prepare_reloc_index_wb(ploop, &md, 0, NULL, - ploop_top_delta(ploop)->file); - if (ret) + ploop_top_delta(ploop)->file, + &add_for_wb); + if (ret) { + if (ret == -EBUSY && tries-- > 0) { + PL_ERR("md0 busy, retry:%d\n", tries); + schedule(); + goto retry; + } return ret; + } piwb = md->piwb; size = (PLOOP_MAP_OFFSET + cmd->resize.nr_bat_entries); @@ -398,12 +419,16 @@ static int ploop_grow_update_header(struct ploop *ploop, offset = hdr->m_FirstBlockOffset = cpu_to_le32(first_block_off); kunmap_local(hdr); - set_bit(MD_WRITEBACK, &md->status); init_completion(&comp); piwb->comp = ∁ piwb->comp_bi_status = &bi_status; - ploop_index_wb_submit(ploop, piwb); + + ploop_disable_writeback_delay(ploop); + if (add_for_wb) + ploop_add_dirty_for_wb(ploop, md); + clear_bit(MD_UPDATING, &md->status); wait_for_completion(&comp); + ploop_enable_writeback_delay(ploop); ret = blk_status_to_errno(bi_status); if (!ret) { diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c index ef280a8b0f90..04e71c851b64 100644 --- a/drivers/md/dm-ploop-map.c +++ b/drivers/md/dm-ploop-map.c @@ -613,11 +613,6 @@ static void ploop_unlink_completed_pio(struct ploop *ploop, struct pio *pio) ploop_dispatch_pios(ploop, NULL, &pio_list); } -static void ploop_add_dirty_for_wb(struct ploop *ploop, struct md_page *md) -{ - llist_add((struct llist_node *)&md->wb_link, &ploop->wb_batch_llist); -} - static bool ploop_md_make_dirty(struct ploop *ploop, struct md_page *md) { bool new = false; @@ -1031,6 +1026,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md, struct pio *pio; map_index_t *to; u8 level; + int ret = -ENOMEM; lockdep_assert_held(&ploop->bat_lock); @@ -1050,6 +1046,9 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md, if (WARN_ON(md->piwb)) { PL_ERR("md %p has piwb: %p type:%d ourtype:%d\n", md, md->piwb, md->piwb->type, type); + spin_unlock(&md->md_lock); + ret = -EBUSY; + goto err; } md->piwb = piwb; piwb->md = md; @@ -1094,7 +1093,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md, return 0; err: ploop_free_piwb(piwb); - return -ENOMEM; + return ret; } void ploop_break_bat_update(struct ploop *ploop, struct md_page *md, @@ -2692,31 +2691,45 @@ static void ploop_handle_cleanup(struct ploop *ploop, struct pio *pio) * another index instead of existing. If so, management of * old bat_entries[@clu] and of related holes_bitmap bit * is caller duty. + * Caller must clear MD_UPDATING and comply to add_for_wb */ int ploop_prepare_reloc_index_wb(struct ploop *ploop, struct md_page **ret_md, u32 clu, u32 *dst_clu, - struct file *file) + struct file *file, + int *add_for_wb) { enum piwb_type type = PIWB_TYPE_ALLOC; u32 page_id = ploop_bat_clu_to_page_nr(clu); struct md_page *md = ploop_md_page_find(ploop, page_id); struct ploop_index_wb *piwb; int err; + bool add_to_wblist; if (dst_clu) type = PIWB_TYPE_RELOC; err = -EIO; + + spin_lock_irq(&ploop->bat_lock); - if (test_bit(MD_DIRTY, &md->status) || test_bit(MD_WRITEBACK, &md->status)) { - PL_ERR("Unexpected md status: %lx", md->status); + spin_lock(&md->md_lock); + if (test_bit(MD_DIRTY, &md->status) || test_bit(MD_WRITEBACK, &md->status) + || test_bit(MD_UPDATING, &md->status)) { + err = -EBUSY; + spin_unlock(&md->md_lock); goto out_error; + } else { + set_bit(MD_UPDATING, &md->status); } + spin_unlock(&md->md_lock); + err = ploop_prepare_bat_update(ploop, md, type); if (err) goto out_error; + add_to_wblist = ploop_md_make_dirty(ploop, md); + piwb = md->piwb; if (dst_clu) { @@ -2734,12 +2747,18 @@ int ploop_prepare_reloc_index_wb(struct ploop *ploop, spin_unlock_irq(&ploop->bat_lock); *ret_md = md; + *add_for_wb = add_to_wblist ? 1 : 0; + return 0; out_reset: ploop_break_bat_update(ploop, md, piwb); out_error: + if (add_to_wblist) + clear_bit(MD_DIRTY, &md->status); + clear_bit(MD_UPDATING, &md->status); spin_unlock_irq(&ploop->bat_lock); + return err; } ALLOW_ERROR_INJECTION(ploop_prepare_reloc_index_wb, ERRNO); diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h index 46450cac8c7d..db4d92c9679a 100644 --- a/drivers/md/dm-ploop.h +++ b/drivers/md/dm-ploop.h @@ -578,6 +578,11 @@ static inline const char *ploop_device_name(struct ploop *ploop) return ploop->ti->table->md->disk->disk_name; } +static inline void ploop_add_dirty_for_wb(struct ploop *ploop, struct md_page *md) +{ + llist_add((struct llist_node *)&md->wb_link, &ploop->wb_batch_llist); +} + #define PL_FMT(fmt) "ploop: %s: " fmt "\n" #define PL_ERR(fmt, ...) pr_err(PL_FMT(fmt), ploop_device_name(ploop), ##__VA_ARGS__) #define PL_ERR_ONCE(fmt, ...) pr_err_once(PL_FMT(fmt), ploop_device_name(ploop), ##__VA_ARGS__) @@ -612,7 +617,7 @@ extern void ploop_map_and_submit_rw(struct ploop *ploop, u32 dst_clu, struct pio *pio, u8 level); extern int ploop_prepare_reloc_index_wb(struct ploop *ploop, struct md_page **ret_md, u32 clu, u32 *dst_clu, - struct file *file); + struct file *file, int *add_for_wb); extern void ploop_break_bat_update(struct ploop *ploop, struct md_page *md, struct ploop_index_wb *piwb); extern void ploop_index_wb_submit(struct ploop *, struct ploop_index_wb *); -- 2.43.0 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel