We missed the fact that resize/grow touch md0 page to update ploop
parameters, other pages are not linked so there is no issue.
But in case there is a concurrent update to md0 piwb is not handled
correctly, also using sync updates in parallel is not okay.
To fix this update code to use the new mechanism with MD_UPDATING
flag and instead of using sync operations pass the updates to
runner threads.
https://virtuozzo.atlassian.net/browse/VSTOR-101871
Signed-off-by: Alexander Atanasov <alexander.atana...@virtuozzo.com>
---
drivers/md/dm-ploop-cmd.c | 39 ++++++++++++++++++++++++++++++++-------
drivers/md/dm-ploop-map.c | 36 +++++++++++++++++++++++++++---------
drivers/md/dm-ploop.h | 7 ++++++-
3 files changed, 65 insertions(+), 17 deletions(-)
v1->v2: added missed hunk
v2->v3: remove the check for dirty bit, we want to allow updates
of dirty pages but only when writeback is not started
diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index d2eb4797ab6e..7d79b900eb1a 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -286,6 +286,8 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
struct md_page *md;
bool is_locked;
int ret = 0;
+ int tries = 5;
+ int add_for_wb = 0;
dst_clu = cmd->resize.dst_clu;
@@ -308,6 +310,7 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
}
spin_unlock_irq(&ploop->bat_lock);
+reread:
/* Read full clu sync */
ret = ploop_read_cluster_sync(ploop, pio, dst_clu);
if (ret < 0) {
@@ -316,8 +319,13 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
}
ret = ploop_prepare_reloc_index_wb(ploop, &md, clu, &new_dst,
- ploop_top_delta(ploop)->file);
+ ploop_top_delta(ploop)->file,
+ &add_for_wb);
if (ret < 0) {
+ if (ret == -EBUSY && tries-- > 0) {
+ PL_ERR("md0 busy, retry:%d\n", tries);
+ goto reread;
+ }
PL_ERR("reloc: can't prepare it: %d", ret);
goto out;
}
@@ -332,13 +340,16 @@ static int ploop_grow_relocate_cluster(struct ploop
*ploop,
goto out;
}
- set_bit(MD_WRITEBACK, &md->status);
init_completion(&comp);
piwb->comp = ∁
piwb->comp_bi_status = &bi_status;
/* Write new index on disk */
- ploop_index_wb_submit(ploop, piwb);
+ ploop_disable_writeback_delay(ploop);
+ if (add_for_wb)
+ ploop_add_dirty_for_wb(ploop, md);
+ clear_bit(MD_UPDATING, &md->status);
wait_for_completion(&comp);
+ ploop_enable_writeback_delay(ploop);
ret = blk_status_to_errno(bi_status);
if (ret) {
@@ -378,12 +389,22 @@ static int ploop_grow_update_header(struct ploop *ploop,
struct md_page *md;
u64 sectors;
int ret;
+ int tries = 5;
+ int add_for_wb = false;
+retry:
/* hdr is in the same page as bat_entries[0] index */
ret = ploop_prepare_reloc_index_wb(ploop, &md, 0, NULL,
- ploop_top_delta(ploop)->file);
- if (ret)
+ ploop_top_delta(ploop)->file,
+ &add_for_wb);
+ if (ret) {
+ if (ret == -EBUSY && tries-- > 0) {
+ PL_ERR("md0 busy, retry:%d\n", tries);
+ schedule();
+ goto retry;
+ }
return ret;
+ }
piwb = md->piwb;
size = (PLOOP_MAP_OFFSET + cmd->resize.nr_bat_entries);
@@ -398,12 +419,16 @@ static int ploop_grow_update_header(struct ploop *ploop,
offset = hdr->m_FirstBlockOffset = cpu_to_le32(first_block_off);
kunmap_local(hdr);
- set_bit(MD_WRITEBACK, &md->status);
init_completion(&comp);
piwb->comp = ∁
piwb->comp_bi_status = &bi_status;
- ploop_index_wb_submit(ploop, piwb);
+
+ ploop_disable_writeback_delay(ploop);
+ if (add_for_wb)
+ ploop_add_dirty_for_wb(ploop, md);
+ clear_bit(MD_UPDATING, &md->status);
wait_for_completion(&comp);
+ ploop_enable_writeback_delay(ploop);
ret = blk_status_to_errno(bi_status);
if (!ret) {
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index ef280a8b0f90..76544cdb476e 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -613,11 +613,6 @@ static void ploop_unlink_completed_pio(struct ploop
*ploop, struct pio *pio)
ploop_dispatch_pios(ploop, NULL, &pio_list);
}
-static void ploop_add_dirty_for_wb(struct ploop *ploop, struct md_page *md)
-{
- llist_add((struct llist_node *)&md->wb_link, &ploop->wb_batch_llist);
-}
-
static bool ploop_md_make_dirty(struct ploop *ploop, struct md_page *md)
{
bool new = false;
@@ -1031,6 +1026,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop,
struct md_page *md,
struct pio *pio;
map_index_t *to;
u8 level;
+ int ret = -ENOMEM;
lockdep_assert_held(&ploop->bat_lock);
@@ -1050,6 +1046,9 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
if (WARN_ON(md->piwb)) {
PL_ERR("md %p has piwb: %p type:%d ourtype:%d\n",
md, md->piwb, md->piwb->type, type);
+ spin_unlock(&md->md_lock);
+ ret = -EBUSY;
+ goto err;
}
md->piwb = piwb;
piwb->md = md;
@@ -1094,7 +1093,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop,
struct md_page *md,
return 0;
err:
ploop_free_piwb(piwb);
- return -ENOMEM;
+ return ret;
}
void ploop_break_bat_update(struct ploop *ploop, struct md_page *md,
@@ -2692,31 +2691,44 @@ static void ploop_handle_cleanup(struct ploop *ploop,
struct pio *pio)
* another index instead of existing. If so, management of
* old bat_entries[@clu] and of related holes_bitmap bit
* is caller duty.
+ * Caller must clear MD_UPDATING and comply to add_for_wb
*/
int ploop_prepare_reloc_index_wb(struct ploop *ploop,
struct md_page **ret_md,
u32 clu, u32 *dst_clu,
- struct file *file)
+ struct file *file,
+ int *add_for_wb)
{
enum piwb_type type = PIWB_TYPE_ALLOC;
u32 page_id = ploop_bat_clu_to_page_nr(clu);
struct md_page *md = ploop_md_page_find(ploop, page_id);
struct ploop_index_wb *piwb;
int err;
+ bool add_to_wblist;
if (dst_clu)
type = PIWB_TYPE_RELOC;
err = -EIO;
+
spin_lock_irq(&ploop->bat_lock);
- if (test_bit(MD_DIRTY, &md->status) || test_bit(MD_WRITEBACK,
&md->status)) {
- PL_ERR("Unexpected md status: %lx", md->status);
+ spin_lock(&md->md_lock);
+ /* Keep this test to be the same as in delay_if_md_busy */
+ if (test_bit(MD_WRITEBACK, &md->status) || test_bit(MD_UPDATING,
&md->status)) {
+ err = -EBUSY;
+ spin_unlock(&md->md_lock);
goto out_error;
+ } else {
+ set_bit(MD_UPDATING, &md->status);
}
+ spin_unlock(&md->md_lock);
+
err = ploop_prepare_bat_update(ploop, md, type);
if (err)
goto out_error;
+ add_to_wblist = ploop_md_make_dirty(ploop, md);
+
piwb = md->piwb;
if (dst_clu) {
@@ -2734,12 +2746,18 @@ int ploop_prepare_reloc_index_wb(struct ploop *ploop,
spin_unlock_irq(&ploop->bat_lock);
*ret_md = md;
+ *add_for_wb = add_to_wblist ? 1 : 0;
+
return 0;
out_reset:
ploop_break_bat_update(ploop, md, piwb);
out_error:
+ if (add_to_wblist)
+ clear_bit(MD_DIRTY, &md->status);
+ clear_bit(MD_UPDATING, &md->status);
spin_unlock_irq(&ploop->bat_lock);
+
return err;
}
ALLOW_ERROR_INJECTION(ploop_prepare_reloc_index_wb, ERRNO);
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 46450cac8c7d..db4d92c9679a 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -578,6 +578,11 @@ static inline const char *ploop_device_name(struct ploop
*ploop)
return ploop->ti->table->md->disk->disk_name;
}
+static inline void ploop_add_dirty_for_wb(struct ploop *ploop, struct md_page *md)
+{
+ llist_add((struct llist_node *)&md->wb_link, &ploop->wb_batch_llist);
+}
+
#define PL_FMT(fmt) "ploop: %s: " fmt "\n"
#define PL_ERR(fmt, ...) pr_err(PL_FMT(fmt), ploop_device_name(ploop),
##__VA_ARGS__)
#define PL_ERR_ONCE(fmt, ...) pr_err_once(PL_FMT(fmt),
ploop_device_name(ploop), ##__VA_ARGS__)
@@ -612,7 +617,7 @@ extern void ploop_map_and_submit_rw(struct ploop *ploop,
u32 dst_clu,
struct pio *pio, u8 level);
extern int ploop_prepare_reloc_index_wb(struct ploop *ploop,
struct md_page **ret_md, u32 clu, u32
*dst_clu,
- struct file *file);
+ struct file *file, int *add_for_wb);
extern void ploop_break_bat_update(struct ploop *ploop, struct md_page *md,
struct ploop_index_wb *piwb);
extern void ploop_index_wb_submit(struct ploop *, struct ploop_index_wb *);