On 6/21/24 17:36, Alexander Atanasov wrote:
On 21.06.24 14:10, Andrey Zhadchenko wrote:


On 5/20/24 13:49, Alexander Atanasov wrote:
It is possible that BAT is corrupted on disk but
we have a good copy of it in RAM. Add dmsetup
message to write it out to file write_bat_to_file.
This is very basic interface and it must be used
as a last resort.
^^^^^^^^^^^^^^^^^^^^

There are even more posibilities this function to fail.
But as a last resort means we have a major bug and try to do best we can. The user of this interface must ensure that device is atleast
suspended and there are no operations running on the device.


Even if it is some last resort, kernel crashes should be avoided, so we will not accidentally explode live node with few vms.




https://virtuozzo.atlassian.net/browse/PSBM-156501
Signed-off-by: Alexander Atanasov <alexander.atana...@virtuozzo.com>
---
  drivers/md/dm-ploop-cmd.c | 48 +++++++++++++++++++++++++++++++++++++++
  drivers/md/dm-ploop-map.c |  4 ++--
  drivers/md/dm-ploop.h     |  3 +++
  3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index 88519f590148b..71f854c509f82 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -280,6 +280,45 @@ static void ploop_make_md_wb(struct ploop *ploop, struct md_page *md)
      write_unlock_irq(&ploop->bat_rwlock);
  }
+/* write all md to disk */
+static u32 ploop_rewrite_bat_from_memory(struct ploop *ploop)
+{
+    struct rb_node *node;
+    struct md_page *md;
+    struct ploop_index_wb *piwb;
+    blk_status_t bi_status;
+    DECLARE_COMPLETION_ONSTACK(comp);
+    int err;
+
+    ploop_suspend_submitting_pios(ploop);
1. It may fail
2. Deeper in ploop_inflight_bios_ref_switch() there is the following comment:
  * It is called from kwork only, so this can't be executed in parallel.
Could it affect us somehow?

+    ploop_for_each_md_page(ploop, md, node) {
+        /* we have the page and want to write so prepare bat update */
+        err = ploop_prepare_bat_update(ploop, md, PIWB_TYPE_ALLOC);
I see that suspend_submitting_pios() waits for all in-flight IO. But I am not sure that writeback is included, could you clarify it? ploop_prepare_bat_update() does no checks, so if there is existing writeback we will probably explode
+        /* does wb_init ploop_index_wb_init */
+        if (err) {
+            /* most likely no memory, notify user he may retry */
+            printk(KERN_ERR "MD prepare WB failed: %d\n", err);
+        }
+        /* mark md for writeback */
+        ploop_make_md_wb(ploop, md);
+        /* piwb is prepared in md->piwb */
+        piwb = md->piwb;
+        init_completion(&comp);
+        piwb->comp = &comp;
+        piwb->comp_bi_status = &bi_status;
+        /* Write index to disk */
+        ploop_index_wb_submit(ploop, piwb);
+        wait_for_completion(&comp);
+        /* piwb is freed on completion */
+        if (bi_status) {
+            printk(KERN_ERR "MD WB failed: %d\n", blk_status_to_errno(bi_status));
PL_ERR would probably be a bit better
+        }
+    }
+    ploop_resume_submitting_pios(ploop);
+
+    return 0;
+}
+
  static int ploop_grow_relocate_cluster(struct ploop *ploop,
                         struct ploop_cmd *cmd)
  {
@@ -1086,6 +1125,11 @@ static int ploop_check_delta_before_flip(struct ploop *ploop, struct file *file)
      return ret;
  }
+static int ploop_write_bat_to_file(struct ploop *ploop)
Why not just call ploop_rewrite_bat_from_memory() instead? This function seems to be excessive
+{
+    return ploop_rewrite_bat_from_memory(ploop);
+}
+
  static int ploop_flip_upper_deltas(struct ploop *ploop)
  {
      struct file *file;
@@ -1197,6 +1241,10 @@ int ploop_message(struct dm_target *ti, unsigned int argc, char **argv,
          if (argc != 1)
              goto unlock;
          ret = ploop_flip_upper_deltas(ploop);
+    } else if (!strcmp(argv[0], "write_bat_to_file")) {
+        if (argc != 1)
+            goto unlock;
+        ret = ploop_write_bat_to_file(ploop);
      } else {
          ret = -ENOTSUPP;
      }
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index ea04b3e53567e..7e16005f1e5c9 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -813,7 +813,7 @@ static void ploop_advance_local_after_bat_wb(struct ploop *ploop,
          ploop_dispatch_pios(ploop, NULL, &list);
  }
-static void ploop_free_piwb(struct ploop_index_wb *piwb)
+void ploop_free_piwb(struct ploop_index_wb *piwb)
  {
      ploop_free_pio(piwb->ploop, piwb->pio);
      put_page(piwb->bat_page);
@@ -889,7 +889,7 @@ static void ploop_bat_write_complete(struct ploop_index_wb *piwb,
      ploop_put_piwb(piwb);
  }
-static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
+int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
                      enum piwb_type type)
  {
      u32 i, off, last, *bat_entries;
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index db36687c81696..67dbf8abd4e41 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -559,6 +559,8 @@ extern void ploop_free_md_page(struct md_page *md);
  extern void ploop_free_md_pages_tree(struct rb_root *root);
  extern bool ploop_try_update_bat_entry(struct ploop *ploop, u32 clu,
                         u8 level, u32 dst_clu);
+extern int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
+                    enum piwb_type type);
  extern int ploop_add_delta(struct ploop *ploop, u32 level,
                 struct file *file, bool is_raw);
@@ -597,6 +599,7 @@ extern int ploop_read_delta_metadata(struct ploop *ploop, struct file *file,
                       struct rb_root *md_root, u32 *delta_nr_be);
  extern void ploop_index_wb_init(struct ploop_index_wb *piwb,
                  struct ploop *ploop);
+extern void ploop_free_piwb(struct ploop_index_wb *piwb);
  extern void ploop_call_rw_iter(struct file *file, loff_t pos, unsigned rw,
                     struct iov_iter *iter, struct pio *pio);
  extern void ploop_enospc_timer(struct timer_list *timer);

Best regards,
Andrey
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to