Re: [PATCH v4 17/30] qcow2: Add subcluster support to calculate_l2_meta()

Vladimir Sementsov-Ogievskiy Wed, 15 Apr 2020 01:39:59 -0700

17.03.2020 21:16, Alberto Garcia wrote:

If an image has subclusters then there are more copy-on-write
scenarios that we need to consider. Let's say we have a write request
from the middle of subcluster #3 until the end of the cluster:


    - If the cluster is new, then subclusters #0 to #3 from the old
      cluster must be copied into the new one.

    - If the cluster is new but the old cluster was unallocated, then
      only subcluster #3 needs copy-on-write. #0 to #2 are marked as
      unallocated in the bitmap of the new L2 entry.

    - If we are overwriting an old cluster and subcluster #3 is
      unallocated or has the all-zeroes bit set then we need
      copy-on-write on subcluster #3.

    - If we are overwriting an old cluster and subcluster #3 was
      allocated then there is no need to copy-on-write.

Signed-off-by: Alberto Garcia <be...@igalia.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
  block/qcow2-cluster.c | 140 +++++++++++++++++++++++++++++++++---------
  1 file changed, 110 insertions(+), 30 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 8cdf8a23b6..c6f3cc9237 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1061,56 +1061,128 @@ void qcow2_alloc_cluster_abort(BlockDriverState *bs, 
QCowL2Meta *m)
   * If @keep_old is true it means that the clusters were already
   * allocated and will be overwritten. If false then the clusters are
   * new and we have to decrease the reference count of the old ones.
+ *
+ * Returns 1 on success, -errno on failure (in order to match the
+ * return value of handle_copied() and handle_alloc()).


Hmm, honestly, I don't like this idea. handle_copied() and handle_alloc() have
special return code semantics. Here there is no reason for special semantics,
just classic error/success. Introducing new semantics (I think there are no
similar functions in qcow2-cluster.c, and maybe none in the whole qcow2
subsystem) just because the function is used only on the return-1 paths of its
callers, to save several lines of code, doesn't seem like a good reason to me.

Or maybe the reason will appear in the following patches? I'll see.

   */
-static void calculate_l2_meta(BlockDriverState *bs,
-                              uint64_t host_cluster_offset,
-                              uint64_t guest_offset, unsigned bytes,
-                              uint64_t *l2_slice, QCowL2Meta **m, bool 
keep_old)
+static int calculate_l2_meta(BlockDriverState *bs, uint64_t 
host_cluster_offset,
+                             uint64_t guest_offset, unsigned bytes,
+                             uint64_t *l2_slice, QCowL2Meta **m, bool keep_old)
  {
      BDRVQcow2State *s = bs->opaque;
-    int l2_index = offset_to_l2_slice_index(s, guest_offset);
-    uint64_t l2_entry;
+    int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
+    uint64_t l2_entry, l2_bitmap;
      unsigned cow_start_from, cow_end_to;
      unsigned cow_start_to = offset_into_cluster(s, guest_offset);
      unsigned cow_end_from = cow_start_to + bytes;
      unsigned nb_clusters = size_to_clusters(s, cow_end_from);
      QCowL2Meta *old_m = *m;
-    QCow2ClusterType type;
+    QCow2SubclusterType type;

assert(nb_clusters <= s->l2_slice_size - l2_index);- /* Return if there's no COW (all clusters are normal and we keep them) */

+    /* Return if there's no COW (all subclusters are normal and we are
+     * keeping the clusters) */
      if (keep_old) {
+        unsigned first_sc = cow_start_to / s->subcluster_size;
+        unsigned last_sc = (cow_end_from - 1) / s->subcluster_size;
          int i;
-        for (i = 0; i < nb_clusters; i++) {
-            l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
-            if (qcow2_get_cluster_type(bs, l2_entry) != QCOW2_CLUSTER_NORMAL) {
+        for (i = first_sc; i <= last_sc; i++) {
+            unsigned c = i / s->subclusters_per_cluster;
+            unsigned sc = i % s->subclusters_per_cluster;
+            l2_entry = get_l2_entry(s, l2_slice, l2_index + c);
+            l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + c);
+            type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc);
+            if (type == QCOW2_SUBCLUSTER_INVALID) {
+                l2_index += c; /* Point to the invalid entry */
+                goto fail;
+            }
+            if (type != QCOW2_SUBCLUSTER_NORMAL) {
                  break;
              }
          }
-        if (i == nb_clusters) {
-            return;
+        if (i == last_sc + 1) {
+            return 1;
          }
      }

/* Get the L2 entry of the first cluster */

      l2_entry = get_l2_entry(s, l2_slice, l2_index);
-    type = qcow2_get_cluster_type(bs, l2_entry);
+    l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
+    sc_index = offset_to_sc_index(s, guest_offset);
+    type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);

- if (type == QCOW2_CLUSTER_NORMAL && keep_old) {

-        cow_start_from = cow_start_to;
+    if (type == QCOW2_SUBCLUSTER_INVALID) {
+        goto fail;
+    }
+
+    if (!keep_old) {
+        switch (type) {
+        case QCOW2_SUBCLUSTER_NORMAL:
+        case QCOW2_SUBCLUSTER_COMPRESSED:
+        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
+        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
+            cow_start_from = 0;
+            break;
+        case QCOW2_SUBCLUSTER_ZERO_PLAIN:
+        case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
+            cow_start_from = sc_index << s->subcluster_bits;
+            break;
+        default:
+            g_assert_not_reached();
+        }
      } else {
-        cow_start_from = 0;
+        switch (type) {
+        case QCOW2_SUBCLUSTER_NORMAL:
+            cow_start_from = cow_start_to;
+            break;
+        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
+        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
+            cow_start_from = sc_index << s->subcluster_bits;
+            break;
+        default:
+            g_assert_not_reached();
+        }
      }

/* Get the L2 entry of the last cluster */

-    l2_entry = get_l2_entry(s, l2_slice, l2_index + nb_clusters - 1);
-    type = qcow2_get_cluster_type(bs, l2_entry);
+    l2_index += nb_clusters - 1;
+    l2_entry = get_l2_entry(s, l2_slice, l2_index);
+    l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
+    sc_index = offset_to_sc_index(s, guest_offset + bytes - 1);
+    type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);

- if (type == QCOW2_CLUSTER_NORMAL && keep_old) {

-        cow_end_to = cow_end_from;
+    if (type == QCOW2_SUBCLUSTER_INVALID) {
+        goto fail;
+    }
+
+    if (!keep_old) {
+        switch (type) {


Hmm, a big part of this code is mostly copied from the handling of the first
subcluster. But I'm not sure that it is worth refactoring now; maybe later.

+        case QCOW2_SUBCLUSTER_NORMAL:
+        case QCOW2_SUBCLUSTER_COMPRESSED:
+        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
+        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
+            cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);


Hmm. Interesting: actually, we don't need to COW
QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC subclusters in the COW area. But this needs
more modifications to the COW handling.

+            break;
+        case QCOW2_SUBCLUSTER_ZERO_PLAIN:
+        case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
+            cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);



This is because in the new cluster we can make the previous subclusters
unallocated and not copy them from the backing file.
Hmm, actually, we should not just make them unallocated, but copy part of the
bitmap from the original L2 entry. I need to keep that in mind for the next
patches.

+            break;
+        default:
+            g_assert_not_reached();
+        }
      } else {
-        cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
+        switch (type) {
+        case QCOW2_SUBCLUSTER_NORMAL:
+            cow_end_to = cow_end_from;
+            break;
+        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
+        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
+            cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
+            break;
+        default:
+            g_assert_not_reached();
+        }
      }

*m = g_malloc0(sizeof(**m));

@@ -1135,6 +1207,18 @@ static void calculate_l2_meta(BlockDriverState *bs,

qemu_co_queue_init(&(*m)->dependent_requests);

      QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
+
+fail:


maybe, s/fail/out/

+    if (type == QCOW2_SUBCLUSTER_INVALID) {
+        uint64_t l1_index = offset_to_l1_index(s, guest_offset);
+        uint64_t l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+        qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster entry found 
"
+                                " (L2 offset: %#" PRIx64 ", L2 index: %#x)",
+                                l2_offset, l2_index);
+        return -EIO;
+    }
+
+    return 1;
  }

/*

@@ -1352,10 +1436,8 @@ static int handle_copied(BlockDriverState *bs, uint64_t 
guest_offset,
                   - offset_into_cluster(s, guest_offset));
          assert(*bytes != 0);

- calculate_l2_meta(bs, cluster_offset & L2E_OFFSET_MASK, guest_offset,

-                          *bytes, l2_slice, m, true);
-
-        ret = 1;
+        ret = calculate_l2_meta(bs, cluster_offset & L2E_OFFSET_MASK,
+                                guest_offset, *bytes, l2_slice, m, true);
      } else {
          ret = 0;
      }
@@ -1530,10 +1612,8 @@ static int handle_alloc(BlockDriverState *bs, uint64_t 
guest_offset,
      *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
      assert(*bytes != 0);

- calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes, l2_slice,

-                      m, false);
-
-    ret = 1;
+    ret = calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes,
+                            l2_slice, m, false);

out:

      qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);


Anyway, the patch should work as intended, so if you want to keep it as is:
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>


--
Best regards,
Vladimir

Re: [PATCH v4 17/30] qcow2: Add subcluster support to calculate_l2_meta()

Reply via email to