17.03.2020 21:16, Alberto Garcia wrote:
If an image has subclusters then there are more copy-on-write
scenarios that we need to consider. Let's say we have a write request
from the middle of subcluster #3 until the end of the cluster:
- If the cluster is new, then subclusters #0 to #3 from the old
cluster must be copied into the new one.
- If the cluster is new but the old cluster was unallocated, then
only subcluster #3 needs copy-on-write. #0 to #2 are marked as
unallocated in the bitmap of the new L2 entry.
- If we are overwriting an old cluster and subcluster #3 is
unallocated or has the all-zeroes bit set then we need
copy-on-write on subcluster #3.
- If we are overwriting an old cluster and subcluster #3 was
allocated then there is no need to copy-on-write.
Signed-off-by: Alberto Garcia <be...@igalia.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
block/qcow2-cluster.c | 140 +++++++++++++++++++++++++++++++++---------
1 file changed, 110 insertions(+), 30 deletions(-)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 8cdf8a23b6..c6f3cc9237 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1061,56 +1061,128 @@ void qcow2_alloc_cluster_abort(BlockDriverState *bs,
QCowL2Meta *m)
* If @keep_old is true it means that the clusters were already
* allocated and will be overwritten. If false then the clusters are
* new and we have to decrease the reference count of the old ones.
+ *
+ * Returns 1 on success, -errno on failure (in order to match the
+ * return value of handle_copied() and handle_alloc()).
Hmm, honestly, I don't like this idea. handle_copied() and handle_alloc() have
special return code semantics. Here there is no reason for special semantics,
just the classic error/success. Introducing new semantics (I think there are no
similar functions in qcow2-cluster.c, and maybe none in the whole qcow2
subsystem) just because the function is only used on the return-1 paths of its
callers, to save several lines of code - this doesn't seem like a good reason
to me.
Or maybe the reason will appear in the following patches? I'll see.
*/
-static void calculate_l2_meta(BlockDriverState *bs,
- uint64_t host_cluster_offset,
- uint64_t guest_offset, unsigned bytes,
- uint64_t *l2_slice, QCowL2Meta **m, bool
keep_old)
+static int calculate_l2_meta(BlockDriverState *bs, uint64_t
host_cluster_offset,
+ uint64_t guest_offset, unsigned bytes,
+ uint64_t *l2_slice, QCowL2Meta **m, bool keep_old)
{
BDRVQcow2State *s = bs->opaque;
- int l2_index = offset_to_l2_slice_index(s, guest_offset);
- uint64_t l2_entry;
+ int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
+ uint64_t l2_entry, l2_bitmap;
unsigned cow_start_from, cow_end_to;
unsigned cow_start_to = offset_into_cluster(s, guest_offset);
unsigned cow_end_from = cow_start_to + bytes;
unsigned nb_clusters = size_to_clusters(s, cow_end_from);
QCowL2Meta *old_m = *m;
- QCow2ClusterType type;
+ QCow2SubclusterType type;
assert(nb_clusters <= s->l2_slice_size - l2_index);
- /* Return if there's no COW (all clusters are normal and we keep them) */
+ /* Return if there's no COW (all subclusters are normal and we are
+ * keeping the clusters) */
if (keep_old) {
+ unsigned first_sc = cow_start_to / s->subcluster_size;
+ unsigned last_sc = (cow_end_from - 1) / s->subcluster_size;
int i;
- for (i = 0; i < nb_clusters; i++) {
- l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
- if (qcow2_get_cluster_type(bs, l2_entry) != QCOW2_CLUSTER_NORMAL) {
+ for (i = first_sc; i <= last_sc; i++) {
+ unsigned c = i / s->subclusters_per_cluster;
+ unsigned sc = i % s->subclusters_per_cluster;
+ l2_entry = get_l2_entry(s, l2_slice, l2_index + c);
+ l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + c);
+ type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc);
+ if (type == QCOW2_SUBCLUSTER_INVALID) {
+ l2_index += c; /* Point to the invalid entry */
+ goto fail;
+ }
+ if (type != QCOW2_SUBCLUSTER_NORMAL) {
break;
}
}
- if (i == nb_clusters) {
- return;
+ if (i == last_sc + 1) {
+ return 1;
}
}
/* Get the L2 entry of the first cluster */
l2_entry = get_l2_entry(s, l2_slice, l2_index);
- type = qcow2_get_cluster_type(bs, l2_entry);
+ l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
+ sc_index = offset_to_sc_index(s, guest_offset);
+ type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);
- if (type == QCOW2_CLUSTER_NORMAL && keep_old) {
- cow_start_from = cow_start_to;
+ if (type == QCOW2_SUBCLUSTER_INVALID) {
+ goto fail;
+ }
+
+ if (!keep_old) {
+ switch (type) {
+ case QCOW2_SUBCLUSTER_NORMAL:
+ case QCOW2_SUBCLUSTER_COMPRESSED:
+ case QCOW2_SUBCLUSTER_ZERO_ALLOC:
+ case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
+ cow_start_from = 0;
+ break;
+ case QCOW2_SUBCLUSTER_ZERO_PLAIN:
+ case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
+ cow_start_from = sc_index << s->subcluster_bits;
+ break;
+ default:
+ g_assert_not_reached();
+ }
} else {
- cow_start_from = 0;
+ switch (type) {
+ case QCOW2_SUBCLUSTER_NORMAL:
+ cow_start_from = cow_start_to;
+ break;
+ case QCOW2_SUBCLUSTER_ZERO_ALLOC:
+ case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
+ cow_start_from = sc_index << s->subcluster_bits;
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
/* Get the L2 entry of the last cluster */
- l2_entry = get_l2_entry(s, l2_slice, l2_index + nb_clusters - 1);
- type = qcow2_get_cluster_type(bs, l2_entry);
+ l2_index += nb_clusters - 1;
+ l2_entry = get_l2_entry(s, l2_slice, l2_index);
+ l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
+ sc_index = offset_to_sc_index(s, guest_offset + bytes - 1);
+ type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);
- if (type == QCOW2_CLUSTER_NORMAL && keep_old) {
- cow_end_to = cow_end_from;
+ if (type == QCOW2_SUBCLUSTER_INVALID) {
+ goto fail;
+ }
+
+ if (!keep_old) {
+ switch (type) {
Hmm, a big part of this code is mostly copied from the handling of the first
subcluster. But I'm not sure that it is worth refactoring now; maybe later.
+ case QCOW2_SUBCLUSTER_NORMAL:
+ case QCOW2_SUBCLUSTER_COMPRESSED:
+ case QCOW2_SUBCLUSTER_ZERO_ALLOC:
+ case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
+ cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
Hmm. Interesting: actually, we don't need to COW
QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC subclusters in the COW area. But this needs
more modifications to the COW handling.
+ break;
+ case QCOW2_SUBCLUSTER_ZERO_PLAIN:
+ case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
+ cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
This is because in the new cluster we can make the previous subclusters
unallocated, and not copy them from the backing file.
Hmm, actually, we should not just make them unallocated, but copy part of the
bitmap from the original L2 entry. I need to keep that in mind for the next
patches.
+ break;
+ default:
+ g_assert_not_reached();
+ }
} else {
- cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
+ switch (type) {
+ case QCOW2_SUBCLUSTER_NORMAL:
+ cow_end_to = cow_end_from;
+ break;
+ case QCOW2_SUBCLUSTER_ZERO_ALLOC:
+ case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
+ cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
*m = g_malloc0(sizeof(**m));
@@ -1135,6 +1207,18 @@ static void calculate_l2_meta(BlockDriverState *bs,
qemu_co_queue_init(&(*m)->dependent_requests);
QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
+
+fail:
maybe, s/fail/out/
+ if (type == QCOW2_SUBCLUSTER_INVALID) {
+ uint64_t l1_index = offset_to_l1_index(s, guest_offset);
+ uint64_t l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+ qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster entry found
"
+ " (L2 offset: %#" PRIx64 ", L2 index: %#x)",
+ l2_offset, l2_index);
+ return -EIO;
+ }
+
+ return 1;
}
/*
@@ -1352,10 +1436,8 @@ static int handle_copied(BlockDriverState *bs, uint64_t
guest_offset,
- offset_into_cluster(s, guest_offset));
assert(*bytes != 0);
- calculate_l2_meta(bs, cluster_offset & L2E_OFFSET_MASK, guest_offset,
- *bytes, l2_slice, m, true);
-
- ret = 1;
+ ret = calculate_l2_meta(bs, cluster_offset & L2E_OFFSET_MASK,
+ guest_offset, *bytes, l2_slice, m, true);
} else {
ret = 0;
}
@@ -1530,10 +1612,8 @@ static int handle_alloc(BlockDriverState *bs, uint64_t
guest_offset,
*bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
assert(*bytes != 0);
- calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes, l2_slice,
- m, false);
-
- ret = 1;
+ ret = calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes,
+ l2_slice, m, false);
out:
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
Anyway, the patch should work as intended, so if you want to keep it as is:
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
--
Best regards,
Vladimir