Since checking an images refcounts already visits every cluster, it's basically free to also check that the all-zero bit is correctly set. Only check for the active L1 table, and only output an error on the first non-zero cluster found.
Signed-off-by: Eric Blake <ebl...@redhat.com> --- block/qcow2-refcount.c | 60 +++++++++++++++++++++++++++++++++++--- tests/qemu-iotests/060.out | 6 ++-- tests/qemu-iotests/285 | 17 +++++++++++ tests/qemu-iotests/285.out | 20 +++++++++++++ 4 files changed, 97 insertions(+), 6 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index f67ac6b2d893..95c8101df365 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1583,6 +1583,7 @@ int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res, /* Flags for check_refcounts_l1() and check_refcounts_l2() */ enum { CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */ + CHECK_ALL_ZERO = 0x4, /* check autoclear all_zero bit */ }; /* @@ -1596,12 +1597,14 @@ enum { static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, void **refcount_table, int64_t *refcount_table_size, int64_t l2_offset, - int flags, BdrvCheckMode fix, bool active) + int flags, BdrvCheckMode fix, bool active, + bool *all_zero) { BDRVQcow2State *s = bs->opaque; uint64_t *l2_table, l2_entry; uint64_t next_contiguous_offset = 0; int i, l2_size, nb_csectors, ret; + bool check_all_zero; /* Read L2 table from disk */ l2_size = s->l2_size * sizeof(uint64_t); @@ -1615,8 +1618,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, } /* Do the actual checks */ - for(i = 0; i < s->l2_size; i++) { + for (i = 0; i < s->l2_size; i++) { l2_entry = be64_to_cpu(l2_table[i]); + check_all_zero = *all_zero; switch (qcow2_get_cluster_type(bs, l2_entry)) { case QCOW2_CLUSTER_COMPRESSED: @@ -1662,6 +1666,8 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, break; case QCOW2_CLUSTER_ZERO_ALLOC: + check_all_zero = false; + /* fall through */ case QCOW2_CLUSTER_NORMAL: { uint64_t offset = l2_entry & L2E_OFFSET_MASK; @@ -1740,12 +1746,51 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, } case QCOW2_CLUSTER_ZERO_PLAIN: + check_all_zero = false; + break; + case QCOW2_CLUSTER_UNALLOCATED: + if (!bs->backing) { + check_all_zero = false; + } break; default: abort(); } + + if (check_all_zero) { + fprintf(stderr, "%s: all zero bit set but L2 table at offset " + "0x%" PRIx64" contains non-zero cluster at entry %d\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", + l2_offset, i); + *all_zero = false; + if (fix & BDRV_FIX_ERRORS) { + uint64_t feat; + + ret = bdrv_pread(bs->file, + offsetof(QCowHeader, autoclear_features), + &feat, sizeof(feat)); + if (ret >= 0) { + feat &= ~cpu_to_be64(QCOW2_AUTOCLEAR_ALL_ZERO); + ret = bdrv_pwrite(bs->file, + offsetof(QCowHeader, autoclear_features), + &feat, sizeof(feat)); + } + if (ret < 0) { + fprintf(stderr, + "ERROR: Failed to update all zero bit: %s\n", + strerror(-ret)); + res->check_errors++; + /* Continue checking the rest of this L2 table */ + } else { + res->corruptions_fixed++; + } + s->autoclear_features &= ~QCOW2_AUTOCLEAR_ALL_ZERO; + } else { + res->corruptions++; + } + } } g_free(l2_table); @@ -1774,6 +1819,12 @@ static int check_refcounts_l1(BlockDriverState *bs, BDRVQcow2State *s = bs->opaque; uint64_t *l1_table = NULL, l2_offset, l1_size2; int i, ret; + bool all_zero = false; + + if (flags & CHECK_ALL_ZERO && + s->autoclear_features & QCOW2_AUTOCLEAR_ALL_ZERO) { + all_zero = true; + } l1_size2 = l1_size * sizeof(uint64_t); @@ -1825,7 +1876,7 @@ static int check_refcounts_l1(BlockDriverState *bs, /* Process and check L2 entries */ ret = check_refcounts_l2(bs, res, refcount_table, refcount_table_size, l2_offset, flags, - fix, active); + fix, active, &all_zero); if (ret < 0) { goto fail; } @@ -2114,7 +2165,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res, /* current L1 table */ ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters, - s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO, + s->l1_table_offset, s->l1_size, + CHECK_FRAG_INFO | CHECK_ALL_ZERO, fix, true); if (ret < 0) { return ret; diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index d27692a33c0d..d82aca458544 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -3,9 +3,10 @@ QA output created by 060 === Testing L2 reference into L1 === Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +ERROR: all zero bit set but L2 table at offset 0x30000 contains non-zero cluster at entry 0 ERROR cluster 3 refcount=1 reference=3 -1 errors were found on the image. +2 errors were found on the image. Data may be corrupted, or further writes to the image may corrupt it. incompatible_features [] qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with active L1 table); further corruption events will be suppressed @@ -28,10 +29,11 @@ read 512/512 bytes at offset 0 === Testing cluster data reference into refcount block === Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +ERROR: all zero bit set but L2 table at offset 0x40000 contains non-zero cluster at entry 0 ERROR refcount block 0 refcount=2 ERROR cluster 2 refcount=1 reference=2 -2 errors were found on the image. +3 errors were found on the image. Data may be corrupted, or further writes to the image may corrupt it. incompatible_features [] qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with refcount block); further corruption events will be suppressed diff --git a/tests/qemu-iotests/285 b/tests/qemu-iotests/285 index 66037af237a1..c435bb57d749 100755 --- a/tests/qemu-iotests/285 +++ b/tests/qemu-iotests/285 @@ -101,6 +101,23 @@ $QEMU_IMG snapshot -l snap "$TEST_IMG" $QEMU_IMG info "$TEST_IMG" | _filter_img_info --format-specific \ | _filter_date | _filter_vmstate_size +echo +echo "=== qemu-img check ===" +echo + +_make_test_img 32M +$QEMU_IO -c 'w -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io +# Image should be clean +_check_test_img +# Manually corrupt the image by setting the bit +$PYTHON qcow2.py "$TEST_IMG" set-feature-bit autoclear 2 +# check should detect the problem +_check_test_img +# repair should fix it +_check_test_img -r all +# the image should be clean again +_check_test_img + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/285.out b/tests/qemu-iotests/285.out index e43ff9906b5f..b28c9e266bf6 100644 --- a/tests/qemu-iotests/285.out +++ b/tests/qemu-iotests/285.out @@ -254,4 +254,24 @@ Format specific information: lazy refcounts: false refcount bits: 16 corrupt: false + +=== qemu-img check === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +ERROR: all zero bit set but L2 table at offset 0x40000 contains non-zero cluster at entry 0 + +1 errors were found on the image. +Data may be corrupted, or further writes to the image may corrupt it. +Repairing: all zero bit set but L2 table at offset 0x40000 contains non-zero cluster at entry 0 +The following inconsistencies were found and repaired: + + 0 leaked clusters + 1 corruptions + +Double checking the fixed image now... +No errors were found on the image. +No errors were found on the image. *** done -- 2.24.1