Am 29.08.2013 13:36, schrieb Kevin Wolf:
Am 28.08.2013 um 16:55 hat Max Reitz geschrieben:
The qcow2_check_refcounts function has been extended to be able to fix
OFLAG_COPIED errors and multiple references on refcount blocks.
If no corruptions remain after an image repair (and no errors have been
encountered), clear the corrupt flag in qcow2_check.
Signed-off-by: Max Reitz <mre...@redhat.com>
This would be easier to review if you kept the code changes of the
actual check (mostly code movement, I guess) and the repair of each type
of errors in separate patches.
block/qcow2-cluster.c | 4 +-
block/qcow2-refcount.c | 249 ++++++++++++++++++++++++++++++++++++++++++-------
block/qcow2.c | 6 +-
block/qcow2.h | 1 +
include/block/block.h | 1 +
5 files changed, 222 insertions(+), 39 deletions(-)
@@ -1240,7 +1243,8 @@ int qcow2_check_refcounts(BlockDriverState *bs,
BdrvCheckResult *res,
{
BDRVQcowState *s = bs->opaque;
int64_t size, i, highest_cluster;
- int nb_clusters, refcount1, refcount2;
+ uint64_t *l2_table = NULL;
+ int nb_clusters, refcount1, refcount2, j;
QCowSnapshot *sn;
uint16_t *refcount_table;
int ret;
@@ -1305,10 +1309,85 @@ int qcow2_check_refcounts(BlockDriverState *bs,
BdrvCheckResult *res,
inc_refcounts(bs, res, refcount_table, nb_clusters,
offset, s->cluster_size);
if (refcount_table[cluster] != 1) {
- fprintf(stderr, "ERROR refcount block %" PRId64
+ fprintf(stderr, "%s refcount block %" PRId64
" refcount=%d\n",
+ fix & BDRV_FIX_ERRORS ? "Repairing" :
+ "ERROR",
i, refcount_table[cluster]);
- res->corruptions++;
+ if (fix & BDRV_FIX_ERRORS) {
This is a quite long if block. Probably worth splitting into its own
function. (The res->corruptions++ part could be in a common fail: part
then.)
Seems reasonable.
+ int64_t new_offset;
+ void *refcount_block;
+
+ /* allocate new refcount block */
+ new_offset = qcow2_alloc_clusters(bs, s->cluster_size);
How trustworthy is qcow2_alloc_clusters() when we know that our refcount
information is corrupted? Couldn't we end up accidentally overwriting
things?
I personally don't really see any other way to do this than to just fail
without any attempt on repairing. If the refcounts are completely
broken, I don't see a reasonable way of allocating a free cluster.
Furthermore, the following pre-write check should at least prevent this
from overwriting any metadata.
+ if (new_offset < 0) {
+ fprintf(stderr, "Could not allocate new cluster\n");
+ res->corruptions++;
+ continue;
+ }
+ /* fetch current content */
+ ret = qcow2_cache_get(bs, s->refcount_block_cache, offset,
+ &refcount_block);
+ if (ret < 0) {
+ fprintf(stderr, "Could not fetch refcount block\n");
strerror(-ret) would be useful information in almost all of the error
cases.
Yes, right.
+ qcow2_free_clusters(bs, new_offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ res->corruptions++;
+ continue;
+ }
+ /* new block has not yet been entered into refcount table,
+ * therefore it is no refcount block yet (regarding this
+ * check) */
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ new_offset, s->cluster_sectors * BDRV_SECTOR_SIZE);
+ if (ret < 0) {
+ fprintf(stderr, "Could not write refcount block (would
"
+ "overlap with existing metadata)\n");
+ /* the image will be marked corrupt here, so don't even
+ * attempt on freeing the cluster */
+ res->corruptions++;
+ goto fail;
+ }
+ /* write to new block */
+ ret = bdrv_write(bs->file, new_offset >> BDRV_SECTOR_BITS,
+ refcount_block, s->cluster_sectors);
+ if (ret < 0) {
+ fprintf(stderr, "Could not write refcount block\n");
+ qcow2_free_clusters(bs, new_offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ res->corruptions++;
+ continue;
+ }
+ /* update refcount table */
+ assert(!(new_offset & (s->cluster_size - 1)));
+ s->refcount_table[i] = new_offset;
+ ret = write_reftable_entry(bs, i);
+ if (ret < 0) {
+ fprintf(stderr, "Could not update refcount table\n");
+ s->refcount_table[i] = offset;
+ qcow2_free_clusters(bs, new_offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ res->corruptions++;
+ continue;
+ }
+ qcow2_cache_put(bs, s->refcount_block_cache,
+ &refcount_block);
+ /* update refcounts */
+ if ((new_offset >> s->cluster_bits) >= nb_clusters) {
+ /* increase refcount_table size if necessary */
+ int old_nb_clusters = nb_clusters;
+ nb_clusters = (new_offset >> s->cluster_bits) + 1;
+ refcount_table = g_realloc(refcount_table,
+ nb_clusters * sizeof(uint16_t));
+ memset(&refcount_table[old_nb_clusters], 0,
(nb_clusters
+ - old_nb_clusters) * sizeof(uint16_t));
+ }
+ refcount_table[cluster]--;
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ new_offset, s->cluster_size);
res->corruptions_fixed++?
Oh, yes, I forgot that.
+ } else {
+ res->corruptions++;
+ }
}
}
}
@@ -1364,10 +1443,108 @@ int qcow2_check_refcounts(BlockDriverState *bs,
BdrvCheckResult *res,
}
}
+ l2_table = g_malloc(s->l2_size * sizeof(uint64_t));
qemu_blockalign() is better than g_malloc() because it avoids using a
bounce buffer for O_DIRECT images.
Ah, okay. I'll include that in my other series as well.
+
+ /* check OFLAG_COPIED */
+ for (i = 0; i < s->l1_size; i++) {
+ uint64_t l1_entry = s->l1_table[i];
+ uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
+ bool l2_dirty = false;
+ int refcount;
+
+ if (!l2_offset) {
+ continue;
+ }
+
+ refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
+ if (refcount < 0) {
+ /* don't print message nor increment check_errors, since the above
+ * loop will have done this already */
+ continue;
+ }
+ if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
+ fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_entry=%" PRIx64
+ " refcount=%d\n",
+ fix & BDRV_FIX_ERRORS ? "Repairing" :
+ "ERROR",
+ l1_entry, refcount);
+ if (fix & BDRV_FIX_ERRORS) {
+ s->l1_table[i] = refcount == 1
+ ? l1_entry | QCOW_OFLAG_COPIED
+ : l1_entry & ~QCOW_OFLAG_COPIED;
+ ret = qcow2_write_l1_entry(bs, i);
+ if (ret < 0) {
+ res->check_errors++;
+ goto fail;
+ }
else res->corruptions_fixed++?
+ } else {
+ res->corruptions++;
+ }
+ }
+
+ ret = bdrv_pread(bs->file, l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t));
+ if (ret != s->l2_size * sizeof(uint64_t)) {
+ fprintf(stderr, "ERROR: Could not read L2 table\n");
+ res->check_errors++;
+ if (ret >= 0) {
+ ret = -EIO;
+ }
Doesn't happen. You can just check ret < 0 instead of ret != ... in the
first place, bdrv_pread() never returns short reads.
Okay, I think I just saw it that way in some other code.
+ goto fail;
+ }
+
+ for (j = 0; j < s->l2_size; j++) {
+ uint64_t l2_entry = be64_to_cpu(l2_table[j]);
+ uint64_t data_offset;
+
+ if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_NORMAL) {
+ continue;
+ }
+
+ data_offset = l2_entry & L2E_OFFSET_MASK;
+
+ refcount = get_refcount(bs, data_offset >> s->cluster_bits);
+ if (refcount < 0) {
+ /* don't print message nor increment check_errors */
Why?
I didn't want to duplicate the whole comment from above, but maybe it
makes sense if it's not obvious:
/* don't print message nor increment check_errors, since the above
* loop will have done this already */
+ continue;
+ }
+ if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
+ fprintf(stderr, "%s OFLAG_COPIED data cluster: l2_entry=%"
+ PRIx64 " refcount=%d\n",
+ fix & BDRV_FIX_ERRORS ? "Repairing" :
+ "ERROR",
+ l2_entry, refcount);
+ if (fix & BDRV_FIX_ERRORS) {
+ l2_table[j] = cpu_to_be64(refcount == 1
+ ? l2_entry | QCOW_OFLAG_COPIED
+ : l2_entry & ~QCOW_OFLAG_COPIED);
+ l2_dirty = true;
res->corruptions_fixed++
+ } else {
+ res->corruptions++;
+ }
+ }
+ }
+
+ if (l2_dirty) {
+ ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t));
+ if (ret != s->l2_size * sizeof(uint64_t)) {
+ fprintf(stderr, "ERROR: Could not write L2 table\n");
+ res->check_errors++;
+ if (ret >= 0) {
+ ret = -EIO;
+ }
bdrv_pwrite() also doesn't return short writes.
+ goto fail;
+ }
+ }
+ }
+
+
res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
ret = 0;
fail:
+ g_free(l2_table);
g_free(refcount_table);
return ret;
Kevin
Max