Am 28.08.2013 um 16:55 hat Max Reitz geschrieben: > The qcow2_check_refcounts function has been extended to be able to fix > OFLAG_COPIED errors and multiple references on refcount blocks. > > If no corruptions remain after an image repair (and no errors have been > encountered), clear the corrupt flag in qcow2_check. > > Signed-off-by: Max Reitz <mre...@redhat.com>
This would be easier to review if you kept the code changes of the actual check (mostly code movement, I guess) and the repair of each type of errors in separate patches. > block/qcow2-cluster.c | 4 +- > block/qcow2-refcount.c | 249 > ++++++++++++++++++++++++++++++++++++++++++------- > block/qcow2.c | 6 +- > block/qcow2.h | 1 + > include/block/block.h | 1 + > 5 files changed, 222 insertions(+), 39 deletions(-) > @@ -1240,7 +1243,8 @@ int qcow2_check_refcounts(BlockDriverState *bs, > BdrvCheckResult *res, > { > BDRVQcowState *s = bs->opaque; > int64_t size, i, highest_cluster; > - int nb_clusters, refcount1, refcount2; > + uint64_t *l2_table = NULL; > + int nb_clusters, refcount1, refcount2, j; > QCowSnapshot *sn; > uint16_t *refcount_table; > int ret; > @@ -1305,10 +1309,85 @@ int qcow2_check_refcounts(BlockDriverState *bs, > BdrvCheckResult *res, > inc_refcounts(bs, res, refcount_table, nb_clusters, > offset, s->cluster_size); > if (refcount_table[cluster] != 1) { > - fprintf(stderr, "ERROR refcount block %" PRId64 > + fprintf(stderr, "%s refcount block %" PRId64 > " refcount=%d\n", > + fix & BDRV_FIX_ERRORS ? "Repairing" : > + "ERROR", > i, refcount_table[cluster]); > - res->corruptions++; > + if (fix & BDRV_FIX_ERRORS) { This is a quite long if block. Probably worth splitting into its own function. (The res->corruptions++ part could be in a common fail: part then.) > + int64_t new_offset; > + void *refcount_block; > + > + /* allocate new refcount block */ > + new_offset = qcow2_alloc_clusters(bs, s->cluster_size); How trustworthy is qcow2_alloc_clusters() when we know that our refcount information is corrupted? Couldn't we end up accidentally overwriting things? > + if (new_offset < 0) { > + fprintf(stderr, "Could not allocate new cluster\n"); > + res->corruptions++; > + continue; > + } > + /* fetch current content */ > + ret = qcow2_cache_get(bs, s->refcount_block_cache, > offset, > + &refcount_block); > + if (ret < 0) { > + fprintf(stderr, "Could not fetch refcount block\n"); strerror(-ret) would be useful information in almost all of the error cases. > + qcow2_free_clusters(bs, new_offset, s->cluster_size, > + QCOW2_DISCARD_ALWAYS); > + res->corruptions++; > + continue; > + } > + /* new block has not yet been entered into refcount > table, > + * therefore it is no refcount block yet (regarding this > + * check) */ > + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, > + new_offset, s->cluster_sectors * > BDRV_SECTOR_SIZE); > + if (ret < 0) { > + fprintf(stderr, "Could not write refcount block > (would " > + "overlap with existing metadata)\n"); > + /* the image will be marked corrupt here, so don't > even > + * attempt on freeing the cluster */ > + res->corruptions++; > + goto fail; > + } > + /* write to new block */ > + ret = bdrv_write(bs->file, new_offset >> > BDRV_SECTOR_BITS, > + refcount_block, s->cluster_sectors); > + if (ret < 0) { > + fprintf(stderr, "Could not write refcount block\n"); > + qcow2_free_clusters(bs, new_offset, s->cluster_size, > + QCOW2_DISCARD_ALWAYS); > + res->corruptions++; > + continue; > + } > + /* update refcount table */ > + assert(!(new_offset & (s->cluster_size - 1))); > + s->refcount_table[i] = new_offset; > + ret = write_reftable_entry(bs, i); > + if (ret < 0) { > + fprintf(stderr, "Could not update refcount table\n"); > + s->refcount_table[i] = offset; > + qcow2_free_clusters(bs, new_offset, s->cluster_size, > + QCOW2_DISCARD_ALWAYS); > + res->corruptions++; > + continue; > + } > + qcow2_cache_put(bs, s->refcount_block_cache, > + &refcount_block); > + /* update refcounts */ > + if ((new_offset >> s->cluster_bits) >= nb_clusters) { > + /* increase refcount_table size if necessary */ > + int old_nb_clusters = nb_clusters; > + nb_clusters = (new_offset >> s->cluster_bits) + 1; > + refcount_table = g_realloc(refcount_table, > + nb_clusters * sizeof(uint16_t)); > + memset(&refcount_table[old_nb_clusters], 0, > (nb_clusters > + - old_nb_clusters) * sizeof(uint16_t)); > + } > + refcount_table[cluster]--; > + inc_refcounts(bs, res, refcount_table, nb_clusters, > + new_offset, s->cluster_size); res->corruptions_fixed++? > + } else { > + res->corruptions++; > + } > } > } > } > @@ -1364,10 +1443,108 @@ int qcow2_check_refcounts(BlockDriverState *bs, > BdrvCheckResult *res, > } > } > > + l2_table = g_malloc(s->l2_size * sizeof(uint64_t)); qemu_blockalign() is better than g_malloc() because it avoids using a bounce buffer for O_DIRECT images. > + > + /* check OFLAG_COPIED */ > + for (i = 0; i < s->l1_size; i++) { > + uint64_t l1_entry = s->l1_table[i]; > + uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK; > + bool l2_dirty = false; > + int refcount; > + > + if (!l2_offset) { > + continue; > + } > + > + refcount = get_refcount(bs, l2_offset >> s->cluster_bits); > + if (refcount < 0) { > + /* don't print message nor increment check_errors, since the > above > + * loop will have done this already */ > + continue; > + } > + if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) { > + fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_entry=%" PRIx64 > + " refcount=%d\n", > + fix & BDRV_FIX_ERRORS ? "Repairing" : > + "ERROR", > + l1_entry, refcount); > + if (fix & BDRV_FIX_ERRORS) { > + s->l1_table[i] = refcount == 1 > + ? l1_entry | QCOW_OFLAG_COPIED > + : l1_entry & ~QCOW_OFLAG_COPIED; > + ret = qcow2_write_l1_entry(bs, i); > + if (ret < 0) { > + res->check_errors++; > + goto fail; > + } else res->corruptions_fixed++? > + } else { > + res->corruptions++; > + } > + } > + > + ret = bdrv_pread(bs->file, l2_offset, l2_table, > + s->l2_size * sizeof(uint64_t)); > + if (ret != s->l2_size * sizeof(uint64_t)) { > + fprintf(stderr, "ERROR: Could not read L2 table\n"); > + res->check_errors++; > + if (ret >= 0) { > + ret = -EIO; > + } Doesn't happen. You can just check ret < 0 instead of ret != ... in the first place, bdrv_pread() never returns short reads. > + goto fail; > + } > + > + for (j = 0; j < s->l2_size; j++) { > + uint64_t l2_entry = be64_to_cpu(l2_table[j]); > + uint64_t data_offset; > + > + if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_NORMAL) { > + continue; > + } > + > + data_offset = l2_entry & L2E_OFFSET_MASK; > + > + refcount = get_refcount(bs, data_offset >> s->cluster_bits); > + if (refcount < 0) { > + /* don't print message nor increment check_errors */ Why? > + continue; > + } > + if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) { > + fprintf(stderr, "%s OFLAG_COPIED data cluster: l2_entry=%" > + PRIx64 " refcount=%d\n", > + fix & BDRV_FIX_ERRORS ? "Repairing" : > + "ERROR", > + l2_entry, refcount); > + if (fix & BDRV_FIX_ERRORS) { > + l2_table[j] = cpu_to_be64(refcount == 1 > + ? l2_entry | QCOW_OFLAG_COPIED > + : l2_entry & ~QCOW_OFLAG_COPIED); > + l2_dirty = true; res->corruptions_fixed++ > + } else { > + res->corruptions++; > + } > + } > + } > + > + if (l2_dirty) { > + ret = bdrv_pwrite(bs->file, l2_offset, l2_table, > + s->l2_size * sizeof(uint64_t)); > + if (ret != s->l2_size * sizeof(uint64_t)) { > + fprintf(stderr, "ERROR: Could not write L2 table\n"); > + res->check_errors++; > + if (ret >= 0) { > + ret = -EIO; > + } bdrv_pwrite() also doesn't return short writes. > + goto fail; > + } > + } > + } > + > + > res->image_end_offset = (highest_cluster + 1) * s->cluster_size; > ret = 0; > > fail: > + g_free(l2_table); > g_free(refcount_table); > > return ret; Kevin