Re: [Qemu-devel] [PATCH v2 4/5] qcow2: More complete consistency check

Kevin Wolf Thu, 29 Aug 2013 04:38:05 -0700

Am 28.08.2013 um 16:55 hat Max Reitz geschrieben:
> The qcow2_check_refcounts function has been extended to be able to fix
> OFLAG_COPIED errors and multiple references on refcount blocks.
> 
> If no corruptions remain after an image repair (and no errors have been
> encountered), clear the corrupt flag in qcow2_check.
> 
> Signed-off-by: Max Reitz <mre...@redhat.com>


This would be easier to review if you kept the code changes of the
actual check (mostly code movement, I guess) and the repair of each type
of errors in separate patches.

>  block/qcow2-cluster.c  |   4 +-
>  block/qcow2-refcount.c | 249 
> ++++++++++++++++++++++++++++++++++++++++++-------
>  block/qcow2.c          |   6 +-
>  block/qcow2.h          |   1 +
>  include/block/block.h  |   1 +
>  5 files changed, 222 insertions(+), 39 deletions(-)

> @@ -1240,7 +1243,8 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
> BdrvCheckResult *res,
>  {
>      BDRVQcowState *s = bs->opaque;
>      int64_t size, i, highest_cluster;
> -    int nb_clusters, refcount1, refcount2;
> +    uint64_t *l2_table = NULL;
> +    int nb_clusters, refcount1, refcount2, j;
>      QCowSnapshot *sn;
>      uint16_t *refcount_table;
>      int ret;
> @@ -1305,10 +1309,85 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
> BdrvCheckResult *res,
>              inc_refcounts(bs, res, refcount_table, nb_clusters,
>                  offset, s->cluster_size);
>              if (refcount_table[cluster] != 1) {
> -                fprintf(stderr, "ERROR refcount block %" PRId64
> +                fprintf(stderr, "%s refcount block %" PRId64
>                      " refcount=%d\n",
> +                    fix & BDRV_FIX_ERRORS ? "Repairing" :
> +                                            "ERROR",
>                      i, refcount_table[cluster]);
> -                res->corruptions++;
> +                if (fix & BDRV_FIX_ERRORS) {

This is a quite long if block. Probably worth splitting into its own
function. (The res->corruptions++ part could be in a common fail: part
then.)

> +                    int64_t new_offset;
> +                    void *refcount_block;
> +
> +                    /* allocate new refcount block */
> +                    new_offset = qcow2_alloc_clusters(bs, s->cluster_size);

How trustworthy is qcow2_alloc_clusters() when we know that our refcount
information is corrupted? Couldn't we end up accidentally overwriting
things?

> +                    if (new_offset < 0) {
> +                        fprintf(stderr, "Could not allocate new cluster\n");
> +                        res->corruptions++;
> +                        continue;
> +                    }
> +                    /* fetch current content */
> +                    ret = qcow2_cache_get(bs, s->refcount_block_cache, 
> offset,
> +                            &refcount_block);
> +                    if (ret < 0) {
> +                        fprintf(stderr, "Could not fetch refcount block\n");

strerror(-ret) would be useful information in almost all of the error
cases.

> +                        qcow2_free_clusters(bs, new_offset, s->cluster_size,
> +                                QCOW2_DISCARD_ALWAYS);
> +                        res->corruptions++;
> +                        continue;
> +                    }
> +                    /* new block has not yet been entered into refcount 
> table,
> +                     * therefore it is no refcount block yet (regarding this
> +                     * check) */
> +                    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> +                            new_offset, s->cluster_sectors * 
> BDRV_SECTOR_SIZE);
> +                    if (ret < 0) {
> +                        fprintf(stderr, "Could not write refcount block 
> (would "
> +                                "overlap with existing metadata)\n");
> +                        /* the image will be marked corrupt here, so don't 
> even
> +                         * attempt on freeing the cluster */
> +                        res->corruptions++;
> +                        goto fail;
> +                    }
> +                    /* write to new block */
> +                    ret = bdrv_write(bs->file, new_offset >> 
> BDRV_SECTOR_BITS,
> +                            refcount_block, s->cluster_sectors);
> +                    if (ret < 0) {
> +                        fprintf(stderr, "Could not write refcount block\n");
> +                        qcow2_free_clusters(bs, new_offset, s->cluster_size,
> +                                QCOW2_DISCARD_ALWAYS);
> +                        res->corruptions++;
> +                        continue;
> +                    }
> +                    /* update refcount table */
> +                    assert(!(new_offset & (s->cluster_size - 1)));
> +                    s->refcount_table[i] = new_offset;
> +                    ret = write_reftable_entry(bs, i);
> +                    if (ret < 0) {
> +                        fprintf(stderr, "Could not update refcount table\n");
> +                        s->refcount_table[i] = offset;
> +                        qcow2_free_clusters(bs, new_offset, s->cluster_size,
> +                                QCOW2_DISCARD_ALWAYS);
> +                        res->corruptions++;
> +                        continue;
> +                    }
> +                    qcow2_cache_put(bs, s->refcount_block_cache,
> +                            &refcount_block);
> +                    /* update refcounts */
> +                    if ((new_offset >> s->cluster_bits) >= nb_clusters) {
> +                        /* increase refcount_table size if necessary */
> +                        int old_nb_clusters = nb_clusters;
> +                        nb_clusters = (new_offset >> s->cluster_bits) + 1;
> +                        refcount_table = g_realloc(refcount_table,
> +                                nb_clusters * sizeof(uint16_t));
> +                        memset(&refcount_table[old_nb_clusters], 0, 
> (nb_clusters
> +                                - old_nb_clusters) * sizeof(uint16_t));
> +                    }
> +                    refcount_table[cluster]--;
> +                    inc_refcounts(bs, res, refcount_table, nb_clusters,
> +                            new_offset, s->cluster_size);

res->corruptions_fixed++?

> +                } else {
> +                    res->corruptions++;
> +                }
>              }
>          }
>      }
> @@ -1364,10 +1443,108 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
> BdrvCheckResult *res,
>          }
>      }
>  
> +    l2_table = g_malloc(s->l2_size * sizeof(uint64_t));

qemu_blockalign() is better than g_malloc() because it avoids using a
bounce buffer for O_DIRECT images.

> +
> +    /* check OFLAG_COPIED */
> +    for (i = 0; i < s->l1_size; i++) {
> +        uint64_t l1_entry = s->l1_table[i];
> +        uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
> +        bool l2_dirty = false;
> +        int refcount;
> +
> +        if (!l2_offset) {
> +            continue;
> +        }
> +
> +        refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
> +        if (refcount < 0) {
> +            /* don't print message nor increment check_errors, since the 
> above
> +             * loop will have done this already */
> +            continue;
> +        }
> +        if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
> +            fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_entry=%" PRIx64
> +                    " refcount=%d\n",
> +                    fix & BDRV_FIX_ERRORS ? "Repairing" :
> +                                            "ERROR",
> +                    l1_entry, refcount);
> +            if (fix & BDRV_FIX_ERRORS) {
> +                s->l1_table[i] = refcount == 1
> +                               ? l1_entry |  QCOW_OFLAG_COPIED
> +                               : l1_entry & ~QCOW_OFLAG_COPIED;
> +                ret = qcow2_write_l1_entry(bs, i);
> +                if (ret < 0) {
> +                    res->check_errors++;
> +                    goto fail;
> +                }

else res->corruptions_fixed++?

> +            } else {
> +                res->corruptions++;
> +            }
> +        }
> +
> +        ret = bdrv_pread(bs->file, l2_offset, l2_table,
> +                s->l2_size * sizeof(uint64_t));
> +        if (ret != s->l2_size * sizeof(uint64_t)) {
> +            fprintf(stderr, "ERROR: Could not read L2 table\n");
> +            res->check_errors++;
> +            if (ret >= 0) {
> +                ret = -EIO;
> +            }

Doesn't happen. You can just check ret < 0 instead of ret != ... in the
first place, bdrv_pread() never returns short reads.

> +            goto fail;
> +        }
> +
> +        for (j = 0; j < s->l2_size; j++) {
> +            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
> +            uint64_t data_offset;
> +
> +            if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_NORMAL) {
> +                continue;
> +            }
> +
> +            data_offset = l2_entry & L2E_OFFSET_MASK;
> +
> +            refcount = get_refcount(bs, data_offset >> s->cluster_bits);
> +            if (refcount < 0) {
> +                /* don't print message nor increment check_errors */

Why?

> +                continue;
> +            }
> +            if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
> +                fprintf(stderr, "%s OFLAG_COPIED data cluster: l2_entry=%"
> +                        PRIx64 " refcount=%d\n",
> +                        fix & BDRV_FIX_ERRORS ? "Repairing" :
> +                                                "ERROR",
> +                        l2_entry, refcount);
> +                if (fix & BDRV_FIX_ERRORS) {
> +                    l2_table[j] = cpu_to_be64(refcount == 1
> +                                ? l2_entry |  QCOW_OFLAG_COPIED
> +                                : l2_entry & ~QCOW_OFLAG_COPIED);
> +                    l2_dirty = true;

res->corruptions_fixed++

> +                } else {
> +                    res->corruptions++;
> +                }
> +            }
> +        }
> +
> +        if (l2_dirty) {
> +            ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
> +                    s->l2_size * sizeof(uint64_t));
> +            if (ret != s->l2_size * sizeof(uint64_t)) {
> +                fprintf(stderr, "ERROR: Could not write L2 table\n");
> +                res->check_errors++;
> +                if (ret >= 0) {
> +                    ret = -EIO;
> +                }

bdrv_pwrite() also doesn't return short writes.

> +                goto fail;
> +            }
> +        }
> +    }
> +
> +
>      res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
>      ret = 0;
>  
>  fail:
> +    g_free(l2_table);
>      g_free(refcount_table);
>  
>      return ret;

Kevin

Re: [Qemu-devel] [PATCH v2 4/5] qcow2: More complete consistency check

Reply via email to