Signed-off-by: Benoit Canet <ben...@irqsave.net> --- block/qcow2-dedup.c | 325 ++++++++++++++++++++++++++++++++++++++++++++++++++- block/qcow2.h | 5 + 2 files changed, 329 insertions(+), 1 deletion(-)
diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c index 5901749..a424af8 100644 --- a/block/qcow2-dedup.c +++ b/block/qcow2-dedup.c @@ -29,6 +29,12 @@ #include "qemu-common.h" #include "qcow2.h" +static int qcow2_dedup_read_write_hash(BlockDriverState *bs, + QCowHash *hash, + uint64_t *first_logical_sect, + uint64_t physical_sect, + bool write); + /* * Prepare a buffer containing all the required data required to compute cluster * sized deduplication hashes. @@ -291,7 +297,11 @@ static int qcow2_clear_l2_copied_flag_if_needed(BlockDriverState *bs, /* remember that we dont't need to clear QCOW_OFLAG_COPIED again */ hash_node->first_logical_sect &= first_logical_sect; - return 0; + /* clear the QCOW_FLAG_FIRST flag from disk */ + return qcow2_dedup_read_write_hash(bs, &hash_node->hash, + &hash_node->first_logical_sect, + hash_node->physical_sect, + true); } /* This function deduplicate a cluster @@ -553,3 +563,316 @@ exit: return deduped_clusters_nr * s->cluster_sectors - begining_index; } + + +/* Create a deduplication table hash block, write its offset to disk and + * reference it in the RAM deduplication table + * + * sync this to disk and get the dedup cluster cache entry + * + * @index: index in the RAM deduplication table + * @ret: offset on success, negative on error (NOTE(review): declared uint64_t although the caller tests for < 0 through an int64_t - consider int64_t) + */ +static uint64_t qcow2_create_block(BlockDriverState *bs, + int32_t index) +{ + BDRVQcowState *s = bs->opaque; + int64_t offset; + uint64_t data64; + int ret = 0; + + /* allocate a new dedup table hash block */ + offset = qcow2_alloc_clusters(bs, s->hash_block_size); + + if (offset < 0) { + return offset; + } + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + goto free_fail; + } + + /* write the new block offset in the dedup table L1 */ + data64 = cpu_to_be64(offset); + ret = bdrv_pwrite_sync(bs->file, + s->dedup_table_offset + + index * sizeof(uint64_t), + &data64, sizeof(data64)); + + if (ret < 0) { + goto free_fail; + } + + s->dedup_table[index] = offset; 
+ return offset; + +free_fail: + qcow2_free_clusters(bs, offset, s->hash_block_size); + return ret; +} + +static int qcow2_create_and_get_block(BlockDriverState *bs, + uint32_t index, + uint8_t **block) +{ + BDRVQcowState *s = bs->opaque; + int ret = 0; + int64_t offset; + + offset = qcow2_create_block(bs, index); + + if (offset < 0) { + return offset; + } + + + /* get an empty cluster from the dedup cache */ + ret = qcow2_cache_get_empty(bs, s->dedup_cluster_cache, + offset, + (void **) block); + + if (ret < 0) { + return ret; + } + + /* clear it */ + memset(*block, 0, s->hash_block_size); + + return 0; +} + +static inline bool qcow2_has_dedup_block(BlockDriverState *bs, + uint32_t index) +{ + BDRVQcowState *s = bs->opaque; + return s->dedup_table[index] == 0 ? false : true; +} + +static inline void qcow2_write_hash_to_block_and_dirty(BlockDriverState *bs, + uint8_t *block, + QCowHash *hash, + int offset, + uint64_t *logical_sect) +{ + BDRVQcowState *s = bs->opaque; + uint64_t first; + first = cpu_to_be64(*logical_sect); + memcpy(block + offset, hash->data, HASH_LENGTH); + memcpy(block + offset + HASH_LENGTH, &first, 8); + qcow2_cache_entry_mark_dirty(s->dedup_cluster_cache, block); +} + +static inline uint64_t qcow2_read_hash_from_block(uint8_t *block, + QCowHash *hash, + int offset) +{ + uint64_t first; + memcpy(hash->data, block + offset, HASH_LENGTH); + memcpy(&first, block + offset + HASH_LENGTH, 8); + return be64_to_cpu(first); +} + +/* Read/write a given hash and cluster_sect from/to the dedup table + * + * This function doesn't flush the dedup cache to disk + * + * @hash: the hash to read or store + * @first_logical_sect: logical sector of the QCOW_OFLAG_COPIED cluster + * @physical_sect: sector of the cluster in QCOW2 file (in sectors) + * @write: true to write, false to read + * @ret: 0 on success, negative value (e.g. -ENOSPC) on error + */ +static int qcow2_dedup_read_write_hash(BlockDriverState *bs, + QCowHash *hash, + uint64_t *first_logical_sect, + uint64_t physical_sect, + 
bool write) +{ + BDRVQcowState *s = bs->opaque; + uint8_t *block = NULL; + int ret = 0; + int64_t cluster_number; + uint32_t index_in_dedup_table; + int offset_in_block; + int nb_hash_in_block = s->hash_block_size / (HASH_LENGTH + 8); + + cluster_number = physical_sect / s->cluster_sectors; + index_in_dedup_table = cluster_number / nb_hash_in_block; + + if (s->dedup_table_size <= index_in_dedup_table) { + return -ENOSPC; + } + + /* if we must read and there is nothing to read return a null hash */ + if (!qcow2_has_dedup_block(bs, index_in_dedup_table) && !write) { + memset(hash->data, 0, HASH_LENGTH); + *first_logical_sect = 0; + return 0; + } + + if (qcow2_has_dedup_block(bs, index_in_dedup_table)) { + ret = qcow2_cache_get(bs, + s->dedup_cluster_cache, + s->dedup_table[index_in_dedup_table], + (void **) &block); + } else { + ret = qcow2_create_and_get_block(bs, + index_in_dedup_table, + &block); + } + + if (ret < 0) { + return ret; + } + + offset_in_block = (cluster_number % nb_hash_in_block) * + (HASH_LENGTH + 8); + + if (write) { + qcow2_write_hash_to_block_and_dirty(bs, + block, + hash, + offset_in_block, + first_logical_sect); + } else { + *first_logical_sect = qcow2_read_hash_from_block(block, + hash, + offset_in_block); + } + + qcow2_cache_put(bs, s->dedup_cluster_cache, (void **) &block); + + return 0; +} + +static inline bool is_hash_node_empty(QCowHashNode *hash_node) +{ + return hash_node->physical_sect & QCOW_FLAG_EMPTY; +} + +static void qcow2_remove_hash_node(BlockDriverState *bs, + QCowHashNode *hash_node) +{ + BDRVQcowState *s = bs->opaque; + g_tree_remove(s->dedup_tree_by_sect, &hash_node->physical_sect); + g_tree_remove(s->dedup_tree_by_hash, &hash_node->hash); +} + +/* This function removes a hash_node from the trees given a physical sector + * + * @physical_sect: The physical sector of the cluster corresponding to the hash + */ +static void qcow2_remove_hash_node_by_sector(BlockDriverState *bs, + uint64_t physical_sect) +{ + BDRVQcowState *s = 
bs->opaque; + QCowHashNode *hash_node; + + hash_node = g_tree_lookup(s->dedup_tree_by_sect, &physical_sect); + + if (!hash_node) { + return; + } + + qcow2_remove_hash_node(bs, hash_node); +} + +/* This function stores hash information to disk and RAM + * + * @hash: the QCowHash to process + * @logical_sect: the logical sector of the cluster seen by the guest + * @physical_sect: the physical sector of the stored cluster + * @ret: 0 on success, negative on error + */ +static int qcow2_store_hash(BlockDriverState *bs, + QCowHash *hash, + uint64_t logical_sect, + uint64_t physical_sect) +{ + BDRVQcowState *s = bs->opaque; + QCowHashNode *hash_node; + + hash_node = g_tree_lookup(s->dedup_tree_by_hash, hash); + + /* no hash node found for this hash */ + if (!hash_node) { + return 0; + } + + /* the hash node information is already complete */ + if (!is_hash_node_empty(hash_node)) { + return 0; + } + + /* Remember that this QCowHashNode represents the first occurrence of the + * cluster so we will be able to clear QCOW_OFLAG_COPIED from the L2 table + * entry when refcount will go > 1. 
+ */ + logical_sect = logical_sect | QCOW_FLAG_FIRST; + + /* remove stale hash node pointing to this physical sector from the trees */ + qcow2_remove_hash_node_by_sector(bs, physical_sect); + + /* fill the missing fields of the hash node */ + hash_node->physical_sect = physical_sect; + hash_node->first_logical_sect = logical_sect; + + /* insert the hash node in the second tree: it's already in the first one */ + g_tree_insert(s->dedup_tree_by_sect, &hash_node->physical_sect, hash_node); + + /* write the hash to disk */ + return qcow2_dedup_read_write_hash(bs, + hash, + &logical_sect, + physical_sect, + true); +} + +/* This function stores the hashes of the clusters which are not duplicated + * + * @ds: The deduplication state + * @count: the number of dedup hash to process + * @logical_sect: logical offset of the first cluster (in sectors) + * @physical_sect: offset of the first cluster (in sectors) + * @ret: 0 on success, negative on error + */ +int qcow2_dedup_store_new_hashes(BlockDriverState *bs, + QCowDedupState *ds, + int count, + uint64_t logical_sect, + uint64_t physical_sect) +{ + int ret = 0; + int i = 0; + BDRVQcowState *s = bs->opaque; + QCowHashElement *dedup_hash, *next_dedup_hash; + + /* round values on cluster boundaries for easier cluster deletion */ + logical_sect = logical_sect & ~(s->cluster_sectors - 1); + physical_sect = physical_sect & ~(s->cluster_sectors - 1); + + QTAILQ_FOREACH_SAFE(dedup_hash, &ds->undedupables, next, next_dedup_hash) { + + ret = qcow2_store_hash(bs, + &dedup_hash->hash, + logical_sect + i * s->cluster_sectors, + physical_sect + i * s->cluster_sectors); + + QTAILQ_REMOVE(&ds->undedupables, dedup_hash, next); + g_free(dedup_hash); + + if (ret < 0) { + break; + } + + i++; + + if (i == count) { + break; + } + } + + return ret; +} diff --git a/block/qcow2.h b/block/qcow2.h index 11c3002..ea0c30e 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -471,5 +471,10 @@ int qcow2_dedup(BlockDriverState *bs, uint64_t sector_num, uint8_t 
*data, int data_nr); +int qcow2_dedup_store_new_hashes(BlockDriverState *bs, + QCowDedupState *ds, + int count, + uint64_t logical_sect, + uint64_t physical_sect); #endif -- 1.7.10.4