The raw file format supports neither the backing_file option nor copy-on-write, so this patch adds an add-cow format that provides both on top of a raw image. The dirty bitmap is stored in a separate add-cow file. Usage looks like this:
  qemu-img create -f add-cow -o backing_file=ubuntu.img,image_file=test.img test.add-cow
  qemu -drive if=virtio,file=test.add-cow -m 1024
(test.img is a raw format file; test.add-cow stores bitmap) Signed-off-by: Dong Xu Wang <wdon...@linux.vnet.ibm.com> --- Makefile.objs | 1 + block.c | 83 ++++++++++- block.h | 2 + block/add-cow.c | 456 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ block_int.h | 6 + qemu-img.c | 10 ++ 6 files changed, 555 insertions(+), 3 deletions(-) create mode 100644 block/add-cow.c diff --git a/Makefile.objs b/Makefile.objs index 26b885b..1402f9f 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -31,6 +31,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o +block-nested-y += add-cow.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o diff --git a/block.c b/block.c index a8c789a..c797cfc 100644 --- a/block.c +++ b/block.c @@ -369,7 +369,7 @@ static int find_image_format(const char *filename, BlockDriver **pdrv) { int ret, score, score_max; BlockDriver *drv1, *drv; - uint8_t buf[2048]; + uint8_t buf[4096]; BlockDriverState *bs; ret = bdrv_file_open(&bs, filename, 0); @@ -657,6 +657,10 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, int back_flags; BlockDriver *back_drv = NULL; + char imaging_filename[PATH_MAX]; + int cow_flags; + BlockDriver *cow_drv = NULL; + bs->backing_hd = bdrv_new(""); if (path_has_protocol(bs->backing_file)) { @@ -686,6 +690,30 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, /* base image inherits from "parent" */ bs->backing_hd->keep_read_only = bs->keep_read_only; } + + /* If there is a image_file, must be together with backing_file */ + if (bs->image_file[0] != '\0') { + bs->image_hd = bdrv_new(""); + if (path_has_protocol(bs->image_file)) { + pstrcpy(imaging_filename, sizeof(imaging_filename), + 
bs->image_file); + } else { + path_combine(imaging_filename, sizeof(imaging_filename), + filename, bs->image_file); + } + + cow_drv = bdrv_find_format("add-cow"); + + cow_flags = + (flags & (~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING))) | BDRV_O_RDWR; + bs->image_hd->keep_read_only = 0; + + ret = bdrv_open(bs->image_hd, imaging_filename, cow_flags, back_drv); + if (ret < 0) { + bdrv_close(bs); + return ret; + } + } } if (!bdrv_key_required(bs)) { @@ -711,6 +739,10 @@ void bdrv_close(BlockDriverState *bs) bdrv_delete(bs->backing_hd); bs->backing_hd = NULL; } + if (bs->image_hd) { + bdrv_delete(bs->image_hd); + bs->image_hd = NULL; + } bs->drv->bdrv_close(bs); g_free(bs->opaque); #ifdef _WIN32 @@ -851,7 +883,7 @@ int bdrv_commit(BlockDriverState *bs) if (!drv) return -ENOMEDIUM; - + if (!bs->backing_hd) { return -ENOTSUP; } @@ -2024,6 +2056,16 @@ void bdrv_get_backing_filename(BlockDriverState *bs, } } +void bdrv_get_image_filename(BlockDriverState *bs, + char *filename, int filename_size) +{ + if (!bs->image_file) { + pstrcpy(filename, filename_size, ""); + } else { + pstrcpy(filename, filename_size, bs->image_file); + } +} + int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { @@ -3201,8 +3243,10 @@ int bdrv_img_create(const char *filename, const char *fmt, QEMUOptionParameter *param = NULL, *create_options = NULL; QEMUOptionParameter *backing_fmt, *backing_file, *size; BlockDriverState *bs = NULL; - BlockDriver *drv, *proto_drv; + BlockDriver *drv, *proto_drv, *cow_drv;; BlockDriver *backing_drv = NULL; + QEMUOptionParameter *cow_create_options = NULL; + QEMUOptionParameter *image_file; int ret = 0; /* Find driver and parse its options */ @@ -3225,10 +3269,16 @@ int bdrv_img_create(const char *filename, const char *fmt, create_options = append_option_parameters(create_options, proto_drv->create_options); + /* Just support raw format now*/ + cow_drv = bdrv_find_format("raw"); + cow_create_options = 
append_option_parameters(cow_create_options, + cow_drv->create_options); + /* Create parameter list with default values */ param = parse_option_parameters("", create_options, param); set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size); + set_option_parameter_int(cow_create_options, BLOCK_OPT_SIZE, img_size); /* Parse -o options */ if (options) { @@ -3269,6 +3319,25 @@ int bdrv_img_create(const char *filename, const char *fmt, } } + image_file = get_option_parameter(param, BLOCK_OPT_IMAGE_FILE); + if (image_file && image_file->value.s) { + if (!strcmp(filename, image_file->value.s)) { + error_report("Error: Trying to create an cow file with the " + "same filename as the backing file"); + ret = -EINVAL; + goto out; + } + + if (backing_file && backing_file->value.s) { + if (!strcmp(image_file->value.s, backing_file->value.s)) { + error_report("Error: Trying to create an cow file with the " + "same filename as the backing file"); + ret = -EINVAL; + goto out; + } + } + } + backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT); if (backing_fmt && backing_fmt->value.s) { backing_drv = bdrv_find_format(backing_fmt->value.s); @@ -3300,6 +3369,7 @@ int bdrv_img_create(const char *filename, const char *fmt, snprintf(buf, sizeof(buf), "%" PRId64, size); set_option_parameter(param, BLOCK_OPT_SIZE, buf); + set_option_parameter(cow_create_options, BLOCK_OPT_SIZE, buf); } else { error_report("Image creation needs a size parameter"); ret = -EINVAL; @@ -3326,9 +3396,16 @@ int bdrv_img_create(const char *filename, const char *fmt, } } + if (!strcmp(fmt, "add-cow") && image_file && image_file->value.s) { + printf("Formatting '%s', fmt= raw ", image_file->value.s); + print_option_parameters(cow_create_options); + puts(""); + ret = bdrv_create(cow_drv, image_file->value.s, cow_create_options); + } out: free_option_parameters(create_options); free_option_parameters(param); + free_option_parameters(cow_create_options); if (bs) { bdrv_delete(bs); diff --git a/block.h 
b/block.h index 8ec409f..e55ce01 100644 --- a/block.h +++ b/block.h @@ -228,6 +228,8 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size); +void bdrv_get_image_filename(BlockDriverState *bs, + char *filename, int filename_size); int bdrv_can_snapshot(BlockDriverState *bs); int bdrv_is_snapshot(BlockDriverState *bs); BlockDriverState *bdrv_snapshots(void); diff --git a/block/add-cow.c b/block/add-cow.c new file mode 100644 index 0000000..f4b67e5 --- /dev/null +++ b/block/add-cow.c @@ -0,0 +1,456 @@ +#include "qemu-common.h" +#include "block_int.h" +#include "module.h" + +#define ADD_COW_MAGIC (((uint64_t)'A' << 56) | ((uint64_t)'D' << 48) | \ + ((uint64_t)'D' << 40) | ((uint64_t)'_' << 32) | \ + ((uint64_t)'C' << 24) | ((uint64_t)'O' << 16) | \ + ((uint64_t)'W' << 8) | 0xFF) +#define ADD_COW_VERSION 1 + +struct add_cow_header { + uint64_t magic; + uint32_t version; + char backing_file[1024]; + char image_file[1024]; + uint64_t size; + uint32_t sectorsize; +} add_cow_header; + +typedef struct BDRVAddCowState { + CoMutex lock; + CoMutex bitmap_lock; +} BDRVAddCowState; + +typedef struct AddCowAIOCB { + BlockDriverAIOCB common; + int64_t sector_num; + QEMUIOVector *qiov; + int remaining_sectors; + int cur_nr_sectors; + uint64_t bytes_done; + bool is_write; + QEMUIOVector hd_qiov; + QEMUBH *bh; + +} AddCowAIOCB; + +static int add_cow_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const struct add_cow_header *add_cow_header = (const void *)buf; + + if (buf_size >= sizeof(struct add_cow_header) && + be64_to_cpu(add_cow_header->magic) == ADD_COW_MAGIC && + be32_to_cpu(add_cow_header->version) == ADD_COW_VERSION) { + return 100; + } else { + return 0; + } +} + +static int add_cow_open(BlockDriverState *bs, int flags) +{ + struct add_cow_header add_cow_header; + int64_t size; + + if 
(bdrv_pread(bs->file, 0, &add_cow_header, sizeof(add_cow_header)) != + sizeof(add_cow_header)) { + goto fail; + } + + if (be64_to_cpu(add_cow_header.magic) != ADD_COW_MAGIC || + be32_to_cpu(add_cow_header.version) != ADD_COW_VERSION) { + goto fail; + } + + size = be64_to_cpu(add_cow_header.size); + bs->total_sectors = size / 512; + + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + add_cow_header.backing_file); + pstrcpy(bs->image_file, sizeof(bs->image_file), + add_cow_header.image_file); + + bs->bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(add_cow_header); + if (!bs->bitmap) { + bs->bitmap = g_malloc0(bs->bitmap_size); + } + if (bdrv_pread(bs->file, sizeof(add_cow_header), bs->bitmap, \ + bs->bitmap_size) != bs->bitmap_size) { + goto fail; + } + + return 0; + fail: + if (bs->bitmap) { + g_free(bs->bitmap); + bs->bitmap = NULL; + } + return -1; +} + +static inline int add_cow_set_bit(BlockDriverState *bs, int64_t bitnum) +{ + uint64_t offset = sizeof(struct add_cow_header) + bitnum / 8; + uint8_t bitmap; + + qemu_co_mutex_lock(&(((BDRVAddCowState *)(bs->opaque))->bitmap_lock)); + bitmap = (bs->bitmap[offset]) |= (1 << (bitnum % 8)); + qemu_co_mutex_unlock(&((BDRVAddCowState *)(bs->opaque))->bitmap_lock); + + return 0; +} + +static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum) +{ + qemu_co_mutex_lock(&((BDRVAddCowState *)(bs->opaque))->bitmap_lock); + uint64_t offset = sizeof(struct add_cow_header) + bitnum / 8; + qemu_co_mutex_unlock(&((BDRVAddCowState *)(bs->opaque))->bitmap_lock); + + return !!(bs->bitmap[offset] & (1 << (bitnum % 8))); +} + +static int add_cow_is_allocated(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *num_same) +{ + int changed; + + if (nb_sectors == 0) { + *num_same = nb_sectors; + return 0; + } + + changed = is_bit_set(bs, sector_num); + if (changed < 0) { + return 0; + } + + for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) { + if (is_bit_set(bs, sector_num + *num_same) != changed) { + 
break; + } + } + + return changed; +} + +static int add_cow_update_bitmap(BlockDriverState *bs, int64_t sector_num, + int nb_sectors) +{ + int error = 0; + int i; + int ret; + for (i = 0; i < nb_sectors; i++) { + error = add_cow_set_bit(bs, sector_num + i); + if (error) { + break; + } + } + ret = bdrv_pwrite_sync(bs->file, sizeof(struct add_cow_header), \ + bs->bitmap, bs->bitmap_size); + if (ret < 0) { + return ret; + } + return error; +} + +static void add_cow_close(BlockDriverState *bs) +{ + if (bs->bitmap) { + g_free(bs->bitmap); + bs->bitmap = NULL; + } +} + +static int add_cow_create(const char *filename, QEMUOptionParameter *options) +{ + struct add_cow_header add_cow_header; + int64_t image_sectors = 0; + const char *backing_filename = NULL; + const char *image_filename = NULL; + int ret; + BlockDriverState *bs; + + while (options && options->name) { + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { + image_sectors = options->value.n / 512; + } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { + backing_filename = options->value.s; + } else if (!strcmp(options->name, BLOCK_OPT_IMAGE_FILE)) { + image_filename = options->value.s; + } + options++; + } + if (!backing_filename || !image_filename) { + fprintf(stderr, " backing_file and image_file can not be empty!\n"); + return -EINVAL; + } + ret = bdrv_create_file(filename, NULL); + if (ret < 0) { + return ret; + } + + ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR); + if (ret < 0) { + return ret; + } + + memset(&add_cow_header, 0, sizeof(add_cow_header)); + add_cow_header.magic = cpu_to_be64(ADD_COW_MAGIC); + add_cow_header.version = cpu_to_be32(ADD_COW_VERSION); + pstrcpy(add_cow_header.backing_file, \ + sizeof(add_cow_header.backing_file), backing_filename); + pstrcpy(add_cow_header.image_file, sizeof(add_cow_header.image_file), + image_filename); + + add_cow_header.sectorsize = cpu_to_be32(512); + add_cow_header.size = cpu_to_be64(image_sectors * 512); + + ret = bdrv_pwrite(bs, 0, &add_cow_header, 
sizeof(add_cow_header)); + if (ret < 0) { + return ret; + } + bdrv_close(bs); + + ret = bdrv_create_file(image_filename, NULL); + if (ret < 0) { + return ret; + } + + BlockDriver *drv = bdrv_find_format("add-cow"); + assert(drv != NULL); + ret = bdrv_open(bs, filename, BDRV_O_RDWR | BDRV_O_NO_FLUSH, drv); + if (ret < 0) { + return ret; + } + + ret = bdrv_truncate(bs, ((image_sectors + 7) >> 3)); + if (ret < 0) { + return ret; + } + return ret; +} + +static void add_cow_aio_cancel(BlockDriverAIOCB *blockacb) +{ + AddCowAIOCB *acb = container_of(blockacb, AddCowAIOCB, common); + qemu_aio_release(acb); +} + +static AIOPool add_cow_aio_pool = { + .aiocb_size = sizeof(AddCowAIOCB), + .cancel = add_cow_aio_cancel, +}; + +static AddCowAIOCB *add_cow_aio_setup(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + int is_write) +{ + AddCowAIOCB *acb; + acb = qemu_aio_get(&add_cow_aio_pool, bs, NULL, NULL); + if (!acb) { + return NULL; + } + acb->sector_num = sector_num; + acb->qiov = qiov; + acb->is_write = is_write; + qemu_iovec_init(&acb->hd_qiov, qiov->niov); + acb->bytes_done = 0; + acb->remaining_sectors = nb_sectors; + acb->cur_nr_sectors = 0; + return acb; +} + +static int add_cow_aio_read_cb(void *opaque) +{ + AddCowAIOCB *acb = opaque; + BlockDriverState *bs = acb->common.bs; + BDRVAddCowState *s = bs->opaque; + int ret; + int n; + acb->remaining_sectors -= acb->cur_nr_sectors; + acb->sector_num += acb->cur_nr_sectors; + acb->bytes_done += acb->cur_nr_sectors * 512; + + if (acb->remaining_sectors == 0) { + return 0; + } + acb->cur_nr_sectors = acb->remaining_sectors; + if (add_cow_is_allocated(bs, acb->sector_num, acb->cur_nr_sectors, &n)) { + acb->cur_nr_sectors = n; + qemu_iovec_reset(&acb->hd_qiov); + qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done, + acb->cur_nr_sectors * 512); + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_readv(bs->image_hd, acb->sector_num, + n, &acb->hd_qiov); + qemu_co_mutex_lock(&s->lock); + if 
(ret < 0) { + return -EIO; + } + + return 1; + } else { + acb->cur_nr_sectors = n; + if (bs->backing_hd) { + qemu_iovec_reset(&acb->hd_qiov); + qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done, + acb->cur_nr_sectors * 512); + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_readv(bs->backing_hd, acb->sector_num, + n, &acb->hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + return -EIO; + } + return 1; + } else { + qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors); + return 1; + } + } + + return 1; +} + +static int add_cow_aio_write_cb(void *opaque) +{ + AddCowAIOCB *acb = opaque; + BlockDriverState *bs = acb->common.bs; + BDRVAddCowState *s = bs->opaque; + int ret = 0; + + acb->remaining_sectors -= acb->cur_nr_sectors; + acb->sector_num += acb->cur_nr_sectors; + acb->bytes_done += acb->cur_nr_sectors * 512; + + if (acb->remaining_sectors == 0) { + return 0; + } + + acb->cur_nr_sectors = acb->remaining_sectors; + + qemu_iovec_reset(&acb->hd_qiov); + qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done, + acb->cur_nr_sectors * 512); + + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_writev(bs->image_hd, + acb->sector_num, + acb->cur_nr_sectors, &acb->hd_qiov); + qemu_co_mutex_lock(&s->lock); + + if (ret < 0) { + return ret; + } + return 1; +} + +static int add_cow_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + BDRVAddCowState *s = bs->opaque; + AddCowAIOCB *acb; + int ret; + + acb = add_cow_aio_setup(bs, sector_num, qiov, nb_sectors, 0); + qemu_co_mutex_lock(&s->lock); + do { + ret = add_cow_aio_read_cb(acb); + } while (ret > 0); + qemu_co_mutex_unlock(&s->lock); + + qemu_iovec_destroy(&acb->hd_qiov); + qemu_aio_release(acb); + return ret; +} + +static int add_cow_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + BDRVAddCowState *s = bs->opaque; + AddCowAIOCB *acb; + int ret; + + acb = add_cow_aio_setup(bs, sector_num, qiov, nb_sectors, 1); 
+ + qemu_co_mutex_lock(&s->lock); + do { + ret = add_cow_aio_write_cb(acb); + } while (ret > 0); + qemu_co_mutex_unlock(&s->lock); + if (ret == 0) { + add_cow_update_bitmap(bs, sector_num, nb_sectors); + } + + qemu_iovec_destroy(&acb->hd_qiov); + qemu_aio_release(acb); + + return ret; +} + +static int bdrv_add_cow_truncate(BlockDriverState *bs, int64_t offset) +{ + int ret; + ret = bdrv_truncate(bs->file, offset + sizeof(add_cow_header)); + if (ret < 0) { + return ret; + } + + return 0; +} + +static QEMUOptionParameter add_cow_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_BACKING_FILE, + .type = OPT_STRING, + .help = "File name of a base image" + }, + { + .name = BLOCK_OPT_IMAGE_FILE, + .type = OPT_STRING, + .help = "File name of a image file" + }, + { NULL } +}; + +static int add_cow_flush(BlockDriverState *bs) +{ + return bdrv_flush(bs->file); +} + +static BlockDriverAIOCB *add_cow_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return bdrv_aio_flush(bs->file, cb, opaque); +} + +static BlockDriver bdrv_add_cow = { + .format_name = "add-cow", + .instance_size = sizeof(BDRVAddCowState), + .bdrv_probe = add_cow_probe, + .bdrv_open = add_cow_open, + .bdrv_close = add_cow_close, + .bdrv_create = add_cow_create, + .bdrv_is_allocated = add_cow_is_allocated, + + .bdrv_co_readv = add_cow_co_readv, + .bdrv_co_writev = add_cow_co_writev, + .bdrv_truncate = bdrv_add_cow_truncate, + + .create_options = add_cow_create_options, + .bdrv_flush = add_cow_flush, + .bdrv_aio_flush = add_cow_aio_flush, +}; + +static void bdrv_add_cow_init(void) +{ + bdrv_register(&bdrv_add_cow); +} + +block_init(bdrv_add_cow_init); diff --git a/block_int.h b/block_int.h index 5dc0074..1acb2ad 100644 --- a/block_int.h +++ b/block_int.h @@ -42,6 +42,7 @@ #define BLOCK_OPT_TABLE_SIZE "table_size" #define BLOCK_OPT_PREALLOC "preallocation" #define BLOCK_OPT_SUBFMT "subformat" +#define 
BLOCK_OPT_IMAGE_FILE "image_file" typedef struct AIOPool { void (*cancel)(BlockDriverAIOCB *acb); @@ -208,6 +209,11 @@ struct BlockDriverState { int in_use; /* users other than guest access, eg. block migration */ QTAILQ_ENTRY(BlockDriverState) list; void *private; + + char image_file[1024]; + BlockDriverState *image_hd; + uint8_t *bitmap; + uint64_t bitmap_size; }; struct BlockDriverAIOCB { diff --git a/qemu-img.c b/qemu-img.c index 6a39731..0c3f1c2 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1093,6 +1093,8 @@ static int img_info(int argc, char **argv) int64_t allocated_size; char backing_filename[1024]; char backing_filename2[1024]; + char image_filename[1024]; + char image_filename2[1024]; BlockDriverInfo bdi; fmt = NULL; @@ -1153,6 +1155,14 @@ static int img_info(int argc, char **argv) backing_filename, backing_filename2); } + bdrv_get_image_filename(bs, image_filename, sizeof(image_filename)); + if (image_filename[0] != '\0') { + path_combine(image_filename2, sizeof(image_filename2), + filename, image_filename); + printf("image file: %s (actual path: %s)\n", + image_filename, + image_filename2); + } dump_snapshots(bs); bdrv_delete(bs); return 0; -- 1.7.4.1