On Thu, Dec 29, 2011 at 05:36:59PM +0800, Dong Xu Wang wrote: Some comments on everything but the I/O path, which I haven't reviewed yet:
> diff --git a/block/add-cow.c b/block/add-cow.c > new file mode 100644 > index 0000000..95af5b7 > --- /dev/null > +++ b/block/add-cow.c > @@ -0,0 +1,429 @@ Missing GPL or LGPL license header. > +#include "qemu-common.h" > +#include "block_int.h" > +#include "module.h" > + > +#define ADD_COW_MAGIC (((uint64_t)'A' << 56) | ((uint64_t)'D' << 48) | > \ > + ((uint64_t)'D' << 40) | ((uint64_t)'_' << 32) | \ > + ((uint64_t)'C' << 24) | ((uint64_t)'O' << 16) | \ > + ((uint64_t)'W' << 8) | 0xFF) > +#define ADD_COW_VERSION 1 > +#define ADD_COW_FILE_LEN 1024 > + > +typedef struct AddCowHeader { > + uint64_t magic; > + uint32_t version; > + char backing_file[ADD_COW_FILE_LEN]; > + char image_file[ADD_COW_FILE_LEN]; > + uint64_t size; > + char reserved[492]; > +} QEMU_PACKED AddCowHeader; > + > +typedef struct BDRVAddCowState { > + char image_file[ADD_COW_FILE_LEN]; Why is this field needed? > + BlockDriverState *image_hd; > + uint8_t *bitmap; > + uint64_t bitmap_size; > + CoMutex lock; > +} BDRVAddCowState; > + > +static int add_cow_probe(const uint8_t *buf, int buf_size, const char > *filename) > +{ > + const AddCowHeader *header = (const void *)buf; > + > + if (be64_to_cpu(header->magic) == ADD_COW_MAGIC && > + be32_to_cpu(header->version) == ADD_COW_VERSION) { > + return 100; > + } else { > + return 0; > + } > +} > + > +static int add_cow_open(BlockDriverState *bs, int flags) > +{ > + AddCowHeader header; > + int64_t size; > + char image_filename[ADD_COW_FILE_LEN]; > + BlockDriver *image_drv = NULL; > + int ret; > + BDRVAddCowState *s = bs->opaque; > + BlockDriverState *backing_bs = NULL; > + > + ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); > + if (ret != sizeof(header)) { > + goto fail; > + } > + > + if (be64_to_cpu(header.magic) != ADD_COW_MAGIC) { > + ret = -EINVAL; > + goto fail; > + } > + if (be32_to_cpu(header.version) != ADD_COW_VERSION) { > + char version[64]; > + snprintf(version, sizeof(version), "ADD-COW version %d", > header.version); > + qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, > + bs->device_name, "add-cow", version); > + ret = -ENOTSUP; > + goto fail; > + } > + > + size = be64_to_cpu(header.size); > + bs->total_sectors = size / BDRV_SECTOR_SIZE; > + > + QEMU_BUILD_BUG_ON(sizeof(bs->backing_file) != > sizeof(header.backing_file)); > + QEMU_BUILD_BUG_ON(sizeof(s->image_file) != sizeof(header.image_file)); > + pstrcpy(bs->backing_file, sizeof(bs->backing_file), > + header.backing_file); > + pstrcpy(s->image_file, sizeof(s->image_file), > + header.image_file); This assumes that header.backing_file and header.image_file is NUL-terminated. If the file happened to have the magic number and version but does not include '\0' bytes in the header.backing_file then we may crash here when trying to read beyond the end of the header struct. Image format code should be robust. Please use strncpy(3) carefully instead of pstrcpy(). Also please update the file format specification to either make these fields NUL-terminated or NUL-terminated unless the length is 1024 characters (in which case there is no NUL but the string still ends). > + > + s->bitmap_size = ((bs->total_sectors + 7) >> 3); > + s->bitmap = qemu_blockalign(bs, s->bitmap_size); > + > + ret = bdrv_pread(bs->file, sizeof(header), s->bitmap, > + s->bitmap_size); > + if (ret != s->bitmap_size) { > + goto fail; > + } > + > + if (s->image_file[0] == '\0') { > + ret = -ENOENT; > + goto fail; > + } > + > + ret = bdrv_file_open(&backing_bs, bs->backing_file, 0); > + if (ret < 0) { > + return ret; > + } > + bdrv_delete(backing_bs); What does this do? (It leaks s->bitmap when it fails.) > + > + s->image_hd = bdrv_new(""); > + > + if (path_has_protocol(s->image_file)) { > + pstrcpy(image_filename, sizeof(image_filename), > + s->image_file); > + } else { > + path_combine(image_filename, sizeof(image_filename), > + bs->filename, s->image_file); > + } > + > + image_drv = bdrv_find_format("raw"); > + ret = bdrv_open(s->image_hd, image_filename, flags, image_drv); > + if (ret < 0) { > + bdrv_delete(s->image_hd); > + s->image_hd = NULL; > + goto fail; > + } TODO > + > + qemu_co_mutex_init(&s->lock); > + return 0; > + fail: > + g_free(s->bitmap); > + return ret; > +} > + > +static inline void add_cow_set_bit(BlockDriverState *bs, int64_t bitnum) > +{ > + uint64_t offset = bitnum >> 3; > + BDRVAddCowState *s = bs->opaque; > + s->bitmap[offset] |= (1 << (bitnum % 8)); > +} > + > +static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum) > +{ > + BDRVAddCowState *s = bs->opaque; > + uint64_t offset = bitnum >> 3; > + return !!(s->bitmap[offset] & (1 << (bitnum % 8))); > +} Please use bool instead of int. Then you can also drop the !!. > + > +static coroutine_fn int add_cow_is_allocated(BlockDriverState *bs, > + int64_t sector_num, int nb_sectors, int *num_same) > +{ > + int changed; > + BDRVAddCowState *s = bs->opaque; > + > + /* Beyond the end of bitmap, return error or read from backing_file? */ > + if (((sector_num + nb_sectors + 7) >> 3) > s->bitmap_size) { > + return 0; > + } If the bitmap size is tied to bs->total_sectors then you do not need this check since bdrv_co_is_allocated() already handles this case (and sets *num_same to 0). > + > + if (nb_sectors == 0) { > + *num_same = nb_sectors; > + return 0; > + } > + > + changed = is_bit_set(bs, sector_num); > + for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) { > + if (is_bit_set(bs, sector_num + *num_same) != changed) { > + break; > + } > + } > + > + return changed; > +} > + > +static int add_cow_update_bitmap(BlockDriverState *bs, int64_t sector_num, > + int nb_sectors) > +{ > + int i, ret = 0; > + bool changed = false; > + BDRVAddCowState *s = bs->opaque; > + uint64_t start_pos = (sector_num >> 3) & (~511); > + uint64_t end_pos = (((sector_num + nb_sectors - 1) >> 3) + 511) & > (~511); > + uint64_t sectors_to_write = MIN(end_pos - start_pos, > + s->bitmap_size - start_pos); > + > + if (start_pos > s->bitmap_size) { > + return 0; > + } > + > + for (i = 0; i < nb_sectors; i++) { > + if (!changed) { > + changed = true; > + } > + add_cow_set_bit(bs, sector_num + i); > + } > + > + if (changed) { > + ret = bdrv_pwrite(bs->file, sizeof(AddCowHeader) + start_pos, > + s->bitmap + start_pos, sectors_to_write); > + } > + return ret; > +} > + > +static void add_cow_close(BlockDriverState *bs) > +{ > + BDRVAddCowState *s = bs->opaque; > + g_free(s->bitmap); We also need to free s->image_hd. > diff --git a/docs/specs/add-cow.txt b/docs/specs/add-cow.txt > new file mode 100644 > index 0000000..33f358e > --- /dev/null > +++ b/docs/specs/add-cow.txt > @@ -0,0 +1,72 @@ > +== General == > + > +Raw file format does not support backing_file and copy on write feature. Then > +you can use add-cow file to implement these features. I suggest replacing the second sentence with these: "The add-cow image format makes it possible to use backing files with raw image by keeping a separate .add-cow metadata file. Once all sectors have been written to in the raw image it is safe to discard the .add-cow and backing files and instead use the raw image directly." They describe what add-cow does and how it can be used in a little more detail. > + > +When using add-cow, procedures may like this: > +(ubuntu.img is a disk image which has been installed OS.) > + 1) Create a raw image with the same size of ubuntu.img > + qemu-img create -f raw test.raw 8G > + 2) Create a add-cow image which will store dirty bitmap > + qemu-img create -f add-cow test.add-cow -o > backing_file=ubuntu.img,image_file=test.raw > + 3) Run qemu with add-cow image > + qemu -drive if=virtio,file=test.add-cow > + > +While QEMU is running, virtual size of image_file and backing_file must be > the > +same. So if image_file does not have the same virtual size as backing_file's > in > +step 2), qemu-img will truncate it. This limitation is unnecessary. QCOW2 and QED will read zeroes if the image is larger than the backing file. Please implement the same behavior so that add-cow behaves consistently with how users know QCOW2 and QED work. > + > +=Specification= > + > +The file format looks like this: > + > + +---------------+--------------------------+ > + | Header | Data | > + +---------------+--------------------------+ > + > +All numbers in add-cow are stored in Big Endian byte order. New file formats and protocols often choose little endian because x86 and arm are very popular nowadays. It's an arbitrary decision but I just wanted to suggest it. > +== Header == > + > +The Header is included in the first bytes: > + > + Byte 0 - 7: magic > + add-cow magic string ("ADD_COW\xff") > + > + 8 - 11: version > + Version number (only valid value is 1 now) > + > + 12 - 1035: backing_file > + backing_file file name related to add-cow file. While > + using backing_file, must together with image_file. > All > + unused bytes are padded with zeros. "While using backing_file, must together with image_file" What does this mean? > + > + 1036 - 2059: image_file > + image_file is a raw file, While using image_file, > must > + together with image_file. All unused bytes are padded "While using image_file, must together with image_file" What does this mean? > + with zeros. > + > + 2060 - 2067: size > + Virtual disk size of image_file in bytes. Why is a field required to store the virtual disk size? The image_file size should always tell us the virtual disk image. > + > + 2068 - 2559: The Reserved field is used to make sure Data field > starts > + at the multiple of 512, not used currently. All > bytes are > + filled with 0. > + > +== Data == > + > +The Data field starts at the 2560th byte, stores a bitmap related to > backing_file > +and image_file. The bitmap will track whether the sector in backing_file is > dirty > +or not. > + > + > +Each bit in the bitmap indicates one sector's status. So the size of bitmap > is > +calculated according to virtual size of backing_file. In each byte, bit 0 to > 7 > +will track the 1st to 7th sector in sequence, bit orders in one byte look > like: > + +----+----+----+----+----+----+----+----+ > + | b7 | b6 | b5 | b4 | b3 | b2 | b1 | b0 | > + +----+----+----+----+----+----+----+----+ > + > +If the bit is 0, indicates the sector has not been allocated in image_file, > data > +should be loaded from backing_file while reading; if the bit is 1, > indicates the > +related sector has been dirty, should be loaded from image_file while > reading. Perhaps also add the following so we define both read and write semantics: Writing to a sector causes the corresponding bit to be set to 1.