Am 23.04.2013 um 16:24 hat Jeff Cody geschrieben: > This is the initial block driver framework for VHDX image support ( > i.e. Hyper-V image file formats), that supports opening VHDX files, and > parsing the headers. > > This commit does not yet enable: > - reading > - writing > - updating the header > - differencing files (images with parents) > - log replay / dirty logs (only clean images) > > This is based on Microsoft's VHDX specification: > "VHDX Format Specification v0.95", published 4/12/2012 > https://www.microsoft.com/en-us/download/details.aspx?id=29681 > > Signed-off-by: Jeff Cody <jc...@redhat.com>
Okay, I need more time actually reading the spec and comparing this code to it before I can really say anything about the logic (I hope I'll get to it tomorrow), but for a start, let me point out the obvious things. > --- > block/Makefile.objs | 1 + > block/vhdx.c | 769 > ++++++++++++++++++++++++++++++++++++++++++++++++++++ > block/vhdx.h | 25 ++ > 3 files changed, 795 insertions(+) > create mode 100644 block/vhdx.c > > diff --git a/block/Makefile.objs b/block/Makefile.objs > index 6c4b5bc..5f0358a 100644 > --- a/block/Makefile.objs > +++ b/block/Makefile.objs > @@ -2,6 +2,7 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o > bochs.o vpc.o vvfat > block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o > qcow2-cache.o > block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o > block-obj-y += qed-check.o > +block-obj-y += vhdx.o > block-obj-y += parallels.o blkdebug.o blkverify.o > block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o > block-obj-$(CONFIG_POSIX) += raw-posix.o > diff --git a/block/vhdx.c b/block/vhdx.c > new file mode 100644 > index 0000000..b0ea2ba > --- /dev/null > +++ b/block/vhdx.c > @@ -0,0 +1,769 @@ > +/* > + * Block driver for Hyper-V VHDX Images > + * > + * Copyright (c) 2013 Red Hat, Inc., > + * > + * Authors: > + * Jeff Cody <jc...@redhat.com> > + * > + * This is based on the "VHDX Format Specification v0.95", published > 4/12/2012 > + * by Microsoft: > + * https://www.microsoft.com/en-us/download/details.aspx?id=29681 > + * > + * This work is licensed under the terms of the GNU LGPL, version 2 or later. > + * See the COPYING.LIB file in the top-level directory. > + * > + */ > + > +#include "qemu-common.h" > +#include "block/block_int.h" > +#include "qemu/module.h" > +#include "qemu/crc32c.h" > +#include "block/vhdx.h" > + > + > +/* Several metadata and region table data entries are identified by > + * guids in a MS-specific GUID format. */ > + > + > +/* ------- Known Region Table GUIDs ---------------------- */ > +static const ms_guid bat_guid = { .data1 = 0x2dc27766, > + .data2 = 0xf623, > + .data3 = 0x4200, > + .data4 = { 0x9d, 0x64, 0x11, 0x5e, > + 0x9b, 0xfd, 0x4a, 0x08} }; > + > +static const ms_guid metadata_guid = { .data1 = 0x8b7ca206, > + .data2 = 0x4790, > + .data3 = 0x4b9a, > + .data4 = { 0xb8, 0xfe, 0x57, 0x5f, > + 0x05, 0x0f, 0x88, 0x6e} }; > + > + > + > +/* ------- Known Metadata Entry GUIDs ---------------------- */ > +static const ms_guid file_param_guid = { .data1 = 0xcaa16737, > + .data2 = 0xfa36, > + .data3 = 0x4d43, > + .data4 = { 0xb3, 0xb6, 0x33, 0xf0, > + 0xaa, 0x44, 0xe7, > 0x6b} }; > + > +static const ms_guid virtual_size_guid = { .data1 = 0x2FA54224, > + .data2 = 0xcd1b, > + .data3 = 0x4876, > + .data4 = { 0xb2, 0x11, 0x5d, 0xbe, > + 0xd8, 0x3b, 0xf4, > 0xb8} }; > + > +static const ms_guid page83_guid = { .data1 = 0xbeca12ab, > + .data2 = 0xb2e6, > + .data3 = 0x4523, > + .data4 = { 0x93, 0xef, 0xc3, 0x09, > + 0xe0, 0x00, 0xc7, > 0x46} }; > + > + > +static const ms_guid phys_sector_guid = { .data1 = 0xcda348c7, > + .data2 = 0x445d, > + .data3 = 0x4471, > + .data4 = { 0x9c, 0xc9, 0xe9, 0x88, > + 0x52, 0x51, 0xc5, > 0x56} }; > + > +static const ms_guid parent_locator_guid = { .data1 = 0xa8d35f2d, > + .data2 = 0xb30b, > + .data3 = 0x454d, > + .data4 = { 0xab, 0xf7, 0xd3, > + 0xd8, 0x48, 0x34, > + 0xab, 0x0c} }; > + > +static const ms_guid logical_sector_guid = { .data1 = 0x8141bf1d, > + .data2 = 0xa96f, > + .data3 = 0x4709, > + .data4 = { 0xba, 0x47, 0xf2, > + 0x33, 0xa8, 0xfa, > + 0xab, 0x5f} }; > + > +/* Each parent type must have a valid GUID; this is for parent images > + * of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would > + * need to make up our own QCOW2 GUID type */ > +static const ms_guid parent_vhdx_guid = { .data1 = 0xb04aefb7, > + .data2 = 0xd19e, > + .data3 = 0x4a81, > + .data4 = { 0xb7, 0x89, 0x25, 0xb8, > + 0xe9, 0x44, 0x59, 0x13} > }; > + > + > +#define META_FILE_PARAMETER_PRESENT 0x01 > +#define META_VIRTUAL_DISK_SIZE_PRESENT 0x02 > +#define META_PAGE_83_PRESENT 0x04 > +#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08 > +#define META_PHYS_SECTOR_SIZE_PRESENT 0x10 > +#define META_PARENT_LOCATOR_PRESENT 0x20 > + > +#define META_ALL_PRESENT \ > + (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \ > + META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \ > + META_PHYS_SECTOR_SIZE_PRESENT) > + > +typedef struct vhdx_metadata_entries { > + vhdx_metadata_table_entry file_parameters_entry; > + vhdx_metadata_table_entry virtual_disk_size_entry; > + vhdx_metadata_table_entry page83_data_entry; > + vhdx_metadata_table_entry logical_sector_size_entry; > + vhdx_metadata_table_entry phys_sector_size_entry; > + vhdx_metadata_table_entry parent_locator_entry; > + uint16_t present; > +} vhdx_metadata_entries; Should everything before this line actually be in the header? > + > +typedef struct BDRVVHDXState { > + CoMutex lock; > + > + int curr_header; > + vhdx_header *headers[2]; > + > + vhdx_region_table_header rt; > + vhdx_region_table_entry bat_rt; /* region table for the BAT */ > + vhdx_region_table_entry metadata_rt; /* region table for the metadata > */ > + > + vhdx_metadata_table_header metadata_hdr; > + vhdx_metadata_entries metadata_entries; > + > + vhdx_file_parameters params; > + uint32_t block_size; > + uint32_t block_size_bits; > + uint32_t sectors_per_block; > + uint32_t sectors_per_block_bits; > + > + uint64_t virtual_disk_size; > + uint32_t logical_sector_size; > + uint32_t physical_sector_size; > + > + uint64_t chunk_ratio; > + uint32_t chunk_ratio_bits; > + uint32_t logical_sector_size_bits; > + > + uint32_t bat_entries; > + vhdx_bat_entry *bat; > + uint64_t bat_offset; > + > + vhdx_parent_locator_header parent_header; > + vhdx_parent_locator_entry *parent_entries; > + > +} BDRVVHDXState; > + > +uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, > + int crc_offset) > +{ > + uint32_t crc_new; > + uint32_t crc_orig; > + assert(buf != NULL); > + > + if (crc_offset > 0) { > + memcpy(&crc_orig, buf+crc_offset, sizeof(crc_orig)); Please add spaces before and after operators. I saw this a lot in this series. I'm relatively sure that you can use checkpatch.pl to get all instances pointed out. > + memset(buf+crc_offset, 0, sizeof(crc_orig)); > + } > + > + crc_new = crc32c(crc, buf, size); > + if (crc_offset > 0) { > + memcpy(buf+crc_offset, &crc_orig, sizeof(crc_orig)); > + } > + > + return crc_new; > +} > + > +/* Validates the checksum of the buffer, with an in-place CRC. > + * > + * Zero is substituted during crc calculation for the original crc field, > + * and the crc field is restored afterwards. But the buffer will be modifed > + * during the calculation, so this may not be not suitable for multi-threaded > + * use. > + * > + * crc_offset: byte offset in buf of the buffer crc > + * buf: buffer pointer > + * size: size of buffer (must be > crc_offset+4) > + * > + * returns true if checksum is valid, false otherwise > + */ > +bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset) > +{ > + uint32_t crc_orig; > + uint32_t crc; > + > + assert(buf != NULL); > + assert(size > (crc_offset+4)); > + > + memcpy(&crc_orig, buf+crc_offset, sizeof(crc_orig)); > + crc_orig = le32_to_cpu(crc_orig); > + > + crc = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset); > + > + return crc == crc_orig; > +} > + > + > +/* > + * Per the MS VHDX Specification, for every VHDX file: > + * - The header section is fixed size - 1 MB > + * - The header section is always the first "object" > + * - The first 64KB of the header is the File Identifier > + * - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile") > + * - The following 512 bytes constitute a UTF-16 string identifiying the > + * software that created the file, and is optional and diagnostic > only. > + * > + * Therefore, we probe by looking for the vhdxfile signature "vhdxfile" > + */ > +static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename) > +{ > + if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) { There's a VHDX_FILE_ID_MAGIC constant in the header. Don't you want to use it? > + return 100; > + } > + return 0; > +} > + > +/* All VHDX structures on disk are little endian */ > +static void vhdx_header_le_import(vhdx_header *h) > +{ > + assert(h != NULL); > + > + le32_to_cpus(&h->signature); > + le32_to_cpus(&h->checksum); > + le64_to_cpus(&h->sequence_number); > + > + leguid_to_cpus(&h->file_write_guid); > + leguid_to_cpus(&h->data_write_guid); > + leguid_to_cpus(&h->log_guid); > + > + le16_to_cpus(&h->log_version); > + le16_to_cpus(&h->version); > + le32_to_cpus(&h->log_length); > + le64_to_cpus(&h->log_offset); > +} > + > + > +/* opens the specified header block from the VHDX file header section */ > +static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s) > +{ > + int ret = 0; > + vhdx_header *header1; > + vhdx_header *header2; > + uint64_t h1_seq = 0; > + uint64_t h2_seq = 0; > + uint8_t *buffer; > + > + header1 = qemu_blockalign(bs, sizeof(vhdx_header)); > + header2 = qemu_blockalign(bs, sizeof(vhdx_header)); > + > + buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE); > + > + s->headers[0] = header1; > + s->headers[1] = header2; > + > + /* We have to read the whole VHDX_HEADER_SIZE instead of > + * sizeof(vhdx_header), because the checksum is over the whole > + * region */ > + ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, > VHDX_HEADER_SIZE); > + if (ret < 0) { > + goto fail; > + } > + /* copy over just the relevant portion that we need */ > + memcpy(header1, buffer, sizeof(vhdx_header)); > + vhdx_header_le_import(header1); > + > + if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) && > + header1->signature == VHDX_HDR_MAGIC) { > + h1_seq = header1->sequence_number; > + } > + > + ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, > VHDX_HEADER_SIZE); > + if (ret < 0) { > + goto fail; > + } > + /* copy over just the relevant portion that we need */ > + memcpy(header2, buffer, sizeof(vhdx_header)); > + vhdx_header_le_import(header2); > + > + if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) && > + header2->signature == VHDX_HDR_MAGIC) { > + h2_seq = header2->sequence_number; > + } > + > + if (h1_seq > h2_seq) { > + s->curr_header = 0; > + } else if (h2_seq > h1_seq) { > + s->curr_header = 1; > + } else { > + printf("NO VALID HEADER\n"); Make that an qerror_report() at least. > + ret = -EINVAL; > + goto fail; > + } > + > + ret = 0; > + > + goto exit; > + > +fail: > + qemu_vfree(header1); > + qemu_vfree(header2); > + s->headers[0] = NULL; > + s->headers[1] = NULL; > +exit: > + qemu_vfree(buffer); > + return ret; > +} > + > + > +static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) > +{ > + int ret = 0; > + uint8_t *buffer; > + int offset = 0; > + vhdx_region_table_entry rt_entry; > + int i; > + > + /* We have to read the whole 64KB block, because the crc32 is over the > + * whole block */ > + buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE); > + > + ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer, > + VHDX_HEADER_BLOCK_SIZE); > + if (ret < 0) { > + goto fail; > + } > + memcpy(&s->rt, buffer, sizeof(s->rt)); > + le32_to_cpus(&s->rt.signature); > + le32_to_cpus(&s->rt.checksum); > + le32_to_cpus(&s->rt.entry_count); > + le32_to_cpus(&s->rt.reserved); > + offset += sizeof(s->rt); > + > + if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) || > + s->rt.signature != VHDX_RT_MAGIC) { > + ret = -EINVAL; > + goto fail; > + } > + > + for (i = 0; i < s->rt.entry_count; i++) { > + memcpy(&rt_entry, buffer+offset, sizeof(rt_entry)); > + offset += sizeof(rt_entry); > + > + leguid_to_cpus(&rt_entry.guid); > + le64_to_cpus(&rt_entry.file_offset); > + le32_to_cpus(&rt_entry.length); > + le32_to_cpus(&rt_entry.data_bits); > + > + /* see if we recognize the entry */ > + if (guid_eq(rt_entry.guid, bat_guid)) { > + s->bat_rt = rt_entry; > + continue; > + } > + > + if (guid_eq(rt_entry.guid, metadata_guid)) { > + s->metadata_rt = rt_entry; > + continue; > + } > + > + if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) { > + /* cannot read vhdx file - required region table entry that > + * we do not understand. per spec, we must fail to open */ > + ret = -ENOTSUP; > + goto fail; > + } > + } > + ret = 0; > + > +fail: > + qemu_vfree(buffer); > + return ret; > +} > + > + > + > +/* Metadata initial parser > + * > + * This loads all the metadata entry fields. This may cause additional > + * fields to be processed (e.g. parent locator, etc..). > + * > + * There are 5 Metadata items that are always required: > + * - File Parameters (block size, has a parent) > + * - Virtual Disk Size (size, in bytes, of the virtual drive) > + * - Page 83 Data (scsi page 83 guid) > + * - Logical Sector Size (logical sector size in bytes, either 512 or > + * 4096. We only support 512 currently) > + * - Physical Sector Size (512 or 4096) > + * > + * Also, if the File Parameters indicate this is a differencing file, > + * we must also look for the Parent Locator metadata item. > + */ > +static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) > +{ > + int ret = 0; > + uint8_t *buffer; > + int offset = 0; > + int i = 0; > + uint32_t block_size, sectors_per_block, logical_sector_size; > + uint64_t chunk_ratio; > + vhdx_metadata_table_entry md_entry; > + > + buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE); > + > + ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer, > + VHDX_METADATA_TABLE_MAX_SIZE); > + if (ret < 0) { > + goto exit; > + } > + memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr)); > + offset += sizeof(s->metadata_hdr); > + > + le64_to_cpus(&s->metadata_hdr.signature); > + le16_to_cpus(&s->metadata_hdr.reserved); > + le16_to_cpus(&s->metadata_hdr.entry_count); > + > + if (s->metadata_hdr.signature != VHDX_METADATA_MAGIC) { > + ret = -EINVAL; > + goto exit; > + } > + > + s->metadata_entries.present = 0; > + > + if ((s->metadata_hdr.entry_count * sizeof(md_entry)) > > + (VHDX_METADATA_TABLE_MAX_SIZE - offset)) { > + ret = -EINVAL; > + goto exit; > + } > + > + for (i = 0; i < s->metadata_hdr.entry_count; i++) { > + memcpy(&md_entry, buffer+offset, sizeof(md_entry)); > + offset += sizeof(md_entry); > + > + leguid_to_cpus(&md_entry.item_id); > + le32_to_cpus(&md_entry.offset); > + le32_to_cpus(&md_entry.length); > + le32_to_cpus(&md_entry.data_bits); > + le32_to_cpus(&md_entry.reserved2); > + > + if (guid_eq(md_entry.item_id, file_param_guid)) { > + s->metadata_entries.file_parameters_entry = md_entry; > + s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT; > + continue; > + } > + > + if (guid_eq(md_entry.item_id, virtual_size_guid)) { > + s->metadata_entries.virtual_disk_size_entry = md_entry; > + s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT; > + continue; > + } > + > + if (guid_eq(md_entry.item_id, page83_guid)) { > + s->metadata_entries.page83_data_entry = md_entry; > + s->metadata_entries.present |= META_PAGE_83_PRESENT; > + continue; > + } > + > + if (guid_eq(md_entry.item_id, logical_sector_guid)) { > + s->metadata_entries.logical_sector_size_entry = md_entry; > + s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT; > + continue; > + } > + > + if (guid_eq(md_entry.item_id, phys_sector_guid)) { > + s->metadata_entries.phys_sector_size_entry = md_entry; > + s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT; > + continue; > + } > + > + if (guid_eq(md_entry.item_id, parent_locator_guid)) { > + s->metadata_entries.parent_locator_entry = md_entry; > + s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT; > + continue; > + } > + > + if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) { > + /* cannot read vhdx file - required region table entry that > + * we do not understand. per spec, we must fail to open */ > + ret = -ENOTSUP; > + goto exit; > + } > + } > + > + if (s->metadata_entries.present != META_ALL_PRESENT) { > + ret = -ENOTSUP; > + goto exit; > + } > + > + ret = bdrv_pread(bs->file, > + s->metadata_entries.file_parameters_entry.offset > + + s->metadata_rt.file_offset, > + &s->params, > + sizeof(s->params)); > + > + if (ret < 0) { > + goto exit; > + } > + > + le32_to_cpus(&s->params.block_size); > + le32_to_cpus(&s->params.data_bits); > + > + > + /* We now have the file parameters, so we can tell if this is a > + * differencing file (i.e.. has_parent), is dynamic or fixed > + * sized (leave_blocks_allocated), and the block size */ > + > + /* The parent locator required iff the file parameters has_parent set */ > + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { > + if (s->metadata_entries.present & ~META_PARENT_LOCATOR_PRESENT) { > + /* TODO: parse parent locator fields */ > + ret = -ENOTSUP; /* temp, until differencing files are supported > */ > + goto exit; > + } else { > + /* if has_parent is set, but there is not parent locator present, > + * then that is an invalid combination */ > + ret = -EINVAL; > + goto exit; > + } > + } > + > + /* determine virtual disk size, logical sector size, > + * and phys sector size */ > + > + ret = bdrv_pread(bs->file, > + s->metadata_entries.virtual_disk_size_entry.offset > + + s->metadata_rt.file_offset, > + &s->virtual_disk_size, > + sizeof(uint64_t)); > + if (ret < 0) { > + goto exit; > + } > + ret = bdrv_pread(bs->file, > + s->metadata_entries.logical_sector_size_entry.offset > + + s->metadata_rt.file_offset, > + &s->logical_sector_size, > + sizeof(uint32_t)); > + if (ret < 0) { > + goto exit; > + } > + ret = bdrv_pread(bs->file, > + s->metadata_entries.phys_sector_size_entry.offset > + + s->metadata_rt.file_offset, > + &s->physical_sector_size, > + sizeof(uint32_t)); > + if (ret < 0) { > + goto exit; > + } > + > + le64_to_cpus(&s->virtual_disk_size); > + le32_to_cpus(&s->logical_sector_size); > + le32_to_cpus(&s->physical_sector_size); > + > + if (s->logical_sector_size == 0 || s->params.block_size == 0) { > + ret = -EINVAL; > + goto exit; > + } > + > + /* both block_size and sector_size are guaranteed powers of 2 */ > + s->sectors_per_block = s->params.block_size / s->logical_sector_size; > + s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) * > + (uint64_t)s->logical_sector_size / > + (uint64_t)s->params.block_size; > + > + /* These values are ones we will want to use for division / > multiplication > + * later on, and they are all guaranteed (per the spec) to be powers of > 2, > + * so we can take advantage of that for shift operations during > + * reads/writes */ > + logical_sector_size = s->logical_sector_size; > + if (logical_sector_size & (logical_sector_size - 1)) { > + ret = -EINVAL; > + goto exit; > + } > + > + sectors_per_block = s->sectors_per_block; > + if (sectors_per_block & (sectors_per_block - 1)) { > + ret = -EINVAL; > + goto exit; > + } > + chunk_ratio = s->chunk_ratio; > + if (chunk_ratio & (chunk_ratio - 1)) { > + ret = -EINVAL; > + goto exit; > + } > + block_size = s->params.block_size; > + if (block_size & (block_size - 1)) { > + ret = -EINVAL; > + goto exit; > + } > + > + while (logical_sector_size >>= 1) { > + s->logical_sector_size_bits++; > + } > + while (sectors_per_block >>= 1) { > + s->sectors_per_block_bits++; > + } > + while (chunk_ratio >>= 1) { > + s->chunk_ratio_bits++; > + } > + while (block_size >>= 1) { > + s->block_size_bits++; > + } > + > + if (s->logical_sector_size != BDRV_SECTOR_SIZE) { > + printf("VHDX error - QEMU only supports 512 byte sector sizes\n"); > + ret = -ENOTSUP; > + goto exit; > + } > + > + ret = 0; > + > +exit: > + qemu_vfree(buffer); > + return ret; > +} > + > +/* Parse the replay log. Per the VHDX spec, if the log is present > + * it must be replayed prior to opening the file, even read-only. > + * > + * If read-only, we must replay the log in RAM (or refuse to open > + * a dirty VHDX file read-only */ > +static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s) > +{ > + int ret = 0; > + int i; > + vhdx_header *hdr; > + > + hdr = s->headers[s->curr_header]; > + > + /* either either the log guid, or log length is zero, > + * then a replay log is present */ > + for (i = 0; i < sizeof(hdr->log_guid.data4); i++) { > + ret |= hdr->log_guid.data4[i]; > + } > + if (hdr->log_guid.data1 == 0 && > + hdr->log_guid.data2 == 0 && > + hdr->log_guid.data3 == 0 && > + ret == 0) { > + goto exit; > + } > + > + /* per spec, only log version of 0 is supported */ > + if (hdr->log_version != 0) { > + ret = -EINVAL; > + goto exit; > + } > + > + if (hdr->log_length == 0) { > + goto exit; > + } > + > + /* We currently do not support images with logs to replay */ > + ret = -ENOTSUP; > + > +exit: > + return ret; > +} > + > + > +static int vhdx_open(BlockDriverState *bs, QDict *options, int flags) > +{ > + BDRVVHDXState *s = bs->opaque; > + int ret = 0; > + int i; > + > + s->bat = NULL; > + > + qemu_co_mutex_init(&s->lock); > + > + ret = vhdx_parse_header(bs, s); > + if (ret) { > + goto fail; > + } > + > + ret = vhdx_parse_log(bs, s); > + if (ret) { > + goto fail; > + } > + > + ret = vhdx_open_region_tables(bs, s); > + if (ret) { > + goto fail; > + } > + > + ret = vhdx_parse_metadata(bs, s); > + if (ret) { > + goto fail; > + } > + s->block_size = s->params.block_size; > + > + /* the VHDX spec dictates that virtual_disk_size is always a multiple of > + * logical_sector_size */ > + bs->total_sectors = s->virtual_disk_size / s->logical_sector_size; > + > + s->bat_offset = s->bat_rt.file_offset; > + s->bat_entries = s->bat_rt.length / sizeof(vhdx_bat_entry); > + s->bat = qemu_blockalign(bs, s->bat_rt.length); > + > + ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length); No error checking? > + for (i = 0; i < s->bat_entries; i++) { > + le64_to_cpus(&s->bat[i]); > + } > + > + if (flags & BDRV_O_RDWR) { > + ret = -ENOTSUP; > + goto fail; > + } > + > + /* TODO: differencing files, read, write */ > + > + return 0; > +fail: > + qemu_vfree(s->bat); This doesn't consider all the structs that were allocated by functions called in vhdx_open(). Here the same things should be freed as in vhdx_close(). > + return ret; > +} > + > +static int vhdx_reopen_prepare(BDRVReopenState *state, > + BlockReopenQueue *queue, Error **errp) > +{ > + return 0; > +} > + > + > +static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t > sector_num, > + int nb_sectors, QEMUIOVector *qiov) > +{ > + return -ENOTSUP; > +} > + > + > + > +static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t > sector_num, > + int nb_sectors, QEMUIOVector *qiov) > +{ > + return -ENOTSUP; > +} > + > + > +static void vhdx_close(BlockDriverState *bs) > +{ > + BDRVVHDXState *s = bs->opaque; > + > + qemu_vfree(s->headers[0]); > + qemu_vfree(s->headers[1]); > + qemu_vfree(s->bat); > + qemu_vfree(s->parent_entries); > +} > + > +static BlockDriver bdrv_vhdx = { > + .format_name = "vhdx", > + .instance_size = sizeof(BDRVVHDXState), Use the same alignment for = as below? > + .bdrv_probe = vhdx_probe, > + .bdrv_open = vhdx_open, > + .bdrv_close = vhdx_close, > + .bdrv_reopen_prepare = vhdx_reopen_prepare, > + .bdrv_co_readv = vhdx_co_readv, > + .bdrv_co_writev = vhdx_co_writev, > +}; Kevin