On Mon, Jun 05, 2023 at 06:41:07PM +0800, Sam Li wrote: > By adding zone operations and zoned metadata, the zoned emulation > capability enables full emulation support of zoned device using > a qcow2 file. The zoned device metadata includes zone type, > zoned device state and write pointer of each zone, which is stored > to an array of unsigned integers. > > Each zone of a zoned device makes state transitions following > the zone state machine. The zone state machine mainly describes > five states, IMPLICIT OPEN, EXPLICIT OPEN, FULL, EMPTY and CLOSED. > READ ONLY and OFFLINE states will generally be affected by device > internal events. The operations on zones cause corresponding state > changing. > > Zoned devices have a limit on zone resources, which puts constraints on > write operations into zones. > > Signed-off-by: Sam Li <faithilike...@gmail.com> > --- > block/qcow2.c | 629 +++++++++++++++++++++++++++++++++++++++++++++++++- > block/qcow2.h | 2 + > 2 files changed, 629 insertions(+), 2 deletions(-) > > diff --git a/block/qcow2.c b/block/qcow2.c > index b886dab42b..f030965d5d 100644 > --- a/block/qcow2.c > +++ b/block/qcow2.c > @@ -194,6 +194,164 @@ qcow2_extract_crypto_opts(QemuOpts *opts, const char > *fmt, Error **errp) > return cryptoopts_qdict; > } > > +#define QCOW2_ZT_IS_CONV(wp) (wp & 1ULL << 59) > + > +static inline int qcow2_get_wp(uint64_t wp) > +{ > + /* clear state and type information */ > + return ((wp << 5) >> 5); > +} > + > +static inline int qcow2_get_zs(uint64_t wp) > +{ > + return (wp >> 60); > +} > + > +static inline void qcow2_set_wp(uint64_t *wp, BlockZoneState zs) > +{ > + uint64_t addr = qcow2_get_wp(*wp); > + addr |= ((uint64_t)zs << 60); > + *wp = addr; > +} > + > +/* > + * File wp tracking: reset zone, finish zone and append zone can > + * change the value of write pointer. All zone operations will change > + * the state of that/those zone. > + * */ > +static inline void qcow2_wp_tracking_helper(int index, uint64_t wp) { > + /* format: operations, the wp. */ > + printf("wps[%d]: 0x%x\n", index, qcow2_get_wp(wp)>>BDRV_SECTOR_BITS); > +} > + > +/* > + * Perform a state assignment and a flush operation that writes the new wp > + * value to the dedicated location of the disk file. > + */ > +static int qcow2_write_wp_at(BlockDriverState *bs, uint64_t *wp, > + uint32_t index, BlockZoneState zs) { > + BDRVQcow2State *s = bs->opaque; > + int ret; > + > + qcow2_set_wp(wp, zs); > + ret = bdrv_pwrite(bs->file, s->zoned_header.zonedmeta_offset > + + sizeof(uint64_t) * index, sizeof(uint64_t), wp, 0); > + > + if (ret < 0) { > + goto exit; > + } > + qcow2_wp_tracking_helper(index, *wp); > + return ret; > + > +exit: > + error_report("Failed to write metadata with file"); > + return ret; > +} > + > +static int qcow2_check_active(BlockDriverState *bs) > +{ > + BDRVQcow2State *s = bs->opaque; > + > + if (!s->zoned_header.max_active_zones) { > + return 0; > + } > + > + if (s->nr_zones_exp_open + s->nr_zones_imp_open + s->nr_zones_closed > + < s->zoned_header.max_active_zones) { > + return 0; > + } > + > + return -1; > +} > + > +static int qcow2_check_open(BlockDriverState *bs) > +{ > + BDRVQcow2State *s = bs->opaque; > + int ret; > + > + if (!s->zoned_header.max_open_zones) { > + return 0; > + } > + > + if (s->nr_zones_exp_open + s->nr_zones_imp_open > + < s->zoned_header.max_open_zones) { > + return 0; > + } > + > + if(s->nr_zones_imp_open) { > + ret = qcow2_check_active(bs); > + if (ret == 0) { > + /* TODO: it takes O(n) time complexity (n = nr_zones). 
> + * Optimizations required. */ > + /* close one implicitly open zones to make it available */ > + for (int i = s->zoned_header.zone_nr_conv; > + i < bs->bl.nr_zones; ++i) { > + uint64_t *wp = &s->wps->wp[i]; > + if (qcow2_get_zs(*wp) == BLK_ZS_IOPEN) { > + ret = qcow2_write_wp_at(bs, wp, i, BLK_ZS_CLOSED); > + if (ret < 0) { > + return ret; > + } > + s->wps->wp[i] = *wp; > + s->nr_zones_imp_open--; > + s->nr_zones_closed++; > + break; > + } > + } > + return 0; > + } > + return ret; > + } > + > + return -1; > +} > + > +/* > + * The zoned device has limited zone resources of open, closed, active > + * zones. > + */ > +static int qcow2_check_zone_resources(BlockDriverState *bs, > + BlockZoneState zs) > +{ > + int ret; > + > + switch (zs) { > + case BLK_ZS_EMPTY: > + ret = qcow2_check_active(bs); > + if (ret < 0) { > + error_report("No enough active zones"); > + return ret; > + } > + return ret; > + case BLK_ZS_CLOSED: > + ret = qcow2_check_open(bs); > + if (ret < 0) { > + error_report("No enough open zones"); > + return ret; > + } > + return ret; > + default: > + return -EINVAL; > + } > + > +} > + > +static inline int qcow2_refresh_zonedmeta(BlockDriverState *bs) > +{ > + int ret; > + BDRVQcow2State *s = bs->opaque; > + uint64_t *temp = g_malloc(s->zoned_header.zonedmeta_size); > + ret = bdrv_pread(bs->file, s->zoned_header.zonedmeta_offset, > + s->zoned_header.zonedmeta_size, temp, 0); > + if (ret < 0) { > + error_report("Can not read metadata\n"); > + return ret; > + } > + > + memcpy(s->wps->wp, temp, s->zoned_header.zonedmeta_size); > + return 0; > +} > + > /* > * read qcow2 extension and fill bs > * start reading from start_offset > @@ -455,7 +613,19 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t > start_offset, > be32_to_cpu(zoned_ext.max_active_zones); > zoned_ext.max_append_sectors = > be32_to_cpu(zoned_ext.max_append_sectors); > + zoned_ext.zonedmeta_offset = > + be64_to_cpu(zoned_ext.zonedmeta_offset); > + zoned_ext.zonedmeta_size = be64_to_cpu(zoned_ext.zonedmeta_size); > s->zoned_header = zoned_ext; > + s->wps = g_malloc(sizeof(BlockZoneWps) > + + s->zoned_header.zonedmeta_size); > + ret = qcow2_refresh_zonedmeta(bs); > + if (ret < 0) { > + error_setg_errno(errp, -ret, "zonedmeta: " > + "Could not update zoned meta"); > + return ret; > + } > + qemu_co_mutex_init(&s->wps->colock); > > #ifdef DEBUG_EXT > printf("Qcow2: Got zoned format extension: " > @@ -1982,6 +2152,14 @@ static void qcow2_refresh_limits(BlockDriverState *bs, > Error **errp) > } > bs->bl.pwrite_zeroes_alignment = s->subcluster_size; > bs->bl.pdiscard_alignment = s->cluster_size; > + bs->bl.zoned = s->zoned_header.zoned; > + bs->bl.nr_zones = s->zoned_header.nr_zones; > + bs->wps = s->wps; > + bs->bl.max_append_sectors = s->zoned_header.max_append_sectors; > + bs->bl.max_active_zones = s->zoned_header.max_active_zones; > + bs->bl.max_open_zones = s->zoned_header.max_open_zones; > + bs->bl.zone_size = s->zoned_header.zone_size; > + bs->bl.write_granularity = BDRV_SECTOR_SIZE; > } > > static int qcow2_reopen_prepare(BDRVReopenState *state, > @@ -2672,9 +2850,26 @@ qcow2_co_pwritev_part(BlockDriverState *bs, int64_t > offset, int64_t bytes, > uint64_t host_offset; > QCowL2Meta *l2meta = NULL; > AioTaskPool *aio = NULL; > + int64_t start_offset, start_bytes; > + BlockZoneState zs; > + int64_t end; > + uint64_t *wp; > + int64_t zone_size = bs->bl.zone_size; > + int index; > > trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); > > + start_offset = offset; > + start_bytes = bytes; > + /* The 
offset should not less than the wp of that > + * zone where offset starts. */ > + if (zone_size) { > + index = start_offset / zone_size; > + wp = &s->wps->wp[index]; > + if (offset < qcow2_get_wp(*wp)) { > + return -EINVAL; > + } > + } > while (bytes != 0 && aio_task_pool_status(aio) == 0) { > > l2meta = NULL; > @@ -2720,6 +2915,47 @@ qcow2_co_pwritev_part(BlockDriverState *bs, int64_t > offset, int64_t bytes, > qiov_offset += cur_bytes; > trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes); > } > + > + if (zone_size) { > + index = start_offset / zone_size; > + wp = &s->wps->wp[index]; > + uint64_t wpv = *wp; > + if (!QCOW2_ZT_IS_CONV(wpv)) { > + /* > + * Implicitly open one closed zone to write if there are zone > resources > + * left. > + */ > + zs = qcow2_get_zs(wpv); > + if (zs == BLK_ZS_CLOSED || zs == BLK_ZS_EMPTY) { > + ret = qcow2_check_zone_resources(bs, zs); > + if (ret < 0) { > + goto fail_nometa; > + } > + > + if (zs == BLK_ZS_CLOSED) { > + s->nr_zones_closed--; > + s->nr_zones_imp_open++; > + } else { > + s->nr_zones_imp_open++; > + } > + } > + > + /* align up (start_offset, zone_size), the start offset is not > + * necessarily power of two. */ > + end = ((start_offset + zone_size) / zone_size) * zone_size; > + if (start_offset + start_bytes <= end) { > + *wp = start_offset + start_bytes; > + } else { > + ret = -EINVAL; > + goto fail_nometa; > + } > + > + ret = qcow2_write_wp_at(bs, wp, index,BLK_ZS_IOPEN); > + if (ret < 0) { > + goto fail_nometa; > + } > + } > + } > ret = 0; > > qemu_co_mutex_lock(&s->lock); > @@ -3117,7 +3353,9 @@ int qcow2_update_header(BlockDriverState *bs) > .max_active_zones = > cpu_to_be32(s->zoned_header.max_active_zones), > .max_append_sectors = > - cpu_to_be32(s->zoned_header.max_append_sectors) > + cpu_to_be32(s->zoned_header.max_append_sectors), > + .zonedmeta_offset = > cpu_to_be64(s->zoned_header.zonedmeta_offset), > + .zonedmeta_size = cpu_to_be64(s->zoned_header.zonedmeta_size) > }; > ret = header_ext_add(buf, QCOW2_EXT_MAGIC_ZONED_FORMAT, > &zoned_header, sizeof(zoned_header), > @@ -3522,7 +3760,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, > Error **errp) > int version; > int refcount_order; > uint64_t *refcount_table; > - int ret; > + uint64_t zoned_meta_size, zoned_clusterlen; > + int ret, offset, i; > uint8_t compression_type = QCOW2_COMPRESSION_TYPE_ZLIB; > > assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2); > @@ -3823,6 +4062,48 @@ qcow2_co_create(BlockdevCreateOptions *create_options, > Error **errp) > s->zoned_header.max_open_zones = qcow2_opts->max_open_zones; > s->zoned_header.max_active_zones = qcow2_opts->max_active_zones; > s->zoned_header.max_append_sectors = qcow2_opts->max_append_sectors; > + s->zoned_header.nr_zones = qcow2_opts->size / qcow2_opts->zone_size; > + > + zoned_meta_size = sizeof(uint64_t) * s->zoned_header.nr_zones; > + uint64_t meta[zoned_meta_size];
zoned_meta_size is in bytes but the array is in uint64_t. I guess the array size should be s->zoned_header.nr_zones (in zones) instead of zoned_meta_size (in bytes). Please use g_autoptr and g_new() for this to avoid stack overflow issues if nr_zones is large. > + memset(meta, 0, zoned_meta_size); Unnecessary if you use g_new0(). Also, zeroing is probably unnecessary since the for loops below fill in every element of the array. > + > + for (i = 0; i < s->zoned_header.zone_nr_conv; ++i) { > + meta[i] = i * s->zoned_header.zone_size; > + meta[i] += 1ULL << 59; Bitwise OR ('|') is clearer than addition. You do not rely on or want the add operation's arithmetic carry here. > + } > + for (; i < s->zoned_header.nr_zones; ++i) { > + meta[i] = i * s->zoned_header.zone_size; > + /* For sequential zones, the first four most significant bit > + * indicates zone states. */ > + meta[i] += ((uint64_t)BLK_ZS_EMPTY << 60); Bitwise OR. > + } > + > + offset = qcow2_alloc_clusters(blk_bs(blk), zoned_meta_size); > + if (offset < 0) { > + error_setg_errno(errp, -offset, "Could not allocate clusters " > + "for zoned metadata size"); > + goto out; > + } > + s->zoned_header.zonedmeta_offset = offset; > + s->zoned_header.zonedmeta_size = zoned_meta_size; > + > + zoned_clusterlen = size_to_clusters(s, zoned_meta_size) > + * s->cluster_size; > + assert(qcow2_pre_write_overlap_check(bs, 0, offset, > + zoned_clusterlen,false) == 0); > + ret = bdrv_pwrite_zeroes(blk_bs(blk)->file, offset, > + zoned_clusterlen, 0); > + if (ret < 0) { > + error_setg_errno(errp, -ret, "Could not zero fill zoned > metadata"); > + goto out; > + } > + ret = bdrv_pwrite(blk_bs(blk)->file, offset, zoned_meta_size, meta, > 0); > + if (ret < 0) { > + error_setg_errno(errp, -ret, "Could not write zoned metadata " > + "to disk"); > + goto out; > + } > } > > /* Create a full header (including things like feature table) */ > @@ -4166,6 +4447,346 @@ static coroutine_fn int > qcow2_co_pdiscard(BlockDriverState *bs, > return ret; > } > > +static int coroutine_fn > +qcow2_co_zone_report(BlockDriverState *bs, int64_t offset, > + unsigned int *nr_zones, BlockZoneDescriptor *zones) > +{ > + BDRVQcow2State *s = bs->opaque; > + uint64_t zone_size = s->zoned_header.zone_size; > + int64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS; > + int64_t size = bs->bl.nr_zones * zone_size; > + int i = 0; > + int si; > + > + if (zone_size > 0) { > + si = offset / zone_size; offset must be validated. It might be beyond the capacity of the device. > + unsigned int nrz = *nr_zones; nr_zones must be validated. It might be larger than bs->bl.nr_zones. > + qemu_co_mutex_lock(&s->wps->colock); > + for (; i < nrz; ++i) { > + zones[i].start = (si + i) * zone_size; > + > + /* The last zone can be smaller than the zone size */ > + if ((si + i + 1) == bs->bl.nr_zones && size > capacity) { > + zones[i].length = zone_size - (size - capacity); > + } else { > + zones[i].length = zone_size; > + } > + zones[i].cap = zone_size; Should capacity also be capped for the last zone? > + > + uint64_t wp = s->wps->wp[si + i]; > + if (QCOW2_ZT_IS_CONV(wp)) { > + zones[i].type = BLK_ZT_CONV; > + zones[i].state = BLK_ZS_NOT_WP; > + /* Clear the zone type bit */ > + wp &= ~(1ULL << 59); > + } else { > + zones[i].type = BLK_ZT_SWR; > + zones[i].state = qcow2_get_zs(wp); > + /* Clear the zone state bits */ > + wp = qcow2_get_wp(wp); > + } > + > + zones[i].wp = wp; > + if (si + i == bs->bl.nr_zones) { > + break; > + } This check is too late because wp[] has already been indexed. 
It is insufficient when the first zone is already out of bounds. > + } > + qemu_co_mutex_unlock(&s->wps->colock); > + } > + *nr_zones = i; > + return 0; > +} > + > +static int qcow2_open_zone(BlockDriverState *bs, uint32_t index) { > + BDRVQcow2State *s = bs->opaque; > + int ret; > + > + qemu_co_mutex_lock(&s->wps->colock); > + uint64_t *wp = &s->wps->wp[index]; > + BlockZoneState zs = qcow2_get_zs(*wp); > + > + switch(zs) { > + case BLK_ZS_EMPTY: > + ret = qcow2_check_zone_resources(bs, BLK_ZS_EMPTY); > + if (ret < 0) { > + return ret; > + } > + break; > + case BLK_ZS_IOPEN: > + s->nr_zones_imp_open--; > + break; > + case BLK_ZS_EOPEN: > + return 0; > + case BLK_ZS_CLOSED: > + ret = qcow2_check_zone_resources(bs, BLK_ZS_CLOSED); > + if (ret < 0) { > + return ret; > + } > + s->nr_zones_closed--; > + break; > + case BLK_ZS_FULL: > + break; > + default: > + return -EINVAL; > + } s->wps->colock is not unlocked in the return code paths above. > + ret = qcow2_write_wp_at(bs, wp, index, BLK_ZS_EOPEN); I wanted to confirm with you and Damien that zone states are persisted in the zoned storage model, even OPEN/CLOSED? To me, OPEN/CLOSED, seem more related to runtime resource limits than persistent state that needs to be stored. > + if (!ret) { > + s->nr_zones_exp_open++; > + } > + qemu_co_mutex_unlock(&s->wps->colock); > + return ret; > +} > + > +static int qcow2_close_zone(BlockDriverState *bs, uint32_t index) { > + BDRVQcow2State *s = bs->opaque; > + int ret; > + > + qemu_co_mutex_lock(&s->wps->colock); > + uint64_t *wp = &s->wps->wp[index]; > + BlockZoneState zs = qcow2_get_zs(*wp); > + > + switch(zs) { > + case BLK_ZS_EMPTY: > + break; > + case BLK_ZS_IOPEN: > + s->nr_zones_imp_open--; > + break; > + case BLK_ZS_EOPEN: > + s->nr_zones_exp_open--; > + break; > + case BLK_ZS_CLOSED: > + ret = qcow2_check_zone_resources(bs, BLK_ZS_CLOSED); > + if (ret < 0) { > + return ret; > + } > + s->nr_zones_closed--; > + break; > + case BLK_ZS_FULL: > + break; > + default: > + return -EINVAL; > + } s->wps->colock is not unlocked in the return code paths above. > + > + if (zs == BLK_ZS_EMPTY) { > + ret = qcow2_write_wp_at(bs, wp, index, BLK_ZS_EMPTY); > + } else { > + ret = qcow2_write_wp_at(bs, wp, index, BLK_ZS_CLOSED); > + if (!ret) { > + s->nr_zones_closed++; > + } > + } > + qemu_co_mutex_unlock(&s->wps->colock); > + return ret; > +} > + > +static int qcow2_finish_zone(BlockDriverState *bs, uint32_t index) { > + BDRVQcow2State *s = bs->opaque; > + int ret; > + > + qemu_co_mutex_lock(&s->wps->colock); > + uint64_t *wp = &s->wps->wp[index]; > + BlockZoneState zs = qcow2_get_zs(*wp); > + > + switch(zs) { > + case BLK_ZS_EMPTY: > + ret = qcow2_check_zone_resources(bs, BLK_ZS_EMPTY); > + if (ret < 0) { > + return ret; > + } > + break; > + case BLK_ZS_IOPEN: > + s->nr_zones_imp_open--; > + break; > + case BLK_ZS_EOPEN: > + s->nr_zones_exp_open--; > + break; > + case BLK_ZS_CLOSED: > + ret = qcow2_check_zone_resources(bs, BLK_ZS_CLOSED); > + if (ret < 0) { > + return ret; > + } > + s->nr_zones_closed--; > + break; > + case BLK_ZS_FULL: > + return 0; > + default: > + return -EINVAL; > + } s->wps->colock is not unlocked in the return code paths above. > + > + *wp = (index + 1) * s->zoned_header.zone_size; There is an integer overflow here. Please see my comment in qcow2_reset_zone() below. 
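The same pattern as the qcow2_reset_zone() fix below should work here too
(untested sketch, and assuming zone_size is a 32-bit field like the other
zoned header extension fields): cast before multiplying so the expression is
evaluated in 64 bits, e.g.

    *wp = ((uint64_t)index + 1) * s->zoned_header.zone_size;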
> + ret = qcow2_write_wp_at(bs, wp, index, BLK_ZS_FULL); > + qemu_co_mutex_unlock(&s->wps->colock); > + return ret; > +} > + > +static int qcow2_reset_zone(BlockDriverState *bs, uint32_t index, > + int64_t len) { > + BDRVQcow2State *s = bs->opaque; > + int nrz = bs->bl.nr_zones; > + int zone_size = bs->bl.zone_size; > + int n, ret = 0; > + > + qemu_co_mutex_lock(&s->wps->colock); > + uint64_t *wp = &s->wps->wp[index]; > + if (len == bs->total_sectors << BDRV_SECTOR_BITS) { > + n = nrz; > + index = 0; > + } else { > + n = len / zone_size; > + } > + > + for (int i = 0; i < n; ++i) { > + uint64_t *wp_i = (uint64_t *)(wp + i); > + uint64_t wpi_v = *wp_i; > + if (QCOW2_ZT_IS_CONV(wpi_v)) { > + continue; > + } > + > + BlockZoneState zs = qcow2_get_zs(wpi_v); > + switch (zs) { > + case BLK_ZS_EMPTY: > + break; > + case BLK_ZS_IOPEN: > + s->nr_zones_imp_open--; > + break; > + case BLK_ZS_EOPEN: > + s->nr_zones_exp_open--; > + break; > + case BLK_ZS_CLOSED: > + s->nr_zones_closed--; > + break; > + case BLK_ZS_FULL: > + break; > + default: > + return -EINVAL; > + } > + > + if (zs == BLK_ZS_EMPTY) { > + continue; > + } > + > + *wp_i = (index + i) * zone_size; This calculation needs uint64_t to avoid overflowing int. The types involved are: uint64_t = (uint32_t + int) * int; You can fix it using: *wp_i = ((uint64_t)index + i) * zone_size; Then the entire expression will be evaluated as a 64-bit integer instead of a 32-bit integer. > + ret = qcow2_write_wp_at(bs, wp_i, index + i, BLK_ZS_EMPTY); > + if (ret < 0) { > + return ret; s->wps->colock must be unlocked. > + } > + /* clear data */ > + ret = qcow2_co_pwrite_zeroes(bs, qcow2_get_wp(*wp_i), zone_size, 0); Does zone reset guarantee that the data blocks will be zeroed according to the zoned storage model? > + if (ret < 0) { > + error_report("Failed to reset zone at 0x%" PRIx64 "", *wp_i); > + } > + } > + qemu_co_mutex_unlock(&s->wps->colock); > + return ret; > +} > + > +static int coroutine_fn qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp > op, > + int64_t offset, int64_t len) > +{ > + BDRVQcow2State *s = bs->opaque; > + int ret = 0; > + int64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS; > + int64_t zone_size = s->zoned_header.zone_size; > + int64_t zone_size_mask = zone_size - 1; > + uint32_t index = offset / zone_size; > + BlockZoneWps *wps = s->wps; > + > + if (offset & zone_size_mask) { > + error_report("sector offset %" PRId64 " is not aligned to zone size" > + " %" PRId64 "", offset / 512, zone_size / 512); > + return -EINVAL; > + } > + > + if (((offset + len) < capacity && len & zone_size_mask) || > + offset + len > capacity) { > + error_report("number of sectors %" PRId64 " is not aligned to zone" > + " size %" PRId64 "", len / 512, zone_size / 512); > + return -EINVAL; > + } > + > + qemu_co_mutex_lock(&wps->colock); > + uint64_t wpv = wps->wp[offset / zone_size]; Use index here instead of recalculating it? 
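i.e. something like:

    uint64_t wpv = wps->wp[index];

index was already computed from offset / zone_size at the top of the
function, so this also avoids repeating the division.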
> + if (QCOW2_ZT_IS_CONV(wpv) && len != capacity) { > + error_report("zone mgmt operations are not allowed for " > + "conventional zones"); > + ret = -EIO; > + goto unlock; > + } > + qemu_co_mutex_unlock(&wps->colock); > + > + switch(op) { > + case BLK_ZO_OPEN: > + ret = qcow2_open_zone(bs, index); > + break; > + case BLK_ZO_CLOSE: > + ret = qcow2_close_zone(bs, index); > + break; > + case BLK_ZO_FINISH: > + ret = qcow2_finish_zone(bs, index); > + break; > + case BLK_ZO_RESET: > + ret = qcow2_reset_zone(bs, index, len); > + break; > + default: > + error_report("Unsupported zone op: 0x%x", op); > + ret = -ENOTSUP; > + break; > + } > + return ret; > + > +unlock: > + qemu_co_mutex_unlock(&wps->colock); > + return ret; > +} > + > +static int coroutine_fn > +qcow2_co_zone_append(BlockDriverState *bs, int64_t *offset, QEMUIOVector > *qiov, > + BdrvRequestFlags flags) > +{ > + assert(flags == 0); > + BDRVQcow2State *s = bs->opaque; > + int ret; > + int64_t zone_size_mask = bs->bl.zone_size - 1; > + int64_t iov_len = 0; > + int64_t len = 0; > + > + /* offset + len should not pass the end of that zone starting from > offset */ > + if (*offset & zone_size_mask) { > + error_report("sector offset %" PRId64 " is not aligned to zone size " > + "%" PRId32 "", *offset / 512, bs->bl.zone_size / 512); > + return -EINVAL; > + } > + > + int64_t wg = bs->bl.write_granularity; > + int64_t wg_mask = wg - 1; > + for (int i = 0; i < qiov->niov; i++) { > + iov_len = qiov->iov[i].iov_len; > + if (iov_len & wg_mask) { > + error_report("len of IOVector[%d] %" PRId64 " is not aligned to " > + "block size %" PRId64 "", i, iov_len, wg); > + return -EINVAL; > + } > + } > + len = qiov->size; > + > + if ((len >> BDRV_SECTOR_BITS) > bs->bl.max_append_sectors) { > + return -ENOTSUP; > + } > + > + qemu_co_mutex_lock(&s->wps->colock); > + uint64_t wp = s->wps->wp[*offset / bs->bl.zone_size]; Where is *offset checked against nr_zones to prevent an access beyond the end of the array? > + uint64_t wp_i = qcow2_get_wp(wp); > + ret = qcow2_co_pwritev_part(bs, wp_i, len, qiov, 0, 0); > + if (ret == 0) { > + *offset = wp_i; > + } else { > + error_report("qcow2: zap failed"); > + } > + > + qemu_co_mutex_unlock(&s->wps->colock); > + return ret; > +} > + > static int coroutine_fn GRAPH_RDLOCK > qcow2_co_copy_range_from(BlockDriverState *bs, > BdrvChild *src, int64_t src_offset, > @@ -6214,6 +6835,10 @@ BlockDriver bdrv_qcow2 = { > .bdrv_co_pwritev_part = qcow2_co_pwritev_part, > .bdrv_co_flush_to_os = qcow2_co_flush_to_os, > > + .bdrv_co_zone_report = qcow2_co_zone_report, > + .bdrv_co_zone_mgmt = qcow2_co_zone_mgmt, > + .bdrv_co_zone_append = qcow2_co_zone_append, > + > .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, > .bdrv_co_pdiscard = qcow2_co_pdiscard, > .bdrv_co_copy_range_from = qcow2_co_copy_range_from, > diff --git a/block/qcow2.h b/block/qcow2.h > index fe18dc4d97..a3a96ddbce 100644 > --- a/block/qcow2.h > +++ b/block/qcow2.h > @@ -246,6 +246,8 @@ typedef struct Qcow2ZonedHeaderExtension { > uint32_t max_active_zones; > uint32_t max_open_zones; > uint32_t max_append_sectors; > + uint64_t zonedmeta_offset; > + uint64_t zonedmeta_size; > uint8_t padding[3]; > } QEMU_PACKED Qcow2ZonedHeaderExtension; > > -- > 2.40.1 >
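Regarding the repeated "colock is not unlocked" comments above: the early
returns in qcow2_open_zone()/qcow2_close_zone()/qcow2_finish_zone()/
qcow2_reset_zone() could jump to a common unlock label instead, similar to
what qcow2_co_zone_mgmt() already does. Rough, untested sketch for
qcow2_open_zone():

    static int qcow2_open_zone(BlockDriverState *bs, uint32_t index) {
        BDRVQcow2State *s = bs->opaque;
        int ret;

        qemu_co_mutex_lock(&s->wps->colock);
        uint64_t *wp = &s->wps->wp[index];
        BlockZoneState zs = qcow2_get_zs(*wp);

        switch (zs) {
        case BLK_ZS_EMPTY:
            ret = qcow2_check_zone_resources(bs, BLK_ZS_EMPTY);
            if (ret < 0) {
                goto unlock;    /* drop colock on the error path too */
            }
            break;
        case BLK_ZS_IOPEN:
            s->nr_zones_imp_open--;
            break;
        case BLK_ZS_EOPEN:
            ret = 0;
            goto unlock;
        case BLK_ZS_CLOSED:
            ret = qcow2_check_zone_resources(bs, BLK_ZS_CLOSED);
            if (ret < 0) {
                goto unlock;
            }
            s->nr_zones_closed--;
            break;
        case BLK_ZS_FULL:
            break;
        default:
            ret = -EINVAL;
            goto unlock;
        }

        ret = qcow2_write_wp_at(bs, wp, index, BLK_ZS_EOPEN);
        if (!ret) {
            s->nr_zones_exp_open++;
        }
    unlock:
        qemu_co_mutex_unlock(&s->wps->colock);
        return ret;
    }

WITH_QEMU_LOCK_GUARD() might be even simpler, if it can be used with the
CoMutex here.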