By adding zone management operations in BlockDriver, storage controller emulation can use the new block layer APIs including zone_report and zone_mgmt(open, close, finish, reset). --- block/block-backend.c | 56 ++++++++ block/coroutines.h | 5 + block/file-posix.c | 238 +++++++++++++++++++++++++++++++ include/block/block-common.h | 43 +++++- include/block/block_int-common.h | 20 +++ 5 files changed, 361 insertions(+), 1 deletion(-)
diff --git a/block/block-backend.c b/block/block-backend.c
index e0e1aff4b1..786f964d02 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1810,6 +1810,67 @@ int blk_flush(BlockBackend *blk)
     return ret;
 }
 
+/*
+ * Return zone_report from BlockDriver. Offset can be any number within
+ * the zone size. No alignment for offset and len.
+ * Fails with the blk_check_byte_request() result if the request is rejected.
+ */
+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+                                    int64_t len, int64_t *nr_zones,
+                                    BlockZoneDescriptor *zones)
+{
+    int ret;
+    BlockDriverState *bs;
+    IO_CODE();
+
+    blk_inc_in_flight(blk); /* increase before waiting */
+    blk_wait_while_drained(blk);
+    bs = blk_bs(blk);
+
+    ret = blk_check_byte_request(blk, offset, len);
+    if (ret < 0) {
+        /* Drop the reference taken above so the counter stays balanced */
+        blk_dec_in_flight(blk);
+        return ret;
+    }
+
+    bdrv_inc_in_flight(bs);
+    ret = bdrv_co_zone_report(bs, offset, len, nr_zones, zones);
+    bdrv_dec_in_flight(bs);
+    blk_dec_in_flight(blk);
+    return ret;
+}
+
+/*
+ * Return zone_mgmt from BlockDriver.
+ * Offset is the start of a zone and len is aligned to zones.
+ * Fails with the blk_check_byte_request() result if the request is rejected.
+ */
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op,
+                                  int64_t offset, int64_t len)
+{
+    int ret;
+    BlockDriverState *bs;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    blk_wait_while_drained(blk);
+    bs = blk_bs(blk);
+
+    ret = blk_check_byte_request(blk, offset, len);
+    if (ret < 0) {
+        /* Drop the reference taken above so the counter stays balanced */
+        blk_dec_in_flight(blk);
+        return ret;
+    }
+
+    bdrv_inc_in_flight(bs);
+    ret = bdrv_co_zone_mgmt(bs, op, offset, len);
+    bdrv_dec_in_flight(bs);
+    blk_dec_in_flight(blk);
+    return ret;
+}
+
 void blk_drain(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
diff --git a/block/coroutines.h b/block/coroutines.h
index 830ecaa733..a114d7bc30 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -80,6 +80,11 @@ int coroutine_fn blk_co_do_pdiscard(BlockBackend *blk, int64_t offset,
                                     int64_t bytes);
 
 int coroutine_fn blk_co_do_flush(BlockBackend *blk);
+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+                                    int64_t len, int64_t *nr_zones,
+                                    BlockZoneDescriptor *zones);
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op,
+                                  int64_t offset, int64_t len);
 
 
 /*
diff --git a/block/file-posix.c b/block/file-posix.c
index 48cd096624..1b8b0d351f 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -67,6 +67,7 @@
 #include <sys/param.h>
 #include <sys/syscall.h>
 #include <sys/vfs.h>
+#include <linux/blkzoned.h>
 #include <linux/cdrom.h>
 #include <linux/fd.h>
 #include <linux/fs.h>
@@ -216,6 +217,11 @@ typedef struct RawPosixAIOData {
             PreallocMode prealloc;
             Error **errp;
         } truncate;
+        struct {
+            int64_t *nr_zones;
+            BlockZoneDescriptor *zones;
+        } zone_report;
+        zone_op op;
     };
 } RawPosixAIOData;
 
@@ -1801,6 +1807,164 @@ static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
 }
 #endif
 
+/*
+ * parse_zone - Fill a zone descriptor
+ *
+ * The kernel reports start, len, capacity and wp in 512-byte sectors, while
+ * BlockZoneDescriptor holds bytes. wp is stored relative to the zone start.
+ */
+static inline void parse_zone(struct BlockZoneDescriptor *zone,
+                              const struct blk_zone *blkz)
+{
+    zone->start = blkz->start << BDRV_SECTOR_BITS;
+    zone->length = blkz->len << BDRV_SECTOR_BITS;
+    zone->cap = blkz->capacity << BDRV_SECTOR_BITS;
+    zone->wp = (blkz->wp - blkz->start) << BDRV_SECTOR_BITS;
+    zone->type = blkz->type;
+    zone->cond = blkz->cond;
+}
+
+/*
+ * handle_aiocb_zone_report - Worker for raw_co_zone_report()
+ *
+ * Issues BLKREPORTZONE starting at the sector containing aio_offset until
+ * the caller's array is full or the kernel reports no further zones.
+ * On entry *nr_zones is the capacity of the zones array; on success it is
+ * set to the number of descriptors filled in.
+ *
+ * Returns 0 on success, a negative errno value on failure.
+ */
+static int handle_aiocb_zone_report(void *opaque)
+{
+    RawPosixAIOData *aiocb = opaque;
+    int fd = aiocb->aio_fildes;
+    int64_t *nr_zones = aiocb->zone_report.nr_zones;
+    BlockZoneDescriptor *zones = aiocb->zone_report.zones;
+    /* BLKREPORTZONE addresses the device in 512-byte sectors */
+    int64_t sector = aiocb->aio_offset / BDRV_SECTOR_SIZE;
+    int64_t len = aiocb->aio_nbytes;
+    int64_t nrz = *nr_zones;
+    g_autofree struct blk_zone_report *rep = NULL;
+    struct blk_zone *blkz;
+    size_t rep_size;
+    int64_t n = 0;
+    unsigned int i;
+    int ret;
+
+    if (len < 0 || nrz < 0) {
+        return -EINVAL;
+    }
+
+    /* The report header is followed by room for nrz zone records */
+    rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct blk_zone);
+    rep = g_malloc(rep_size);
+    blkz = (struct blk_zone *)(rep + 1);
+
+    while (n < nrz) {
+        memset(rep, 0, rep_size);
+        rep->sector = sector;
+        /* Never ask for more zones than the caller's array can still hold */
+        rep->nr_zones = nrz - n;
+
+        ret = ioctl(fd, BLKREPORTZONE, rep);
+        if (ret != 0) {
+            ret = -errno;
+            error_report("%d: ioctl BLKREPORTZONE at %" PRId64 " failed %d",
+                         fd, sector, -ret);
+            return ret;
+        }
+
+        if (!rep->nr_zones) {
+            break;
+        }
+
+        for (i = 0; i < rep->nr_zones; i++, n++) {
+            parse_zone(&zones[n], &blkz[i]);
+            /* The next report should start after the last zone reported */
+            sector = blkz[i].start + blkz[i].len;
+        }
+    }
+
+    *nr_zones = n;
+    return 0;
+}
+
+/*
+ * handle_aiocb_zone_mgmt - Worker for raw_co_zone_mgmt()
+ *
+ * Applies aiocb->op to the zones covering [aio_offset, aio_offset +
+ * aio_nbytes). Both values are in bytes and must be multiples of the zone
+ * size; they are converted to 512-byte sectors for the kernel.
+ *
+ * Returns 0 on success, a negative errno value on failure.
+ */
+static int handle_aiocb_zone_mgmt(void *opaque)
+{
+    RawPosixAIOData *aiocb = opaque;
+    int fd = aiocb->aio_fildes;
+    int64_t offset = aiocb->aio_offset;
+    int64_t len = aiocb->aio_nbytes;
+    zone_op op = aiocb->op;
+
+    struct blk_zone_range range;
+    const char *ioctl_name;
+    unsigned long ioctl_op;
+    uint32_t zone_sectors = 0; /* BLKGETZONESZ stores a 32-bit sector count */
+    int64_t zone_size_mask;
+    int ret;
+
+    ret = ioctl(fd, BLKGETZONESZ, &zone_sectors);
+    if (ret) {
+        return -errno;
+    }
+
+    /* Zone size in bytes; the mask test assumes it is a power of two */
+    zone_size_mask = ((int64_t)zone_sectors << BDRV_SECTOR_BITS) - 1;
+    if (offset & zone_size_mask) {
+        error_report("offset is not the start of a zone");
+        return -EINVAL;
+    }
+
+    if (len & zone_size_mask) {
+        error_report("len is not aligned to zones");
+        return -EINVAL;
+    }
+
+    switch (op) {
+    case zone_open:
+        ioctl_name = "BLKOPENZONE";
+        ioctl_op = BLKOPENZONE;
+        break;
+    case zone_close:
+        ioctl_name = "BLKCLOSEZONE";
+        ioctl_op = BLKCLOSEZONE;
+        break;
+    case zone_finish:
+        ioctl_name = "BLKFINISHZONE";
+        ioctl_op = BLKFINISHZONE;
+        break;
+    case zone_reset:
+        ioctl_name = "BLKRESETZONE";
+        ioctl_op = BLKRESETZONE;
+        break;
+    default:
+        error_report("Invalid zone operation 0x%x", op);
+        return -EINVAL;
+    }
+
+    /* Execute the operation */
+    range.sector = offset >> BDRV_SECTOR_BITS;
+    range.nr_sectors = len >> BDRV_SECTOR_BITS;
+    ret = ioctl(fd, ioctl_op, &range);
+    if (ret != 0) {
+        ret = -errno;
+        error_report("ioctl %s failed %d", ioctl_name, -ret);
+        return ret;
+    }
+
+    return 0;
+}
+
 static int handle_aiocb_copy_range(void *opaque)
 {
     RawPosixAIOData *aiocb = opaque;
@@ -2973,6 +3137,64 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret)
     }
 }
 
+/*
+ * zone report - Get a zone block device's information in the form
+ * of an array of zone descriptors.
+ *
+ * @param bs: block driver state of the zoned block device
+ * @param offset: byte offset to start reporting from; it can be any byte
+ *                within a zone
+ * @param len: passed through as aio_nbytes; the worker only rejects
+ *             negative values
+ * @param nr_zones: in: capacity of @zones; out: number of zones reported
+ * @param zones: an array of zone descriptors to hold zone
+ *               information on reply
+ * @return 0 on success, negative errno on failure
+ */
+static int coroutine_fn raw_co_zone_report(BlockDriverState *bs, int64_t offset,
+                                           int64_t len, int64_t *nr_zones,
+                                           BlockZoneDescriptor *zones)
+{
+    BDRVRawState *s = bs->opaque;
+    RawPosixAIOData acb;
+
+    acb = (RawPosixAIOData) {
+        .bs         = bs,
+        .aio_fildes = s->fd,
+        .aio_type   = QEMU_AIO_IOCTL,
+        .aio_offset = offset,
+        .aio_nbytes = len,
+        .zone_report = {
+            .nr_zones = nr_zones,
+            .zones    = zones,
+        },
+    };
+
+    return raw_thread_pool_submit(bs, handle_aiocb_zone_report, &acb);
+}
+
+/*
+ * zone management operations - Execute an operation on a zone
+ *
+ * @offset and @len are in bytes and must be aligned to the zone size.
+ */
+static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, zone_op op,
+                                         int64_t offset, int64_t len)
+{
+    BDRVRawState *s = bs->opaque;
+    RawPosixAIOData acb;
+
+    acb = (RawPosixAIOData) {
+        .bs         = bs,
+        .aio_fildes = s->fd,
+        .aio_type   = QEMU_AIO_IOCTL,
+        .aio_offset = offset,
+        .aio_nbytes = len,
+        .op         = op,
+    };
+
+    return raw_thread_pool_submit(bs, handle_aiocb_zone_mgmt, &acb);
+}
+
 static coroutine_fn int
 raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes,
                 bool blkdev)
@@ -3324,6 +3546,9 @@ BlockDriver bdrv_file = {
     .bdrv_abort_perm_update = raw_abort_perm_update,
     .create_opts = &raw_create_opts,
     .mutable_opts = mutable_opts,
+
+    .bdrv_co_zone_report = raw_co_zone_report,
+    .bdrv_co_zone_mgmt = raw_co_zone_mgmt,
 };
 
 /***********************************************/
@@ -3703,6 +3928,57 @@ static BlockDriver bdrv_host_device = {
 #endif
 };
 
+/*
+ * Protocol driver for zoned host block devices, reusing the hdev_/raw_
+ * helpers and adding the zone report/management callbacks.
+ */
+static BlockDriver bdrv_zoned_host_device = {
+    .format_name = "zoned_host_device",
+    .protocol_name = "zoned_host_device",
+    .instance_size = sizeof(BDRVRawState),
+    .bdrv_needs_filename = true,
+    .bdrv_probe_device = hdev_probe_device,
+    .bdrv_parse_filename = hdev_parse_filename,
+    .bdrv_file_open = hdev_open,
+    .bdrv_close = raw_close,
+    .bdrv_reopen_prepare = raw_reopen_prepare,
+    .bdrv_reopen_commit = raw_reopen_commit,
+    .bdrv_reopen_abort = raw_reopen_abort,
+    .bdrv_co_create_opts = bdrv_co_create_opts_simple,
+    .create_opts = &bdrv_create_opts_simple,
+    .mutable_opts = mutable_opts,
+    .bdrv_co_invalidate_cache = raw_co_invalidate_cache,
+    .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
+
+    .bdrv_co_preadv = raw_co_preadv,
+    .bdrv_co_pwritev = raw_co_pwritev,
+    .bdrv_co_flush_to_disk = raw_co_flush_to_disk,
+    .bdrv_co_pdiscard = hdev_co_pdiscard,
+    .bdrv_co_copy_range_from = raw_co_copy_range_from,
+    .bdrv_co_copy_range_to = raw_co_copy_range_to,
+    .bdrv_refresh_limits = raw_refresh_limits,
+    .bdrv_io_plug = raw_aio_plug,
+    .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_attach_aio_context = raw_aio_attach_aio_context,
+
+    .bdrv_co_truncate = raw_co_truncate,
+    .bdrv_getlength = raw_getlength,
+    .bdrv_get_info = raw_get_info,
+    .bdrv_get_allocated_file_size
+                        = raw_get_allocated_file_size,
+    .bdrv_get_specific_stats = hdev_get_specific_stats,
+    .bdrv_check_perm = raw_check_perm,
+    .bdrv_set_perm = raw_set_perm,
+    .bdrv_abort_perm_update = raw_abort_perm_update,
+    .bdrv_probe_blocksizes = hdev_probe_blocksizes,
+    .bdrv_probe_geometry = hdev_probe_geometry,
+    .bdrv_co_ioctl = hdev_co_ioctl,
+
+    /* zone management operations */
+    .bdrv_co_zone_report = raw_co_zone_report,
+    .bdrv_co_zone_mgmt = raw_co_zone_mgmt,
+};
+
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static void cdrom_parse_filename(const char *filename, QDict *options,
                                  Error **errp)
@@ -3964,6 +4240,7 @@ static void bdrv_file_init(void)
 #if defined(HAVE_HOST_BLOCK_DEVICE)
     bdrv_register(&bdrv_host_device);
 #ifdef __linux__
+    bdrv_register(&bdrv_zoned_host_device);
     bdrv_register(&bdrv_host_cdrom);
 #endif
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
diff --git a/include/block/block-common.h b/include/block/block-common.h
index fdb7306e78..78cddeeda5 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -23,7 +23,6 @@
  */
 #ifndef BLOCK_COMMON_H
 #define BLOCK_COMMON_H
-
 #include "block/aio.h"
 #include "block/aio-wait.h"
 #include "qemu/iov.h"
@@ -49,6 +48,48 @@ typedef struct BlockDriver BlockDriver;
 typedef struct BdrvChild BdrvChild;
 typedef struct BdrvChildClass BdrvChildClass;
 
+typedef enum zone_op {   /* operation selector passed to bdrv_co_zone_mgmt */
+    zone_open,           /* file-posix issues BLKOPENZONE */
+    zone_close,          /* file-posix issues BLKCLOSEZONE */
+    zone_finish,         /* file-posix issues BLKFINISHZONE */
+    zone_reset,          /* file-posix issues BLKRESETZONE */
+} zone_op;
+
+typedef enum zone_model { /* used by zbd_dev.model */
+    BLK_Z_HM,            /* presumably host-managed - TODO confirm */
+    BLK_Z_HA,            /* presumably host-aware - TODO confirm */
+} zone_model;
+
+typedef enum BlkZoneCondition { /* file-posix copies struct blk_zone.cond */
+    BLK_ZS_NOT_WP = 0x0, /* NOTE(review): values look like Linux */
+    BLK_ZS_EMPTY = 0x1,  /* BLK_ZONE_COND_* - confirm against blkzoned.h */
+    BLK_ZS_IOPEN = 0x2,
+    BLK_ZS_EOPEN = 0x3,
+    BLK_ZS_CLOSED = 0x4,
+    BLK_ZS_RDONLY = 0xD,
+    BLK_ZS_FULL = 0xE,
+    BLK_ZS_OFFLINE = 0xF,
+} BlkZoneCondition;
+
+typedef enum BlkZoneType { /* file-posix copies struct blk_zone.type */
+    BLK_ZT_CONV = 0x1,   /* NOTE(review): values look like Linux */
+    BLK_ZT_SWR = 0x2,    /* BLK_ZONE_TYPE_* - confirm against blkzoned.h */
+    BLK_ZT_SWP = 0x3,
+} BlkZoneType;
+
+/*
+ * Zone descriptor data structure.
+ * Provide information on a zone with all position and size values in bytes.
+ */
+typedef struct BlockZoneDescriptor {
+    uint64_t start;      /* filled from struct blk_zone.start */
+    uint64_t length;     /* filled from struct blk_zone.len */
+    uint64_t cap;        /* filled from struct blk_zone.capacity */
+    uint64_t wp;         /* file-posix stores it relative to the zone start */
+    BlkZoneType type;
+    BlkZoneCondition cond;
+} BlockZoneDescriptor;
+
 typedef struct BlockDriverInfo {
     /* in bytes, 0 if irrelevant */
     int cluster_size;
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 8947abab76..b9ea9db6dc 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -94,6 +94,20 @@ typedef struct BdrvTrackedRequest {
     struct BdrvTrackedRequest *waiting_for;
 } BdrvTrackedRequest;
 
+/**
+ * Zone device information data structure.
+ * Provide information on a device.
+ */
+typedef struct zbd_dev { /* NOTE(review): not referenced in this patch */
+    uint32_t zone_size;  /* unit not stated here - TODO confirm */
+    zone_model model;
+    uint32_t block_size;
+    uint32_t write_granularity;
+    uint32_t nr_zones;   /* presumably the length of zones[] - confirm */
+    struct BlockZoneDescriptor *zones; /* array of zones */
+    uint32_t max_nr_open_zones; /* maximum number of explicitly open zones */
+    uint32_t max_nr_active_zones;
+} zbd_dev;
 
 struct BlockDriver {
     /*
@@ -691,6 +705,12 @@ struct BlockDriver {
                                           QEMUIOVector *qiov,
                                           int64_t pos);
 
+    int coroutine_fn (*bdrv_co_zone_report)(BlockDriverState *bs,
+            int64_t offset, int64_t len, int64_t *nr_zones,
+            BlockZoneDescriptor *zones); /* file-posix: raw_co_zone_report */
+    int coroutine_fn (*bdrv_co_zone_mgmt)(BlockDriverState *bs, enum zone_op op,
+            int64_t offset, int64_t len); /* file-posix: raw_co_zone_mgmt */
+
     /* removable device specific */
     bool (*bdrv_is_inserted)(BlockDriverState *bs);
     void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
-- 
2.35.3