Signed-off-by: Sam Li <faithilike...@gmail.com> --- block/block-backend.c | 8 ++ block/qcow2.c | 7 +- hw/nvme/ctrl.c | 195 ++++++++++++++++++++++-------- include/sysemu/block-backend-io.h | 1 + include/sysemu/dma.h | 3 + softmmu/dma-helpers.c | 17 +++ 6 files changed, 181 insertions(+), 50 deletions(-)
diff --git a/block/block-backend.c b/block/block-backend.c index 9c95ae0267..2aafb4cee3 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -2426,6 +2426,14 @@ uint32_t blk_get_nr_zones(BlockBackend *blk) return bs ? bs->bl.nr_zones : 0; } +uint32_t blk_get_write_granularity(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + IO_CODE(); + + return bs ? bs->bl.write_granularity : 0; +} + uint8_t *blk_get_zone_extension(BlockBackend *blk) { BlockDriverState * bs = blk_bs(blk); IO_CODE(); diff --git a/block/qcow2.c b/block/qcow2.c index 41549dd68b..5a038792f1 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2198,7 +2198,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.max_active_zones = s->zoned_header.max_active_zones; bs->bl.max_open_zones = s->zoned_header.max_open_zones; bs->bl.zone_size = s->zoned_header.zone_size; - bs->bl.write_granularity = BDRV_SECTOR_SIZE; + bs->bl.write_granularity = 4096; /* physical block size */ } static int qcow2_reopen_prepare(BDRVReopenState *state, @@ -4915,6 +4915,11 @@ qcow2_co_zone_append(BlockDriverState *bs, int64_t *offset, QEMUIOVector *qiov, qemu_co_mutex_lock(&s->wps->colock); uint64_t wp = s->wps->wp[index]; uint64_t wp_i = qcow2_get_wp(wp); + printf("qcow2 offset 0x%lx\n", *offset); + printf("checking wp[%ld]: 0b%lb\n", *offset / bs->bl.zone_size, wp); + for (int i = 0; i < bs->bl.nr_zones; i++) { + printf("Listing wp[%d]: 0b%lb\n", i, s->wps->wp[i]); + } ret = qcow2_co_pwritev_part(bs, wp_i, len, qiov, 0, 0); if (ret == 0) { *offset = wp_i; diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8d4c08dc4c..3932b516ed 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1740,6 +1740,95 @@ static void nvme_misc_cb(void *opaque, int ret) nvme_enqueue_req_completion(nvme_cq(req), req); } +typedef struct NvmeZoneCmdAIOCB { + NvmeRequest *req; + NvmeCmd *cmd; + NvmeCtrl *n; + + union { + struct { + uint32_t partial; + unsigned int nr_zones; + BlockZoneDescriptor *zones; + } zone_report_data; + struct { + int64_t offset; + } zone_append_data; + }; +} NvmeZoneCmdAIOCB; + +static void nvme_blk_zone_append_complete_cb(void *opaque, int ret) +{ + NvmeZoneCmdAIOCB *cb = opaque; + NvmeRequest *req = cb->req; + int64_t *offset = (int64_t *)&req->cqe; + + if (ret) { + nvme_aio_err(req, ret); + } + + *offset = nvme_b2l(req->ns, cb->zone_append_data.offset); + nvme_enqueue_req_completion(nvme_cq(req), req); + g_free(cb); +} + +static inline void nvme_blk_zone_append(BlockBackend *blk, int64_t *offset, + uint32_t align, + BlockCompletionFunc *cb, + NvmeZoneCmdAIOCB *aiocb) +{ + NvmeRequest *req = aiocb->req; + assert(req->sg.flags & NVME_SG_ALLOC); + + if (req->sg.flags & NVME_SG_DMA) { + req->aiocb = dma_blk_zone_append(blk, &req->sg.qsg, (int64_t)offset, + align, cb, aiocb); + } else { + req->aiocb = blk_aio_zone_append(blk, offset, &req->sg.iov, 0, + cb, aiocb); + } +} + +static void nvme_zone_append_cb(void *opaque, int ret) +{ + NvmeZoneCmdAIOCB *aiocb = opaque; + NvmeRequest *req = aiocb->req; + NvmeNamespace *ns = req->ns; + + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + if (ns->lbaf.ms) { + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + int64_t offset = aiocb->zone_append_data.offset; + + if (nvme_ns_ext(ns) || req->cmd.mptr) { + uint16_t status; + + nvme_sg_unmap(&req->sg); + status = nvme_map_mdata(nvme_ctrl(req), nlb, req); + if (status) { + ret = -EFAULT; + goto out; + } + + return nvme_blk_zone_append(blk, &offset, 1, + nvme_blk_zone_append_complete_cb, + aiocb); + } + } + +out: + nvme_blk_zone_append_complete_cb(aiocb, ret); +} + + void nvme_rw_complete_cb(void *opaque, int ret) { NvmeRequest *req = opaque; @@ -3067,6 +3156,9 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, uint64_t mapped_size = data_size; uint64_t data_offset; BlockBackend *blk = ns->blkconf.blk; + BlockZoneWps *wps = blk_get_zone_wps(blk); + uint32_t zone_size = blk_get_zone_size(blk); + uint32_t zone_idx; uint16_t status; if (nvme_ns_ext(ns)) { @@ -3097,42 +3189,47 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, } if (blk_get_zone_model(blk)) { - uint32_t zone_size = blk_get_zone_size(blk); - uint32_t zone_idx = slba / zone_size; - int64_t zone_start = zone_idx * zone_size; + assert(wps); + if (zone_size) { + zone_idx = slba / zone_size; + int64_t zone_start = zone_idx * zone_size; + + if (append) { + bool piremap = !!(ctrl & NVME_RW_PIREMAP); + + if (n->params.zasl && + data_size > (uint64_t) + n->page_size << n->params.zasl) { + trace_pci_nvme_err_zasl(data_size); + return NVME_INVALID_FIELD | NVME_DNR; + } - if (append) { - bool piremap = !!(ctrl & NVME_RW_PIREMAP); + rw->slba = cpu_to_le64(slba); - if (n->params.zasl && - data_size > (uint64_t)n->page_size << n->params.zasl) { - trace_pci_nvme_err_zasl(data_size); - return NVME_INVALID_FIELD | NVME_DNR; - } + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_1: + if (!piremap) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } - rw->slba = cpu_to_le64(slba); - switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - case NVME_ID_NS_DPS_TYPE_1: - if (!piremap) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } + /* fallthrough */ - /* fallthrough */ + case NVME_ID_NS_DPS_TYPE_2: + if (piremap) { + uint32_t reftag = le32_to_cpu(rw->reftag); + rw->reftag = + cpu_to_le32(reftag + (slba - zone_start)); + } - case NVME_ID_NS_DPS_TYPE_2: - if (piremap) { - uint32_t reftag = le32_to_cpu(rw->reftag); - rw->reftag = cpu_to_le32(reftag + (slba - zone_start)); - } + break; - break; + case NVME_ID_NS_DPS_TYPE_3: + if (piremap) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } - case NVME_ID_NS_DPS_TYPE_3: - if (piremap) { - return NVME_INVALID_PROT_INFO | NVME_DNR; + break; } - - break; } } @@ -3152,9 +3249,21 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, goto invalid; } - block_acct_start(blk_get_stats(blk), &req->acct, data_size, - BLOCK_ACCT_WRITE); - nvme_blk_write(blk, data_offset, BDRV_SECTOR_SIZE, nvme_rw_cb, req); + if (append) { + NvmeZoneCmdAIOCB *cb = g_malloc(sizeof(NvmeZoneCmdAIOCB)); + cb->req = req; + cb->zone_append_data.offset = data_offset; + + block_acct_start(blk_get_stats(blk), &req->acct, data_size, + BLOCK_ACCT_ZONE_APPEND); + nvme_blk_zone_append(blk, &cb->zone_append_data.offset, + blk_get_write_granularity(blk), + nvme_zone_append_cb, cb); + } else { + block_acct_start(blk_get_stats(blk), &req->acct, data_size, + BLOCK_ACCT_WRITE); + nvme_blk_write(blk, data_offset, BDRV_SECTOR_SIZE, nvme_rw_cb, req); + } } else { req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size, BDRV_REQ_MAY_UNMAP, nvme_rw_cb, @@ -3178,24 +3287,7 @@ static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req) return nvme_do_write(n, req, false, true); } -typedef struct NvmeZoneCmdAIOCB { - NvmeRequest *req; - NvmeCmd *cmd; - NvmeCtrl *n; - - union { - struct { - uint32_t partial; - unsigned int nr_zones; - BlockZoneDescriptor *zones; - } zone_report_data; - struct { - int64_t offset; - } zone_append_data; - }; -} NvmeZoneCmdAIOCB; - -static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req) +static uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req) { return nvme_do_write(n, req, true, false); } @@ -3333,6 +3425,11 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) NvmeNamespace *ns = req->ns; NvmeZoneMgmtAIOCB *iocb; uint64_t slba = 0; + uint64_t offset; + BlockBackend *blk = ns->blkconf.blk; + uint32_t zone_size = blk_get_zone_size(blk); + uint64_t size = zone_size * blk_get_nr_zones(blk); + int64_t len; uint32_t zone_idx = 0; uint16_t status; uint8_t action = cmd->zsa; diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h index f69aa1094a..fcbdd93dea 100644 --- a/include/sysemu/block-backend-io.h +++ b/include/sysemu/block-backend-io.h @@ -109,6 +109,7 @@ uint32_t blk_get_max_append_sectors(BlockBackend *blk); uint32_t blk_get_nr_zones(BlockBackend *blk); uint8_t *blk_get_zone_extension(BlockBackend *blk); uint32_t blk_get_zd_ext_size(BlockBackend *blk); +uint32_t blk_get_write_granularity(BlockBackend *blk); BlockZoneWps *blk_get_zone_wps(BlockBackend *blk); void blk_io_plug(void); diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h index a1ac5bc1b5..680e0b5477 100644 --- a/include/sysemu/dma.h +++ b/include/sysemu/dma.h @@ -301,6 +301,9 @@ BlockAIOCB *dma_blk_read(BlockBackend *blk, BlockAIOCB *dma_blk_write(BlockBackend *blk, QEMUSGList *sg, uint64_t offset, uint32_t align, BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *dma_blk_zone_append(BlockBackend *blk, + QEMUSGList *sg, int64_t offset, uint32_t align, + void (*cb)(void *opaque, int ret), void *opaque); MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual, QEMUSGList *sg, MemTxAttrs attrs); MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual, diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c index 2463964805..88bc13264b 100644 --- a/softmmu/dma-helpers.c +++ b/softmmu/dma-helpers.c @@ -282,6 +282,23 @@ BlockAIOCB *dma_blk_write(BlockBackend *blk, DMA_DIRECTION_TO_DEVICE); } +static +BlockAIOCB *dma_blk_zone_append_io_func(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque) +{ + BlockBackend *blk = opaque; + return blk_aio_zone_append(blk, (int64_t *)offset, iov, 0, cb, cb_opaque); +} + +BlockAIOCB *dma_blk_zone_append(BlockBackend *blk, + QEMUSGList *sg, int64_t offset, uint32_t align, + void (*cb)(void *opaque, int ret), void *opaque) +{ + return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, + dma_blk_zone_append_io_func, blk, cb, opaque, + DMA_DIRECTION_TO_DEVICE); +} static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual, QEMUSGList *sg, DMADirection dir, -- 2.40.1