On s390x we can now support userspace MMIO and mmap from vfio. Use the s390x MMIO helpers to enable the NVMe userspace driver on s390x.
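Background: on s390x, PCI MMIO space cannot be reached with ordinary load/store instructions, so a plain (volatile) pointer dereference of a mapped BAR does not work there; accesses have to go through the s390x_pci_mmio_*() helpers added earlier in this series, which are expected to use the kernel's MMIO syscalls or the PCI load/store facilities rather than ordinary memory accesses. On all other architectures a volatile access is still sufficient, so the new nvme_mmio_*() accessors dispatch at compile time. A minimal sketch of that pattern, with demo_mmio_read_32() as a hypothetical standalone name (the real accessors below additionally handle the little-endian byte order of NVMe registers):

    #include <stdint.h>

    static inline uint32_t demo_mmio_read_32(const void *addr)
    {
    #ifdef __s390x__
        /* s390x: PCI MMIO must go through the dedicated helper */
        return s390x_pci_mmio_read_32(addr);
    #else
        /* elsewhere: a volatile load from the mapped BAR suffices and
         * cannot be optimized away by the compiler */
        return *(volatile const uint32_t *)addr;
    #endif
    }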
Signed-off-by: Farhan Ali <al...@linux.ibm.com>
---
 block/nvme.c | 95 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 77 insertions(+), 18 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index bbf7c23dcd..90f5708d9b 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -24,6 +24,9 @@
 #include "qemu/option.h"
 #include "qemu/memalign.h"
 #include "qemu/vfio-helpers.h"
+#ifdef __s390x__
+#include "qemu/s390x_pci_mmio.h"
+#endif
 #include "block/block-io.h"
 #include "block/block_int.h"
 #include "system/block-backend.h"
@@ -60,7 +63,7 @@ typedef struct {
     uint8_t *queue;
     uint64_t iova;
     /* Hardware MMIO register */
-    volatile uint32_t *doorbell;
+    uint32_t *doorbell;
 } NVMeQueue;
 
 typedef struct {
@@ -100,7 +103,7 @@ struct BDRVNVMeState {
     QEMUVFIOState *vfio;
     void *bar0_wo_map;
     /* Memory mapped registers */
-    volatile struct {
+    struct {
         uint32_t sq_tail;
         uint32_t cq_head;
     } *doorbells;
@@ -164,6 +167,58 @@ static QemuOptsList runtime_opts = {
     },
 };
 
+static inline uint32_t nvme_mmio_read_32(const void *addr)
+{
+    uint32_t ret;
+
+#ifdef __s390x__
+    ret = s390x_pci_mmio_read_32(addr);
+#else
+    /* Prevent the compiler from optimizing away the load */
+    ret = *((volatile uint32_t *)addr);
+#endif
+
+    return le32_to_cpu(ret);
+}
+
+static inline uint64_t nvme_mmio_read_64(const void *addr)
+{
+    uint64_t ret;
+
+#ifdef __s390x__
+    ret = s390x_pci_mmio_read_64(addr);
+#else
+    /* Prevent the compiler from optimizing away the load */
+    ret = *((volatile uint64_t *)addr);
+#endif
+
+    return le64_to_cpu(ret);
+}
+
+static inline void nvme_mmio_write_32(void *addr, uint32_t val)
+{
+    val = cpu_to_le32(val);
+
+#ifdef __s390x__
+    s390x_pci_mmio_write_32(addr, val);
+#else
+    /* Prevent the compiler from optimizing away the store */
+    *((volatile uint32_t *)addr) = val;
+#endif
+}
+
+static inline void nvme_mmio_write_64(void *addr, uint64_t val)
+{
+    val = cpu_to_le64(val);
+
+#ifdef __s390x__
+    s390x_pci_mmio_write_64(addr, val);
+#else
+    /* Prevent the compiler from optimizing away the store */
+    *((volatile uint64_t *)addr) = val;
+#endif
+}
+
 /* Returns true on success, false on failure. */
 static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
                             unsigned nentries, size_t entry_bytes, Error **errp)
@@ -292,7 +347,7 @@ static void nvme_kick(NVMeQueuePair *q)
     assert(!(q->sq.tail & 0xFF00));
     /* Fence the write to submission queue entry before notifying the device. */
     smp_wmb();
-    *q->sq.doorbell = cpu_to_le32(q->sq.tail);
+    nvme_mmio_write_32(q->sq.doorbell, q->sq.tail);
     q->inflight += q->need_kick;
     q->need_kick = 0;
 }
@@ -441,7 +496,7 @@ static bool nvme_process_completion(NVMeQueuePair *q)
     if (progress) {
         /* Notify the device so it can post more completions. */
         smp_mb_release();
-        *q->cq.doorbell = cpu_to_le32(q->cq.head);
+        nvme_mmio_write_32(q->cq.doorbell, q->cq.head);
         nvme_wake_free_req_locked(q);
     }
 
@@ -460,7 +515,7 @@ static void nvme_process_completion_bh(void *opaque)
      * so notify the device that it has space to fill in more completions now.
      */
     smp_mb_release();
-    *q->cq.doorbell = cpu_to_le32(q->cq.head);
+    nvme_mmio_write_32(q->cq.doorbell, q->cq.head);
     nvme_wake_free_req_locked(q);
 
     nvme_process_completion(q);
@@ -749,9 +804,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     int ret;
     uint64_t cap;
     uint32_t ver;
+    uint32_t cc;
     uint64_t timeout_ms;
     uint64_t deadline, now;
-    volatile NvmeBar *regs = NULL;
+    NvmeBar *regs = NULL;
 
     qemu_co_mutex_init(&s->dma_map_lock);
     qemu_co_queue_init(&s->dma_flush_queue);
@@ -779,7 +835,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
 
     /* Perform initialize sequence as described in NVMe spec "7.6.1
      * Initialization". */
-    cap = le64_to_cpu(regs->cap);
+    cap = nvme_mmio_read_64(&regs->cap);
     trace_nvme_controller_capability_raw(cap);
     trace_nvme_controller_capability("Maximum Queue Entries Supported",
                                      1 + NVME_CAP_MQES(cap));
@@ -805,16 +861,17 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     bs->bl.request_alignment = s->page_size;
     timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
 
-    ver = le32_to_cpu(regs->vs);
+    ver = nvme_mmio_read_32(&regs->vs);
     trace_nvme_controller_spec_version(extract32(ver, 16, 16),
                                        extract32(ver, 8, 8),
                                        extract32(ver, 0, 8));
 
     /* Reset device to get a clean state. */
-    regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
+    cc = nvme_mmio_read_32(&regs->cc);
+    nvme_mmio_write_32(&regs->cc, (cc & 0xFE));
     /* Wait for CSTS.RDY = 0. */
     deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
-    while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
+    while (NVME_CSTS_RDY(nvme_mmio_read_32(&regs->csts))) {
         if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
             error_setg(errp, "Timeout while waiting for device to reset (%"
                              PRId64 " ms)",
@@ -843,19 +900,21 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     s->queues[INDEX_ADMIN] = q;
     s->queue_count = 1;
     QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000);
-    regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) |
-                            ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT));
-    regs->asq = cpu_to_le64(q->sq.iova);
-    regs->acq = cpu_to_le64(q->cq.iova);
+    nvme_mmio_write_32(&regs->aqa,
+                       ((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) |
+                       ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT));
+    nvme_mmio_write_64(&regs->asq, q->sq.iova);
+    nvme_mmio_write_64(&regs->acq, q->cq.iova);
 
     /* After setting up all control registers we can enable device now. */
-    regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
-                           (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
-                           CC_EN_MASK);
+    nvme_mmio_write_32(&regs->cc,
+                       ((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
+                        (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
+                        CC_EN_MASK));
     /* Wait for CSTS.RDY = 1. */
     now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     deadline = now + timeout_ms * SCALE_MS;
-    while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
+    while (!NVME_CSTS_RDY(nvme_mmio_read_32(&regs->csts))) {
         if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
             error_setg(errp, "Timeout while waiting for device to start (%"
                        PRId64 " ms)",
-- 
2.43.0