On s390x we can now support userspace MMIO and mmap
from VFIO. This patch uses the s390x MMIO support to
enable the NVMe userspace driver on s390x.

Signed-off-by: Farhan Ali <al...@linux.ibm.com>
---
 block/nvme.c | 95 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 77 insertions(+), 18 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index bbf7c23dcd..90f5708d9b 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -24,6 +24,9 @@
 #include "qemu/option.h"
 #include "qemu/memalign.h"
 #include "qemu/vfio-helpers.h"
+#ifdef __s390x__
+#include "qemu/s390x_pci_mmio.h"
+#endif
 #include "block/block-io.h"
 #include "block/block_int.h"
 #include "system/block-backend.h"
@@ -60,7 +63,7 @@ typedef struct {
     uint8_t  *queue;
     uint64_t iova;
     /* Hardware MMIO register */
-    volatile uint32_t *doorbell;
+    uint32_t *doorbell;
 } NVMeQueue;
 
 typedef struct {
@@ -100,7 +103,7 @@ struct BDRVNVMeState {
     QEMUVFIOState *vfio;
     void *bar0_wo_map;
     /* Memory mapped registers */
-    volatile struct {
+    struct {
         uint32_t sq_tail;
         uint32_t cq_head;
     } *doorbells;
@@ -164,6 +167,58 @@ static QemuOptsList runtime_opts = {
     },
 };
 
+static inline uint32_t nvme_mmio_read_32(const void *addr)
+{
+    uint32_t ret;
+
+#ifdef __s390x__
+    ret = s390x_pci_mmio_read_32(addr);
+#else
+    /* Volatile access so the compiler cannot elide or cache the MMIO load */
+    ret = *((volatile uint32_t *)addr);
+#endif
+
+    return le32_to_cpu(ret); /* little-endian value -> host byte order */
+}
+
+static inline uint64_t nvme_mmio_read_64(const void *addr)
+{
+    uint64_t ret;
+
+#ifdef __s390x__
+    ret = s390x_pci_mmio_read_64(addr);
+#else
+    /* Volatile access so the compiler cannot elide or cache the MMIO load */
+    ret = *((volatile uint64_t *)addr);
+#endif
+
+    return le64_to_cpu(ret); /* little-endian value -> host byte order */
+}
+
+static inline void nvme_mmio_write_32(void *addr, uint32_t val)
+{
+    val = cpu_to_le32(val); /* host byte order -> little-endian */
+
+#ifdef __s390x__
+    s390x_pci_mmio_write_32(addr, val);
+#else
+    /* Volatile access so the compiler cannot elide or merge the MMIO store */
+    *((volatile uint32_t *)addr) = val;
+#endif
+}
+
+static inline void nvme_mmio_write_64(void *addr, uint64_t val)
+{
+    val = cpu_to_le64(val); /* host byte order -> little-endian */
+
+#ifdef __s390x__
+    s390x_pci_mmio_write_64(addr, val);
+#else
+    /* Volatile access so the compiler cannot elide or merge the MMIO store */
+    *((volatile uint64_t *)addr) = val;
+#endif
+}
+
 /* Returns true on success, false on failure. */
 static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
                             unsigned nentries, size_t entry_bytes, Error **errp)
@@ -292,7 +347,7 @@ static void nvme_kick(NVMeQueuePair *q)
     assert(!(q->sq.tail & 0xFF00));
     /* Fence the write to submission queue entry before notifying the device. */
     smp_wmb();
-    *q->sq.doorbell = cpu_to_le32(q->sq.tail);
+    nvme_mmio_write_32(q->sq.doorbell, q->sq.tail);
     q->inflight += q->need_kick;
     q->need_kick = 0;
 }
@@ -441,7 +496,7 @@ static bool nvme_process_completion(NVMeQueuePair *q)
     if (progress) {
         /* Notify the device so it can post more completions. */
         smp_mb_release();
-        *q->cq.doorbell = cpu_to_le32(q->cq.head);
+        nvme_mmio_write_32(q->cq.doorbell, q->cq.head);
         nvme_wake_free_req_locked(q);
     }
 
@@ -460,7 +515,7 @@ static void nvme_process_completion_bh(void *opaque)
      * so notify the device that it has space to fill in more completions now.
      */
     smp_mb_release();
-    *q->cq.doorbell = cpu_to_le32(q->cq.head);
+    nvme_mmio_write_32(q->cq.doorbell, q->cq.head);
     nvme_wake_free_req_locked(q);
 
     nvme_process_completion(q);
@@ -749,9 +804,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     int ret;
     uint64_t cap;
     uint32_t ver;
+    uint32_t cc;
     uint64_t timeout_ms;
     uint64_t deadline, now;
-    volatile NvmeBar *regs = NULL;
+    NvmeBar *regs = NULL;
 
     qemu_co_mutex_init(&s->dma_map_lock);
     qemu_co_queue_init(&s->dma_flush_queue);
@@ -779,7 +835,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     /* Perform initialize sequence as described in NVMe spec "7.6.1
      * Initialization". */
 
-    cap = le64_to_cpu(regs->cap);
+    cap = nvme_mmio_read_64(&regs->cap);
     trace_nvme_controller_capability_raw(cap);
     trace_nvme_controller_capability("Maximum Queue Entries Supported",
                                      1 + NVME_CAP_MQES(cap));
@@ -805,16 +861,17 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     bs->bl.request_alignment = s->page_size;
     timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
 
-    ver = le32_to_cpu(regs->vs);
+    ver = nvme_mmio_read_32(&regs->vs);
     trace_nvme_controller_spec_version(extract32(ver, 16, 16),
                                        extract32(ver, 8, 8),
                                        extract32(ver, 0, 8));
 
     /* Reset device to get a clean state. */
-    regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
+    cc = nvme_mmio_read_32(&regs->cc);
+    nvme_mmio_write_32(&regs->cc, (cc & 0xFE));
     /* Wait for CSTS.RDY = 0. */
     deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
-    while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
+    while (NVME_CSTS_RDY(nvme_mmio_read_32(&regs->csts))) {
         if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
             error_setg(errp, "Timeout while waiting for device to reset (%"
                              PRId64 " ms)",
@@ -843,19 +900,21 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     s->queues[INDEX_ADMIN] = q;
     s->queue_count = 1;
     QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000);
-    regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) |
-                            ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT));
-    regs->asq = cpu_to_le64(q->sq.iova);
-    regs->acq = cpu_to_le64(q->cq.iova);
+    nvme_mmio_write_32(&regs->aqa,
+                       ((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) |
+                       ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT));
+    nvme_mmio_write_64(&regs->asq, q->sq.iova);
+    nvme_mmio_write_64(&regs->acq, q->cq.iova);
 
     /* After setting up all control registers we can enable device now. */
-    regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
-                           (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
-                           CC_EN_MASK);
+    nvme_mmio_write_32(&regs->cc,
+                    ((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
+                    (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
+                    CC_EN_MASK));
     /* Wait for CSTS.RDY = 1. */
     now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     deadline = now + timeout_ms * SCALE_MS;
-    while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
+    while (!NVME_CSTS_RDY(nvme_mmio_read_32(&regs->csts))) {
         if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
             error_setg(errp, "Timeout while waiting for device to start (%"
                              PRId64 " ms)",
-- 
2.43.0


Reply via email to