On Mon, 07/10 15:55, Stefan Hajnoczi wrote:
> On Wed, Jul 05, 2017 at 09:36:31PM +0800, Fam Zheng wrote:
> > +static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
> > +{
> > +    BDRVNVMeState *s = bs->opaque;
> > +    uint8_t *resp;
> > +    int r;
> > +    uint64_t iova;
> > +    NvmeCmd cmd = {
> > +        .opcode = NVME_ADM_CMD_IDENTIFY,
> > +        .cdw10 = cpu_to_le32(0x1),
> > +    };
> > +
> > +    resp = qemu_try_blockalign0(bs, 4096);
>
> Is it possible to use struct NvmeIdCtrl to make this code clearer and
> eliminate the hardcoded sizes/offsets?

Yes, will do.
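Something like this (untested sketch, assuming the NvmeIdCtrl and
NvmeIdNs definitions in hw/block/nvme.h can be shared with the block
driver; their nn, vwc, mdts and nsze fields correspond to the hardcoded
offsets 516, 525, 77 and 0 above). While at it I dropped the
le32_to_cpu() on vwc, which is a single byte, and the buffer is now
unmapped on the error path too, which your comment below asks for:

static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
{
    BDRVNVMeState *s = bs->opaque;
    NvmeIdCtrl *idctrl;
    NvmeIdNs *idns;
    uint8_t *resp;
    bool ret = false;
    int r;
    uint64_t iova;
    NvmeCmd cmd = {
        .opcode = NVME_ADM_CMD_IDENTIFY,
        .cdw10 = cpu_to_le32(0x1),
    };

    QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096);
    resp = qemu_try_blockalign0(bs, sizeof(NvmeIdCtrl));
    if (!resp) {
        error_setg(errp, "Cannot allocate buffer for identify response");
        return false;
    }
    idctrl = (NvmeIdCtrl *)resp;
    idns = (NvmeIdNs *)resp;
    r = nvme_vfio_dma_map(s->vfio, resp, sizeof(NvmeIdCtrl), true, &iova);
    if (r) {
        error_setg(errp, "Cannot map buffer for DMA");
        goto fail;
    }
    cmd.prp1 = cpu_to_le64(iova);

    if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
        error_setg(errp, "Failed to identify controller");
        goto out;
    }

    /* NN is the number of valid namespaces */
    if (le32_to_cpu(idctrl->nn) < namespace) {
        error_setg(errp, "Invalid namespace");
        goto out;
    }
    /* VWC is a single byte, no endianness conversion needed */
    s->write_cache = idctrl->vwc & 0x1;
    s->max_transfer = (idctrl->mdts ? 1 << idctrl->mdts : 0) * s->page_size;
    /* For now the page list buffer per command is one page, to hold at most
     * s->page_size / sizeof(uint64_t) entries. */
    s->max_transfer = MIN_NON_ZERO(s->max_transfer,
                                   s->page_size / sizeof(uint64_t) * s->page_size);

    /* Reuse the same buffer for the namespace identify data */
    memset(resp, 0, sizeof(NvmeIdNs));

    cmd.cdw10 = 0;
    cmd.nsid = cpu_to_le32(namespace);
    if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
        error_setg(errp, "Failed to identify namespace");
        goto out;
    }

    s->nsze = le64_to_cpu(idns->nsze);
    ret = true;
out:
    nvme_vfio_dma_unmap(s->vfio, resp);
fail:
    qemu_vfree(resp);
    return ret;
}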
> > +    if (!resp) {
> > +        error_setg(errp, "Cannot allocate buffer for identify response");
> > +        return false;
> > +    }
> > +    r = nvme_vfio_dma_map(s->vfio, resp, 4096, true, &iova);
> > +    if (r) {
> > +        error_setg(errp, "Cannot map buffer for DMA");
> > +        goto fail;
> > +    }
> > +    cmd.prp1 = cpu_to_le64(iova);
> > +
> > +    if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
> > +        error_setg(errp, "Failed to identify controller");
> > +        goto fail;
> > +    }
> > +
> > +    if (le32_to_cpu(*(uint32_t *)&resp[516]) < namespace) {
> > +        error_setg(errp, "Invalid namespace");
> > +        goto fail;
> > +    }
> > +    s->write_cache = le32_to_cpu(resp[525]) & 0x1;
> > +    s->max_transfer = (resp[77] ? 1 << resp[77] : 0) * s->page_size;
> > +    /* For now the page list buffer per command is one page, to hold at most
> > +     * s->page_size / sizeof(uint64_t) entries. */
> > +    s->max_transfer = MIN_NON_ZERO(s->max_transfer,
> > +                                   s->page_size / sizeof(uint64_t) * s->page_size);
> > +
> > +    memset((char *)resp, 0, 4096);
> > +
> > +    cmd.cdw10 = 0;
> > +    cmd.nsid = namespace;
> > +    if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
> > +        error_setg(errp, "Failed to identify namespace");
> > +        goto fail;
> > +    }
> > +
> > +    s->nsze = le64_to_cpu(*(uint64_t *)&resp[0]);
> > +
> > +    nvme_vfio_dma_unmap(s->vfio, resp);
> > +    qemu_vfree(resp);
> > +    return true;
> > +fail:
> > +    qemu_vfree(resp);
> > +    return false;
>
> nvme_vfio_dma_unmap() is not called in the error path.

Will fix.

> > +static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
> > +                                          NVMeRequest *req, QEMUIOVector *qiov)
> > +{
> > +    BDRVNVMeState *s = bs->opaque;
> > +    uint64_t *pagelist = req->prp_list_page;
> > +    int i, j, r;
> > +    int entries = 0;
> > +
> > +    assert(qiov->size);
> > +    assert(QEMU_IS_ALIGNED(qiov->size, s->page_size));
> > +    assert(qiov->size / s->page_size <= s->page_size / sizeof(uint64_t));
> > +    for (i = 0; i < qiov->niov; ++i) {
> > +        bool retry = true;
> > +        uint64_t iova;
> > +        qemu_co_mutex_lock(&s->dma_map_lock);
> > +try_map:
> > +        r = nvme_vfio_dma_map(s->vfio,
> > +                              qiov->iov[i].iov_base,
> > +                              qiov->iov[i].iov_len,
> > +                              true, &iova);
> > +        if (r == -ENOMEM && retry) {
> > +            retry = false;
> > +            trace_nvme_dma_flush_queue_wait(s);
> > +            if (s->inflight) {
> > +                trace_nvme_dma_map_flush(s);
> > +                qemu_co_queue_wait(&s->dma_flush_queue, &s->dma_map_lock);
> > +            } else {
> > +                r = nvme_vfio_dma_reset_temporary(s->vfio);
> > +                if (r) {
> > +                    return r;
>
> dma_map_lock is held here!

Oops, will fix.
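The minimal fix is to drop the lock before that return, something like
(sketch):

            } else {
                r = nvme_vfio_dma_reset_temporary(s->vfio);
                if (r) {
                    qemu_co_mutex_unlock(&s->dma_map_lock);
                    return r;
                }
            }

Alternatively I can move the qemu_co_mutex_lock/unlock pair out to the
caller, around the whole nvme_cmd_map_qiov() call, so every exit from
this function leaves the lock state unchanged.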
> > +static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
> > +                       QEMUIOVector *qiov, bool is_write, int flags)
> > +{
> > +    BDRVNVMeState *s = bs->opaque;
> > +    int r;
> > +    uint8_t *buf = NULL;
> > +    QEMUIOVector local_qiov;
> > +
> > +    assert(QEMU_IS_ALIGNED(offset, s->page_size));
> > +    assert(QEMU_IS_ALIGNED(bytes, s->page_size));
> > +    assert(bytes <= s->max_transfer);
>
> Who guarantees max_transfer? I think request alignment is enforced by
> block/io.c but there is no generic max_transfer handling code, so this
> assertion can be triggered by the guest. Please handle it as a genuine
> request error instead of using an assertion.

There has been generic max_transfer handling since commit
04ed95f4843281e292d93018d56d4b14705f9f2c: block/io.c fragments requests
that exceed bs->bl.max_transfer, see the code around max_transfer in
block/io.c:bdrv_aligned_preadv() and bdrv_aligned_pwritev().

> > +static int nvme_reopen_prepare(BDRVReopenState *reopen_state,
> > +                               BlockReopenQueue *queue, Error **errp)
> > +{
> > +    return 0;
> > +}
>
> What is the purpose of this dummy .bdrv_reopen_prepare() implementation?

This is necessary for block jobs to work; other drivers provide dummy
implementations as well.

Fam