at 12:35 PM, Jinhao Fan <fanjinhao...@ict.ac.cn> wrote:

> Use irqfd to directly notify KVM to inject interrupts. This is done by
> registering a virtual IRQ(virq) in KVM and associate the virq with an
> irqfd, so that KVM can directly inject the interrupt when it receives
> notification from the irqfd. This approach is supposed to improve 
> performance because it bypasses QEMU's MSI interrupt emulation logic.
> 
> However, I did not see an obvious improvement of the emulation KIOPS:
> 
> QD      1   4  16  64 
> QEMU   38 123 210 329
> irqfd  40 129 219 328
> 
> I found this problem quite hard to diagnose since irqfd's workflow
> involves both QEMU and the in-kernel KVM. 
> 
> Could you help me figure out the following questions:
> 
> 1. How much performance improvement can I expect from using irqfd?
> 2. How can I debug this kind of cross QEMU-KVM problems?
> 
> Signed-off-by: Jinhao Fan <fanjinhao...@ict.ac.cn>
> ---
> hw/nvme/ctrl.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++-
> hw/nvme/nvme.h |  3 +++
> 2 files changed, 68 insertions(+), 1 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 4b75c5f549..59768c4586 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -159,6 +159,7 @@
> #include "sysemu/sysemu.h"
> #include "sysemu/block-backend.h"
> #include "sysemu/hostmem.h"
> +#include "sysemu/kvm.h"
> #include "hw/pci/msix.h"
> #include "migration/vmstate.h"
> 
> @@ -484,12 +485,70 @@ static void nvme_irq_check(NvmeCtrl *n)
>     }
> }
> 
> +static int nvme_kvm_msix_vector_use(NvmeCtrl *n,
> +                                    NvmeCQueue *cq,
> +                                    int vector)
> +{
> +    int ret;
> +
> +    KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
> +    ret = kvm_irqchip_add_msi_route(&c, vector, &n->parent_obj);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +    kvm_irqchip_commit_route_changes(&c);
> +    cq->virq = ret;
> +    return 0;
> +}
> +
> +static int nvme_init_cq_irqfd(NvmeCQueue *cq)
> +{
> +    int ret;
> +    
> +    ret = nvme_kvm_msix_vector_use(cq->ctrl, cq, (int)cq->vector);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    ret = event_notifier_init(&cq->irq_notifier, 0);
> +    if (ret < 0) {
> +        goto fail_notifier;
> +    }
> +
> +    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &cq->irq_notifier,
> +                                              NULL, cq->virq);
> +    if (ret < 0) {
> +        goto fail_kvm;
> +    }
> +
> +    return 0;
> +                        
> +fail_kvm:
> +    event_notifier_cleanup(&cq->irq_notifier);
> +fail_notifier:
> +    kvm_irqchip_release_virq(kvm_state, cq->virq);
> +fail:
> +    return ret;
> +}
> +
> static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
> {
>     if (cq->irq_enabled) {
>         if (msix_enabled(&(n->parent_obj))) {
> +            /* Initialize CQ irqfd */
> +            if (!cq->irqfd_enabled && n->params.ioeventfd && cq->cqid != 0) {
> +                int ret = nvme_init_cq_irqfd(cq);
> +                if (ret == 0) {
> +                    cq->irqfd_enabled = true;
> +                }
> +            }
> +
>             trace_pci_nvme_irq_msix(cq->vector);
> -            msix_notify(&(n->parent_obj), cq->vector);
> +            if (cq->irqfd_enabled) {
> +                event_notifier_set(&cq->irq_notifier);
> +            } else {
> +                msix_notify(&(n->parent_obj), cq->vector);
> +            }
>         } else {
>             trace_pci_nvme_irq_pin();
>             assert(cq->vector < 32);
> @@ -4670,6 +4729,12 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
>         event_notifier_cleanup(&cq->notifier);
>     }
>     if (msix_enabled(&n->parent_obj)) {
> +        if (cq->irqfd_enabled) {
> +            kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, 
> &cq->irq_notifier,
> +                                                  cq->virq);
> +            kvm_irqchip_release_virq(kvm_state, cq->virq);
> +            event_notifier_cleanup(&cq->irq_notifier);
> +        }
>         msix_vector_unuse(&n->parent_obj, cq->vector);
>     }
>     if (cq->cqid) {
> diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
> index 2a9beea0c8..84e5b00fe3 100644
> --- a/hw/nvme/nvme.h
> +++ b/hw/nvme/nvme.h
> @@ -391,7 +391,10 @@ typedef struct NvmeCQueue {
>     uint64_t    ei_addr;
>     QEMUTimer   *timer;
>     EventNotifier notifier;
> +    EventNotifier irq_notifier;
> +    int         virq;
>     bool        ioeventfd_enabled;
> +    bool        irqfd_enabled;
>     QTAILQ_HEAD(, NvmeSQueue) sq_list;
>     QTAILQ_HEAD(, NvmeRequest) req_list;
> } NvmeCQueue;
> -- 
> 2.25.1

Hi Stefan,

It seems you originally introduced irqfd to virtio-blk to solve thread
safety problems [1]. Could you help explain the benefits of irqfd?

Thanks!
Jinhao Fan

Reply via email to