On 09/18/2012 06:16 AM, Alex Williamson wrote:
> To emulate level triggered interrupts, add a resample option to
> KVM_IRQFD.  When specified, a new resamplefd is provided that notifies
> the user when the irqchip has been resampled by the VM.  This may, for
> instance, indicate an EOI.  Also in this mode, injection of an

s/inject/post (or queue)/ everywhere, please.  'Inject' implies
something immediate, but this is very asynchronous, both in terms of
host and guest behaviour.  Yes, the existing code is guilty too.

> interrupt through an irqfd only asserts the interrupt.  On resampling,
> the interrupt is automatically de-asserted prior to user notification.
> This enables level triggered interrupts to be injected and re-enabled
> from vfio with no userspace intervention.
> 
> All resampling irqfds can make use of a single irq source ID, so we
> reserve a new one for this interface.
> 
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index bf33aaa..d30af74 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -1946,6 +1946,19 @@ the guest using the specified gsi pin.  The irqfd is 
> removed using
>  the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
>  and kvm_irqfd.gsi.
>  
> +With KVM_CAP_IRQFD_RESAMPLE, KVM_IRQFD supports a de-assert and notify
> +mechanism allowing emulation of level-triggered, irqfd-based
> +interrupts.  When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an
> +additional eventfd in the kvm_irqfd.resamplefd field.  When operating
> +in resample mode, injection of an interrupt through kvm_irq.fd asserts
> +the specified gsi in the irqchip.  When the irqchip is resampled, such
> +as from an EOI, the gsi is de-asserted and the user is notifed via
> +kvm_irqfd.resamplefd.  It is the user's respnosibility to re-inject

responsibility.

> +the interrupt if the device making use of it still requires service.
> +Note that closing the resamplefd is not sufficient to disable the
> +irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
> +and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
> +
>       raw_spin_lock_init(&kvm->arch.tsc_write_lock);
>  
>  
>  struct kvm;
>  struct kvm_vcpu;
> @@ -283,6 +284,7 @@ struct kvm {
>       struct {
>               spinlock_t        lock;
>               struct list_head  items;
> +             struct list_head  resamplers;
>       } irqfds;
>       struct list_head ioeventfds;
>  #endif
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 7d7e2aa..822f50a 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -43,6 +43,31 @@
>   * --------------------------------------------------------------------
>   */
>  
> +/*
> + * Resamper irqfds are a special variety of irqfds used to emulate

Resampler (or resampling?)

> + * level triggered interrupts.  The interrupt is asserted on eventfd
> + * trigger.  On acknowledgement through the irq ack notifier, the
> + * interrupt is de-asserted and userspace is notified through the
> + * resamplefd.  All resamplers on the same gsi are de-asserted
> + * together, so we don't need to track the state of each individual
> + * user.  We can also therefore share the same irq source ID.
> + */
> +struct _irqfd_resampler {
> +     struct kvm *kvm;
> +     /*
> +      * List of resampling irqfds sharing this gsi.
> +      * RCU list modified under kvm->irqfds.lock
> +      */
> +     struct list_head irqfds;

Specify the type.

> +     /* The irq ack notifier for this gsi */
> +     struct kvm_irq_ack_notifier notifier;
> +     /*
> +      * Entry in list of kvm->irqfd.resamplers for shutdown cleanup.
> +      * Accessed and modified under kvm->irqfds.lock
> +      */
> +     struct list_head list;

Usually we call those "link" (and the real list heads "list").

> +};
> +
>  
> +
> +/*
> + * Since resampler irqfds share an IRQ source ID, we de-assert once
> + * then notify all of the resampler irqfds using this GSI.  We can't
> + * do multiple de-asserts or we risk racing with incoming re-asserts.
> + */
> +static void
> +irqfd_resampler_notify(struct kvm_irq_ack_notifier *kian)
> +{
> +     struct _irqfd_resampler *resampler;
> +     struct _irqfd *irqfd;
> +
> +     resampler = container_of(kian, struct _irqfd_resampler, notifier);
> +
> +     rcu_read_lock();
> +
> +     kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
> +                 resampler->notifier.gsi, 0);

Could be outside the rcu read-side critical section, no?

> +
> +     list_for_each_entry_rcu(irqfd, &resampler->irqfds, resampler_list)
> +             eventfd_signal(irqfd->resamplefd, 1);
> +
> +     rcu_read_unlock();
> +}
> +
>  /*
>   * Race-free decouple logic (ordering is critical)
>   */
> @@ -92,6 +156,43 @@ irqfd_shutdown(struct work_struct *work)
>        */
>       flush_work_sync(&irqfd->inject);
>  
> +     if (irqfd->resampler) {
> +             struct _irqfd_resampler *resampler = irqfd->resampler;
> +             struct kvm *kvm = resampler->kvm;
> +
> +             mutex_lock(&kvm->irq_lock);
> +             spin_lock_irq(&irqfd->kvm->irqfds.lock);
> +
> +             list_del_rcu(&irqfd->resampler_list);
> +
> +             /*
> +              * On removal of the last irqfd in the resampler list,
> +              * remove the resampler and unregister the irq ack
> +              * notifier.  It's possible to race the ack of the final
> +              * injection here, so manually de-assert the gsi to avoid
> +              * leaving an unmanaged, asserted interrupt line.
> +              */
> +             if (list_empty(&resampler->irqfds)) {
> +                     list_del(&resampler->list);
> +                     __kvm_unregister_irq_ack_notifier(kvm,
> +                                                       &resampler->notifier);
> +                     kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
> +                                 resampler->notifier.gsi, 0);
> +                     kfree(resampler);

Is this rcu safe?

> +             }
> +
> +             spin_unlock_irq(&irqfd->kvm->irqfds.lock);
> +             mutex_unlock(&kvm->irq_lock);
> +
> +             /*
> +              * Both list_del_rcu & __kvm_unregister_irq_ack_notifier
> +              * require an rcu grace period/
> +              */
> +             synchronize_rcu();

Quite ugly to expose the internals this way.

> +
> +             eventfd_ctx_put(irqfd->resamplefd);
> +     }
> +
>       /*
>        * It is now safe to release the object's resources
>        */
> @@ -202,8 +303,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>  {
>       struct kvm_irq_routing_table *irq_rt;
>       struct _irqfd *irqfd, *tmp;
> +     struct _irqfd_resampler *resampler = NULL;
>       struct file *file = NULL;
> -     struct eventfd_ctx *eventfd = NULL;
> +     struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
>       int ret;
>       unsigned int events;
>  
> @@ -214,7 +316,40 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>       irqfd->kvm = kvm;
>       irqfd->gsi = args->gsi;
>       INIT_LIST_HEAD(&irqfd->list);
> -     INIT_WORK(&irqfd->inject, irqfd_inject);
> +     INIT_LIST_HEAD(&irqfd->resampler_list);
> +
> +     if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
> +             resamplefd = eventfd_ctx_fdget(args->resamplefd);
> +             if (IS_ERR(resamplefd)) {
> +                     ret = PTR_ERR(resamplefd);
> +                     goto fail;
> +             }
> +
> +             irqfd->resamplefd = resamplefd;
> +
> +             /*
> +              * We may be able to share an existing resampler, but we
> +              * won't know that until we search under spinlock.  Setup
> +              * one here to avoid an atomic allocation later.
> +              */
> +             resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
> +             if (!resampler) {
> +                     ret = -ENOMEM;
> +                     goto fail;
> +             }
> +
> +             resampler->kvm = kvm;
> +             INIT_LIST_HEAD(&resampler->irqfds);
> +             resampler->notifier.gsi = irqfd->gsi;
> +             resampler->notifier.irq_acked = irqfd_resampler_notify;
> +             INIT_LIST_HEAD(&resampler->list);
> +
> +             irqfd->resampler = resampler;
> +
> +             INIT_WORK(&irqfd->inject, irqfd_resampler_inject);
> +     } else
> +             INIT_WORK(&irqfd->inject, irqfd_inject);
> +
>       INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
>  
>       file = eventfd_fget(args->fd);
> @@ -238,6 +373,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>       init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
>       init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
>  
> +     mutex_lock(&kvm->irq_lock);
>       spin_lock_irq(&kvm->irqfds.lock);
>  
>       ret = 0;
> @@ -247,9 +383,36 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>               /* This fd is used for another irq already. */
>               ret = -EBUSY;
>               spin_unlock_irq(&kvm->irqfds.lock);
> +             mutex_unlock(&kvm->irq_lock);
>               goto fail;
>       }
>  
> +     if (resampler) {
> +             struct _irqfd_resampler *resampler_tmp;
> +
> +             /*
> +              * Re-use a resampler if it's already registered on the
> +              * gsi we need.  Free the one we created above if found,
> +              * otherwise add to the list and setup the irq ack notifier.
> +              */
> +             list_for_each_entry(resampler_tmp,
> +                                 &kvm->irqfds.resamplers, list) {
> +                     if (resampler_tmp->notifier.gsi == irqfd->gsi) {
> +                             irqfd->resampler = resampler_tmp;
> +                             break;
> +                     }
> +             }
> +
> +             if (irqfd->resampler == resampler) {
> +                     list_add(&resampler->list, &kvm->irqfds.resamplers);
> +                     __kvm_register_irq_ack_notifier(kvm,
> +                                                     &resampler->notifier);
> +             } else
> +                     kfree(resampler);
> +
> +             list_add_rcu(&irqfd->resampler_list, &irqfd->resampler->irqfds);
> +     }
> +

Whoa.  Can't we put the resampler entry somewhere we don't need to
search for it?  Like a kvm_kernel_irq_routing_entry, that's indexed by
gsi already (kvm_irq_routing_table::rt_entries[gsi]).



-- 
error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to