This patch introduces a new KVM_DEV_VFIO_DEVICE attribute.

This is a new control channel which enables KVM to cooperate with
viable VFIO devices.

The kvm-vfio device now holds a list of devices (kvm_vfio_device)
in addition to a list of groups (kvm_vfio_group). The new
infrastructure enables to check the validity of the VFIO device
file descriptor, get and hold a reference to it.

The first concrete implemented command is IRQ forward control:
KVM_DEV_VFIO_DEVICE_FORWARD_IRQ, KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ.

It consists in programing the VFIO driver and KVM in a consistent manner
so that an optimized IRQ injection/completion is set up. Each
kvm_vfio_device holds a list of forwarded IRQ. When putting a
kvm_vfio_device, the implementation makes sure the forwarded IRQs
are set again in the normal handling state (non forwarded).

The forwarding programmming is architecture specific, embodied by the
kvm_arch_set_fwd_state function. Its implementation is given in a
separate patch file.

The forwarding control modality is enabled by the
__KVM_HAVE_ARCH_KVM_VFIO_FORWARD define.

Signed-off-by: Eric Auger <eric.au...@linaro.org>

---

v1 -> v2:
- __KVM_HAVE_ARCH_KVM_VFIO renamed into __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
- original patch file separated into 2 parts: generic part moved in vfio.c
  and ARM specific part(kvm_arch_set_fwd_state)
---
 include/linux/kvm_host.h |  27 +++
 virt/kvm/vfio.c          | 452 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 477 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a4c33b3..24350dc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1065,6 +1065,21 @@ struct kvm_device_ops {
                      unsigned long arg);
 };
 
+enum kvm_fwd_irq_action {
+       KVM_VFIO_IRQ_SET_FORWARD,
+       KVM_VFIO_IRQ_SET_NORMAL,
+       KVM_VFIO_IRQ_CLEANUP,
+};
+
+/* internal structure describing a forwarded IRQ */
+struct kvm_fwd_irq {
+       struct list_head link;
+       __u32 index; /* platform device irq index */
+       __u32 hwirq; /*physical IRQ */
+       __u32 gsi; /* virtual IRQ */
+       struct kvm_vcpu *vcpu; /* vcpu to inject into*/
+};
+
 void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
@@ -1075,6 +1090,18 @@ extern struct kvm_device_ops kvm_vfio_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_flic_ops;
 
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
+int kvm_arch_set_fwd_state(struct kvm_fwd_irq *pfwd,
+                          enum kvm_fwd_irq_action action);
+
+#else
+static inline int kvm_arch_set_fwd_state(struct kvm_fwd_irq *pfwd,
+                                        enum kvm_fwd_irq_action action)
+{
+       return 0;
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 76dc7a1..e4a81c4 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -18,14 +18,24 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
+#include <linux/platform_device.h>
 
 struct kvm_vfio_group {
        struct list_head node;
        struct vfio_group *vfio_group;
 };
 
+struct kvm_vfio_device {
+       struct list_head node;
+       struct vfio_device *vfio_device;
+       /* list of forwarded IRQs for that VFIO device */
+       struct list_head fwd_irq_list;
+       int fd;
+};
+
 struct kvm_vfio {
        struct list_head group_list;
+       struct list_head device_list;
        struct mutex lock;
        bool noncoherent;
 };
@@ -246,12 +256,441 @@ static int kvm_vfio_set_group(struct kvm_device *dev, 
long attr, u64 arg)
        return -ENXIO;
 }
 
+/**
+ * get_vfio_device - returns the vfio-device corresponding to this fd
+ * @fd:fd of the vfio platform device
+ *
+ * checks it is a vfio device
+ * increment its ref counter
+ */
+static struct vfio_device *kvm_vfio_get_vfio_device(int fd)
+{
+       struct fd f;
+       struct vfio_device *vdev;
+
+       f = fdget(fd);
+       if (!f.file)
+               return NULL;
+       vdev = kvm_vfio_device_get_external_user(f.file);
+       fdput(f);
+       return vdev;
+}
+
+/**
+ * put_vfio_device: put the vfio platform device
+ * @vdev: vfio_device to put
+ *
+ * decrement the ref counter
+ */
+static void kvm_vfio_put_vfio_device(struct vfio_device *vdev)
+{
+       kvm_vfio_device_put_external_user(vdev);
+}
+
+/**
+ * kvm_vfio_find_device - look for the device in the assigned
+ * device list
+ * @kv: the kvm-vfio device
+ * @vdev: the vfio_device to look for
+ *
+ * returns the associated kvm_vfio_device if the device is known,
+ * meaning at least 1 IRQ is forwarded for this device.
+ * in the device is not registered, returns NULL.
+ */
+struct kvm_vfio_device *kvm_vfio_find_device(struct kvm_vfio *kv,
+                                            struct vfio_device *vdev)
+{
+       struct kvm_vfio_device *kvm_vdev_iter;
+
+       list_for_each_entry(kvm_vdev_iter, &kv->device_list, node) {
+               if (kvm_vdev_iter->vfio_device == vdev)
+                       return kvm_vdev_iter;
+       }
+       return NULL;
+}
+
+/**
+ * kvm_vfio_find_irq - look for a an irq in the device IRQ list
+ * @kvm_vdev: the kvm_vfio_device
+ * @irq_index: irq index
+ *
+ * returns the forwarded irq struct if it exists, NULL in the negative
+ */
+struct kvm_fwd_irq *kvm_vfio_find_irq(struct kvm_vfio_device *kvm_vdev,
+                                     int irq_index)
+{
+       struct kvm_fwd_irq *fwd_irq_iter;
+
+       list_for_each_entry(fwd_irq_iter, &kvm_vdev->fwd_irq_list, link) {
+               if (fwd_irq_iter->index == irq_index)
+                       return fwd_irq_iter;
+       }
+       return NULL;
+}
+
+/**
+ * validate_forward - checks whether forwarding a given IRQ is meaningful
+ * @vdev:  vfio_device the IRQ belongs to
+ * @fwd_irq: user struct containing the irq_index to forward
+ * @kvm_vdev: if a forwarded IRQ already exists for that VFIO device,
+ * kvm_vfio_device that holds it
+ * @hwirq: irq numberthe irq index corresponds to
+ *
+ * checks the vfio-device is a platform vfio device
+ * checks the irq_index corresponds to an actual hwirq and
+ * checks this hwirq is not already forwarded
+ * returns < 0 on following errors:
+ * not a platform device, bad irq index, already forwarded
+ */
+static int kvm_vfio_validate_forward(struct kvm_vfio *kv,
+                           struct vfio_device *vdev,
+                           struct kvm_arch_forwarded_irq *fwd_irq,
+                           struct kvm_vfio_device **kvm_vdev,
+                           int *hwirq)
+{
+       struct device *dev = kvm_vfio_external_base_device(vdev);
+       struct platform_device *platdev;
+
+       *hwirq = -1;
+       *kvm_vdev = NULL;
+       if (strcmp(dev->bus->name, "platform") == 0) {
+               platdev = to_platform_device(dev);
+               *hwirq = platform_get_irq(platdev, fwd_irq->index);
+               if (*hwirq < 0) {
+                       kvm_err("%s incorrect index\n", __func__);
+                       return -EINVAL;
+               }
+       } else {
+               kvm_err("%s not a platform device\n", __func__);
+               return -EINVAL;
+       }
+       /* is a ref to this device already owned by the KVM-VFIO device? */
+       *kvm_vdev = kvm_vfio_find_device(kv, vdev);
+       if (*kvm_vdev) {
+               if (kvm_vfio_find_irq(*kvm_vdev, fwd_irq->index)) {
+                       kvm_err("%s irq %d already forwarded\n",
+                               __func__, *hwirq);
+                       return -EINVAL;
+               }
+       }
+       return 0;
+}
+
+/**
+ * validate_unforward: check a deassignment is meaningful
+ * @kv: the kvm_vfio device
+ * @vdev: the vfio_device whose irq to deassign belongs to
+ * @fwd_irq: the user struct that contains the fd and irq_index of the irq
+ * @kvm_vdev: the kvm_vfio_device the forwarded irq belongs to, if
+ * it exists
+ *
+ * returns 0 if the provided irq effectively is forwarded
+ * (a ref to this vfio_device is hold and this irq belongs to
+ * the forwarded irq of this device)
+ * returns -EINVAL in the negative
+ */
+static int kvm_vfio_validate_unforward(struct kvm_vfio *kv,
+                             struct vfio_device *vdev,
+                             struct kvm_arch_forwarded_irq *fwd_irq,
+                             struct kvm_vfio_device **kvm_vdev)
+{
+       struct kvm_fwd_irq *pfwd;
+
+       *kvm_vdev = kvm_vfio_find_device(kv, vdev);
+       if (!kvm_vdev) {
+               kvm_err("%s no forwarded irq for this device\n", __func__);
+               return -EINVAL;
+       }
+       pfwd = kvm_vfio_find_irq(*kvm_vdev, fwd_irq->index);
+       if (!pfwd) {
+               kvm_err("%s irq %d is not forwarded\n", __func__, fwd_irq->fd);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/**
+ * kvm_vfio_forward - set a forwarded IRQ
+ * @kdev: the kvm device
+ * @vdev: the vfio device the IRQ belongs to
+ * @fwd_irq: the user struct containing the irq_index and guest irq
+ * @must_put: tells the caller whether the vfio_device must be put after
+ * the call (ref must be released in case a ref onto this device was
+ * already hold or in case of new device and failure)
+ *
+ * validate the injection, activate forward and store the information
+ * about which irq and which device is concerned so that on deassign or
+ * kvm-vfio destruction everuthing can be cleaned up.
+ */
+static int kvm_vfio_forward(struct kvm_device *kdev,
+                           struct vfio_device *vdev,
+                           struct kvm_arch_forwarded_irq *fwd_irq,
+                           bool *must_put)
+{
+       int ret;
+       struct kvm_fwd_irq *pfwd = NULL;
+       struct kvm_vfio_device *kvm_vdev = NULL;
+       struct kvm_vfio *kv = kdev->private;
+       int hwirq;
+
+       *must_put = true;
+       ret = kvm_vfio_validate_forward(kv, vdev, fwd_irq,
+                                       &kvm_vdev, &hwirq);
+       if (ret < 0)
+               return -EINVAL;
+
+       pfwd = kzalloc(sizeof(*pfwd), GFP_KERNEL);
+       if (!pfwd)
+               return -ENOMEM;
+       pfwd->index = fwd_irq->index;
+       pfwd->gsi = fwd_irq->gsi;
+       pfwd->hwirq = hwirq;
+       pfwd->vcpu = kvm_get_vcpu(kdev->kvm, 0);
+       ret = kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_SET_FORWARD);
+       if (ret < 0) {
+               kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_CLEANUP);
+               kfree(pfwd);
+               return ret;
+       }
+
+       if (!kvm_vdev) {
+               /* create & insert the new device and keep the ref */
+               kvm_vdev = kzalloc(sizeof(*kvm_vdev), GFP_KERNEL);
+               if (!kvm_vdev) {
+                       kvm_arch_set_fwd_state(pfwd, false);
+                       kfree(pfwd);
+                       return -ENOMEM;
+               }
+
+               kvm_vdev->vfio_device = vdev;
+               kvm_vdev->fd = fwd_irq->fd;
+               INIT_LIST_HEAD(&kvm_vdev->fwd_irq_list);
+               list_add(&kvm_vdev->node, &kv->device_list);
+               /*
+                * the only case where we keep the ref:
+                * new device and forward setting successful
+                */
+               *must_put = false;
+       }
+
+       list_add(&pfwd->link, &kvm_vdev->fwd_irq_list);
+
+       kvm_debug("forwarding set for fd=%d, hwirq=%d, gsi=%d\n",
+       fwd_irq->fd, hwirq, fwd_irq->gsi);
+
+       return 0;
+}
+
+/**
+ * remove_assigned_device - put a given device from the list
+ * @kv: the kvm-vfio device
+ * @vdev: the vfio-device to remove
+ *
+ * change the state of all forwarded IRQs, free the forwarded IRQ list,
+ * remove the corresponding kvm_vfio_device from the assigned device
+ * list.
+ * returns true if the device could be removed, false in the negative
+ */
+bool remove_assigned_device(struct kvm_vfio *kv,
+                           struct vfio_device *vdev)
+{
+       struct kvm_vfio_device *kvm_vdev_iter, *tmp_vdev;
+       struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq;
+       bool removed = false;
+       int ret;
+
+       list_for_each_entry_safe(kvm_vdev_iter, tmp_vdev,
+                                &kv->device_list, node) {
+               if (kvm_vdev_iter->vfio_device == vdev) {
+                       /* loop on all its forwarded IRQ */
+                       list_for_each_entry_safe(fwd_irq_iter, tmp_irq,
+                                                &kvm_vdev_iter->fwd_irq_list,
+                                                link) {
+                               ret = kvm_arch_set_fwd_state(fwd_irq_iter,
+                                               KVM_VFIO_IRQ_SET_NORMAL);
+                               if (ret < 0)
+                                       return ret;
+                               list_del(&fwd_irq_iter->link);
+                               kfree(fwd_irq_iter);
+                       }
+                       /* all IRQs could be deassigned */
+                       list_del(&kvm_vdev_iter->node);
+                       kvm_vfio_device_put_external_user(
+                               kvm_vdev_iter->vfio_device);
+                       kfree(kvm_vdev_iter);
+                       removed = true;
+                       break;
+               }
+       }
+       return removed;
+}
+
+
+/**
+ * remove_fwd_irq - remove a forwarded irq
+ *
+ * @kv: kvm-vfio device
+ * kvm_vdev: the kvm_vfio_device the IRQ belongs to
+ * irq_index: the index of the IRQ
+ *
+ * change the forwarded state of the IRQ, remove the IRQ from
+ * the device forwarded IRQ list. In case it is the last one,
+ * put the device
+ */
+int remove_fwd_irq(struct kvm_vfio *kv,
+                  struct kvm_vfio_device *kvm_vdev,
+                  int irq_index)
+{
+       struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq;
+       int ret = -1;
+
+       list_for_each_entry_safe(fwd_irq_iter, tmp_irq,
+                                &kvm_vdev->fwd_irq_list, link) {
+               if (fwd_irq_iter->index == irq_index) {
+                       ret = kvm_arch_set_fwd_state(fwd_irq_iter,
+                                               KVM_VFIO_IRQ_SET_NORMAL);
+                       if (ret < 0)
+                               break;
+                       list_del(&fwd_irq_iter->link);
+                       kfree(fwd_irq_iter);
+                       ret = 0;
+                       break;
+               }
+       }
+       if (list_empty(&kvm_vdev->fwd_irq_list))
+               remove_assigned_device(kv, kvm_vdev->vfio_device);
+
+       return ret;
+}
+
+/**
+ * kvm_vfio_unforward - remove a forwarded IRQ
+ * @kdev: the kvm device
+ * @vdev: the vfio_device
+ * @fwd_irq: user struct
+ * after checking this IRQ effectively is forwarded, change its state,
+ * remove it from the corresponding kvm_vfio_device list
+ */
+static int kvm_vfio_unforward(struct kvm_device *kdev,
+                                    struct vfio_device *vdev,
+                                    struct kvm_arch_forwarded_irq *fwd_irq)
+{
+       struct kvm_vfio *kv = kdev->private;
+       struct kvm_vfio_device *kvm_vdev;
+       int ret;
+
+       ret = kvm_vfio_validate_unforward(kv, vdev, fwd_irq, &kvm_vdev);
+       if (ret < 0)
+               return -EINVAL;
+
+       ret = remove_fwd_irq(kv, kvm_vdev, fwd_irq->index);
+       if (ret < 0)
+               kvm_err("%s fail unforwarding (fd=%d, index=%d)\n",
+                       __func__, fwd_irq->fd, fwd_irq->index);
+       else
+               kvm_debug("%s unforwarding IRQ (fd=%d, index=%d)\n",
+                         __func__, fwd_irq->fd, fwd_irq->index);
+       return ret;
+}
+
+
+
+
+/**
+ * kvm_vfio_set_device - the top function for interracting with a vfio
+ * device
+ */
+static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
+{
+       struct kvm_vfio *kv = kdev->private;
+       struct vfio_device *vdev;
+       struct kvm_arch_forwarded_irq fwd_irq; /* user struct */
+       int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
+
+       switch (attr) {
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
+       case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:{
+               bool must_put;
+               int ret;
+
+               if (copy_from_user(&fwd_irq, argp, sizeof(fwd_irq)))
+                       return -EFAULT;
+               vdev = kvm_vfio_get_vfio_device(fwd_irq.fd);
+               if (IS_ERR(vdev))
+                       return PTR_ERR(vdev);
+               mutex_lock(&kv->lock);
+               ret = kvm_vfio_forward(kdev, vdev, &fwd_irq, &must_put);
+               if (must_put)
+                       kvm_vfio_put_vfio_device(vdev);
+               mutex_unlock(&kv->lock);
+               return ret;
+               }
+       case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: {
+               int ret;
+
+               if (copy_from_user(&fwd_irq, argp, sizeof(fwd_irq)))
+                       return -EFAULT;
+               vdev = kvm_vfio_get_vfio_device(fwd_irq.fd);
+               if (IS_ERR(vdev))
+                       return PTR_ERR(vdev);
+
+               kvm_vfio_device_put_external_user(vdev);
+               mutex_lock(&kv->lock);
+               ret = kvm_vfio_unforward(kdev, vdev, &fwd_irq);
+               mutex_unlock(&kv->lock);
+               return ret;
+       }
+#endif
+       default:
+               return -ENXIO;
+       }
+}
+
+/**
+ * kvm_vfio_put_all_devices - cancel forwarded IRQs and put all devices
+ * @kv: kvm-vfio device
+ *
+ * loop on all got devices and their associated forwarded IRQs
+ * restore the non forwarded state, remove IRQs and their devices from
+ * the respective list, put the vfio platform devices
+ *
+ * When this function is called, the vcpu already are destroyed. No
+ * vgic manipulation can happen hence the KVM_VFIO_IRQ_CLEANUP
+ * kvm_arch_set_fwd_state action
+ */
+int kvm_vfio_put_all_devices(struct kvm_vfio *kv)
+{
+       struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq;
+       struct kvm_vfio_device *kvm_vdev_iter, *tmp_vdev;
+
+       /* loop on all the assigned devices */
+       list_for_each_entry_safe(kvm_vdev_iter, tmp_vdev,
+                                &kv->device_list, node) {
+
+               /* loop on all its forwarded IRQ */
+               list_for_each_entry_safe(fwd_irq_iter, tmp_irq,
+                                        &kvm_vdev_iter->fwd_irq_list, link) {
+                       kvm_arch_set_fwd_state(fwd_irq_iter,
+                                               KVM_VFIO_IRQ_CLEANUP);
+                       list_del(&fwd_irq_iter->link);
+                       kfree(fwd_irq_iter);
+               }
+               list_del(&kvm_vdev_iter->node);
+               kvm_vfio_device_put_external_user(kvm_vdev_iter->vfio_device);
+               kfree(kvm_vdev_iter);
+       }
+       return 0;
+}
+
+
 static int kvm_vfio_set_attr(struct kvm_device *dev,
                             struct kvm_device_attr *attr)
 {
        switch (attr->group) {
        case KVM_DEV_VFIO_GROUP:
                return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+       case KVM_DEV_VFIO_DEVICE:
+               return kvm_vfio_set_device(dev, attr->attr, attr->addr);
        }
 
        return -ENXIO;
@@ -267,10 +706,17 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
                case KVM_DEV_VFIO_GROUP_DEL:
                        return 0;
                }
-
                break;
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
+       case KVM_DEV_VFIO_DEVICE:
+               switch (attr->attr) {
+               case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
+               case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
+                       return 0;
+               }
+               break;
+#endif
        }
-
        return -ENXIO;
 }
 
@@ -284,6 +730,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
                list_del(&kvg->node);
                kfree(kvg);
        }
+       kvm_vfio_put_all_devices(kv);
 
        kvm_vfio_update_coherency(dev);
 
@@ -306,6 +753,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
                return -ENOMEM;
 
        INIT_LIST_HEAD(&kv->group_list);
+       INIT_LIST_HEAD(&kv->device_list);
        mutex_init(&kv->lock);
 
        dev->private = kv;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to