Add support for MSI and MSI-X interrupt modes:
   - Interrupt mode selection order is:
        INT#X (for backward compatibility) -> MSI-X -> MSI.
   - Add ioctl() commands:
      - UIO_PCI_GENERIC_INT_MODE_GET: query the current interrupt mode.
      - UIO_PCI_GENERIC_IRQ_NUM_GET: query the maximum number of IRQs.
      - UIO_PCI_GENERIC_IRQ_SET: bind the IRQ to eventfd (similar to vfio).
   - Add mappings to all bars (memory and portio): some devices have
     registers related to MSI/MSI-X handling outside BAR0.

Signed-off-by: Vlad Zolotarov <vl...@cloudius-systems.com>
---
New in v3:
   - Add __iomem qualifier to temp buffer receiving ioremap value.

New in v2:
   - Added #include <linux/uaccess.h> to uio_pci_generic.c

Signed-off-by: Vlad Zolotarov <vl...@cloudius-systems.com>
---
 drivers/uio/uio_pci_generic.c   | 410 +++++++++++++++++++++++++++++++++++++---
 include/linux/uio_pci_generic.h |  36 ++++
 2 files changed, 423 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/uio_pci_generic.h

diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
index d0b508b..6b8b1789 100644
--- a/drivers/uio/uio_pci_generic.c
+++ b/drivers/uio/uio_pci_generic.c
@@ -22,16 +22,32 @@
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/msi.h>
 #include <linux/slab.h>
 #include <linux/uio_driver.h>
+#include <linux/uio_pci_generic.h>
+#include <linux/eventfd.h>
+#include <linux/uaccess.h>
 
 #define DRIVER_VERSION "0.01.0"
 #define DRIVER_AUTHOR  "Michael S. Tsirkin <m...@redhat.com>"
 #define DRIVER_DESC    "Generic UIO driver for PCI 2.3 devices"
 
+struct msix_info {
+       int num_irqs;
+       struct msix_entry *table;
+       struct uio_msix_irq_ctx {
+               struct eventfd_ctx *trigger;    /* MSI-x vector to eventfd */
+               char *name;                     /* name in /proc/interrupts */
+       } *ctx;
+};
+
 struct uio_pci_generic_dev {
        struct uio_info info;
        struct pci_dev *pdev;
+       struct mutex msix_state_lock;           /* ioctl mutex */
+       enum uio_int_mode int_mode;
+       struct msix_info msix;
 };
 
 static inline struct uio_pci_generic_dev *
@@ -40,9 +56,177 @@ to_uio_pci_generic_dev(struct uio_info *info)
        return container_of(info, struct uio_pci_generic_dev, info);
 }
 
-/* Interrupt handler. Read/modify/write the command register to disable
- * the interrupt. */
-static irqreturn_t irqhandler(int irq, struct uio_info *info)
+/* Unmap previously ioremap'd resources */
+static void release_iomaps(struct uio_pci_generic_dev *gdev)
+{
+       int i;
+       struct uio_mem *mem = gdev->info.mem;
+
+       for (i = 0; i < MAX_UIO_MAPS; i++, mem++) {
+               if (mem->internal_addr) {
+                       iounmap(mem->internal_addr);
+                       mem->internal_addr = NULL;
+               }
+       }
+}
+
+static int setup_maps(struct pci_dev *pdev, struct uio_info *info)
+{
+       int i, m = 0, p = 0, err;
+       static const char * const bar_names[] = {
+               "BAR0", "BAR1", "BAR2", "BAR3", "BAR4", "BAR5",
+       };
+
+       for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
+               unsigned long start = pci_resource_start(pdev, i);
+               unsigned long flags = pci_resource_flags(pdev, i);
+               unsigned long len = pci_resource_len(pdev, i);
+
+               if (start == 0 || len == 0)
+                       continue;
+
+               if (flags & IORESOURCE_MEM) {
+                       void __iomem *addr;
+
+                       if (m >= MAX_UIO_MAPS)
+                               continue;
+
+                       addr = ioremap(start, len);
+                       if (addr == NULL) {
+                               err = -EINVAL;
+                               goto fail;
+                       }
+
+                       info->mem[m].name = bar_names[i];
+                       info->mem[m].addr = start;
+                       info->mem[m].internal_addr = addr;
+                       info->mem[m].size = len;
+                       info->mem[m].memtype = UIO_MEM_PHYS;
+                       ++m;
+               } else if (flags & IORESOURCE_IO) {
+                       if (p >= MAX_UIO_PORT_REGIONS)
+                               continue;
+
+                       info->port[p].name = bar_names[i];
+                       info->port[p].start = start;
+                       info->port[p].size = len;
+                       info->port[p].porttype = UIO_PORT_X86;
+                       ++p;
+               }
+       }
+
+       return 0;
+fail:
+       for (i = 0; i < m; i++) {
+               iounmap(info->mem[i].internal_addr);
+               info->mem[i].internal_addr = NULL;
+       }
+
+       return err;
+}
+
+static irqreturn_t msix_irqhandler(int irq, void *arg);
+
+/* set the mapping between vector # and existing eventfd. */
+static int set_irq_eventfd(struct uio_pci_generic_dev *gdev, int vec, int fd)
+{
+       struct uio_msix_irq_ctx *ctx;
+       struct eventfd_ctx *trigger;
+       struct pci_dev *pdev = gdev->pdev;
+       int irq, err;
+
+       if (vec >= gdev->msix.num_irqs) {
+               dev_notice(&gdev->pdev->dev, "vec %u >= num_vec %u\n",
+                          vec, gdev->msix.num_irqs);
+               return -ERANGE;
+       }
+
+       irq = gdev->msix.table[vec].vector;
+
+       /* Cleanup existing irq mapping */
+       ctx = &gdev->msix.ctx[vec];
+       if (ctx->trigger) {
+               free_irq(irq, ctx->trigger);
+               eventfd_ctx_put(ctx->trigger);
+               ctx->trigger = NULL;
+       }
+
+       /* Passing -1 is used to disable interrupt */
+       if (fd < 0)
+               return 0;
+
+
+       trigger = eventfd_ctx_fdget(fd);
+       if (IS_ERR(trigger)) {
+               err = PTR_ERR(trigger);
+               dev_notice(&gdev->pdev->dev,
+                          "eventfd ctx get failed: %d\n", err);
+               return err;
+       }
+
+       err = request_irq(irq, msix_irqhandler, 0, ctx->name, trigger);
+       if (err) {
+               dev_notice(&pdev->dev, "request irq failed: %d\n", err);
+               eventfd_ctx_put(trigger);
+               return err;
+       }
+
+       dev_dbg(&pdev->dev, "map vector %u to fd %d trigger %p\n",
+               vec, fd, trigger);
+       ctx->trigger = trigger;
+
+       return 0;
+}
+
+static int uio_pci_generic_ioctl(struct uio_info *info, unsigned int cmd,
+                                unsigned long arg)
+{
+       struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
+       struct uio_pci_generic_irq_set hdr;
+       int err;
+
+       switch (cmd) {
+       case UIO_PCI_GENERIC_IRQ_SET:
+               if (copy_from_user(&hdr, (void __user *)arg, sizeof(hdr)))
+                       return -EFAULT;
+
+               /* Locking is needed to ensure two things:
+                *  1) Two IRQ_SET ioctl()'s are not running in parallel.
+                *  2) IRQ_SET ioctl() is not running in parallel with remove().
+                */
+               mutex_lock(&gdev->msix_state_lock);
+               if (gdev->int_mode != UIO_INT_MODE_MSIX) {
+                       mutex_unlock(&gdev->msix_state_lock);
+                       return -EOPNOTSUPP;
+               }
+
+               err = set_irq_eventfd(gdev, hdr.vec, hdr.fd);
+               mutex_unlock(&gdev->msix_state_lock);
+
+               break;
+       case UIO_PCI_GENERIC_IRQ_NUM_GET:
+               if (gdev->int_mode == UIO_INT_MODE_NONE)
+                       err = put_user(0, (u32 __user *)arg);
+               else if (gdev->int_mode != UIO_INT_MODE_MSIX)
+                       err = put_user(1, (u32 __user *)arg);
+               else
+                       err = put_user(gdev->msix.num_irqs,
+                                      (u32 __user *)arg);
+
+               break;
+       case UIO_PCI_GENERIC_INT_MODE_GET:
+               err = put_user(gdev->int_mode, (u32 __user *)arg);
+
+               break;
+       default:
+               err = -EOPNOTSUPP;
+       }
+
+       return err;
+}
+
+/* INT#X interrupt handler. */
+static irqreturn_t intx_irqhandler(int irq, struct uio_info *info)
 {
        struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
 
@@ -53,8 +237,162 @@ static irqreturn_t irqhandler(int irq, struct uio_info 
*info)
        return IRQ_HANDLED;
 }
 
-static int probe(struct pci_dev *pdev,
-                          const struct pci_device_id *id)
+/* MSI interrupt handler. */
+static irqreturn_t msi_irqhandler(int irq, struct uio_info *info)
+{
+       /* UIO core will signal the user process. */
+       return IRQ_HANDLED;
+}
+
+/* MSI-X interrupt handler. */
+static irqreturn_t msix_irqhandler(int irq, void *arg)
+{
+       struct eventfd_ctx *trigger = arg;
+
+       pr_devel("irq %u trigger %p\n", irq, trigger);
+
+       eventfd_signal(trigger, 1);
+       return IRQ_HANDLED;
+}
+
+static bool enable_intx(struct uio_pci_generic_dev *gdev)
+{
+       struct pci_dev *pdev = gdev->pdev;
+
+       if (!pdev->irq || !pci_intx_mask_supported(pdev))
+               return false;
+
+       gdev->int_mode = UIO_INT_MODE_INTX;
+       gdev->info.irq = pdev->irq;
+       gdev->info.irq_flags = IRQF_SHARED;
+       gdev->info.handler = intx_irqhandler;
+
+       return true;
+}
+
+static void set_pci_master(struct pci_dev *pdev)
+{
+       pci_set_master(pdev);
+       dev_warn(&pdev->dev, "Enabling PCI bus mastering. Bogus userspace 
application is able to trash kernel memory using DMA");
+       add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+}
+
+static bool enable_msi(struct uio_pci_generic_dev *gdev)
+{
+       struct pci_dev *pdev = gdev->pdev;
+
+       set_pci_master(pdev);
+
+       if (pci_enable_msi(pdev))
+               return false;
+
+       gdev->int_mode = UIO_INT_MODE_MSI;
+       gdev->info.irq = pdev->irq;
+       gdev->info.irq_flags = 0;
+       gdev->info.handler = msi_irqhandler;
+
+       return true;
+}
+
+static bool enable_msix(struct uio_pci_generic_dev *gdev)
+{
+       struct pci_dev *pdev = gdev->pdev;
+       int i, vectors = pci_msix_vec_count(pdev);
+
+       if (vectors <= 0)
+               return false;
+
+       gdev->msix.table = kcalloc(vectors, sizeof(struct msix_entry),
+                                  GFP_KERNEL);
+       if (!gdev->msix.table) {
+               dev_err(&pdev->dev, "Failed to allocate memory for MSI-X 
table");
+               return false;
+       }
+
+       gdev->msix.ctx = kcalloc(vectors, sizeof(struct uio_msix_irq_ctx),
+                                GFP_KERNEL);
+       if (!gdev->msix.ctx) {
+               dev_err(&pdev->dev, "Failed to allocate memory for MSI-X 
contexts");
+               goto err_ctx_alloc;
+       }
+
+       for (i = 0; i < vectors; i++) {
+               gdev->msix.table[i].entry = i;
+               gdev->msix.ctx[i].name = kasprintf(GFP_KERNEL,
+                                                  KBUILD_MODNAME "[%d](%s)",
+                                                  i, pci_name(pdev));
+               if (!gdev->msix.ctx[i].name)
+                       goto err_name_alloc;
+       }
+
+       set_pci_master(pdev);
+
+       if (pci_enable_msix(pdev, gdev->msix.table, vectors))
+               goto err_msix_enable;
+
+       gdev->int_mode = UIO_INT_MODE_MSIX;
+       gdev->info.irq = UIO_IRQ_CUSTOM;
+       gdev->msix.num_irqs = vectors;
+
+       return true;
+
+err_msix_enable:
+       pci_clear_master(pdev);
+err_name_alloc:
+       for (i = 0; i < vectors; i++)
+               kfree(gdev->msix.ctx[i].name);
+
+       kfree(gdev->msix.ctx);
+err_ctx_alloc:
+       kfree(gdev->msix.table);
+
+       return false;
+}
+
+/**
+ * Disable interrupts and free related resources.
+ *
+ * @gdev device handle
+ *
+ * This function should be called after the corresponding UIO device has been
+ * unregistered. This will ensure that there are no currently running ioctl()s
+ * and there won't be any new ones until next probe() call.
+ */
+static void disable_intr(struct uio_pci_generic_dev *gdev)
+{
+       struct pci_dev *pdev = gdev->pdev;
+       int i;
+
+       switch (gdev->int_mode) {
+       case UIO_INT_MODE_MSI:
+               pci_disable_msi(pdev);
+               pci_clear_master(pdev);
+
+               break;
+       case UIO_INT_MODE_MSIX:
+               /* No need for locking here since there shouldn't be any
+                * ioctl()s running by now.
+                */
+               for (i = 0; i < gdev->msix.num_irqs; i++) {
+                       if (gdev->msix.ctx[i].trigger)
+                               set_irq_eventfd(gdev, i, -1);
+
+                       kfree(gdev->msix.ctx[i].name);
+               }
+
+               pci_disable_msix(pdev);
+               pci_clear_master(pdev);
+               kfree(gdev->msix.ctx);
+               kfree(gdev->msix.table);
+
+               break;
+       default:
+               break;
+       }
+}
+
+
+static int probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        struct uio_pci_generic_dev *gdev;
        int err;
@@ -66,42 +404,64 @@ static int probe(struct pci_dev *pdev,
                return err;
        }
 
-       if (!pdev->irq) {
-               dev_warn(&pdev->dev, "No IRQ assigned to device: "
-                        "no support for interrupts?\n");
-               pci_disable_device(pdev);
-               return -ENODEV;
-       }
-
-       if (!pci_intx_mask_supported(pdev)) {
-               err = -ENODEV;
-               goto err_verify;
-       }
-
        gdev = kzalloc(sizeof(struct uio_pci_generic_dev), GFP_KERNEL);
        if (!gdev) {
                err = -ENOMEM;
                goto err_alloc;
        }
 
+       gdev->pdev = pdev;
        gdev->info.name = "uio_pci_generic";
        gdev->info.version = DRIVER_VERSION;
-       gdev->info.irq = pdev->irq;
-       gdev->info.irq_flags = IRQF_SHARED;
-       gdev->info.handler = irqhandler;
-       gdev->pdev = pdev;
+       gdev->info.ioctl = uio_pci_generic_ioctl;
+       mutex_init(&gdev->msix_state_lock);
+
+       err = pci_request_regions(pdev, "uio_pci_generic");
+       if (err != 0) {
+               dev_err(&pdev->dev, "Cannot request regions\n");
+               goto err_request_regions;
+       }
+
+       /* Enable the corresponding interrupt mode. Try to enable INT#X first
+        * for backward compatibility.
+        */
+       if (enable_intx(gdev))
+               dev_info(&pdev->dev, "Using INT#x mode: IRQ %ld",
+                        gdev->info.irq);
+       else if (enable_msix(gdev))
+               dev_info(&pdev->dev, "Using MSI-X mode: number of IRQs %d",
+                        gdev->msix.num_irqs);
+       else if (enable_msi(gdev))
+               dev_info(&pdev->dev, "Using MSI mode: IRQ %ld", gdev->info.irq);
+       else {
+               err = -ENODEV;
+               goto err_verify;
+       }
+
+       /* remap resources */
+       err = setup_maps(pdev, &gdev->info);
+       if (err)
+               goto err_maps;
 
        err = uio_register_device(&pdev->dev, &gdev->info);
        if (err)
                goto err_register;
+
        pci_set_drvdata(pdev, gdev);
 
        return 0;
+
 err_register:
+       release_iomaps(gdev);
+err_maps:
+       disable_intr(gdev);
+err_verify:
+       pci_release_regions(pdev);
+err_request_regions:
        kfree(gdev);
 err_alloc:
-err_verify:
        pci_disable_device(pdev);
+
        return err;
 }
 
@@ -110,8 +470,12 @@ static void remove(struct pci_dev *pdev)
        struct uio_pci_generic_dev *gdev = pci_get_drvdata(pdev);
 
        uio_unregister_device(&gdev->info);
-       pci_disable_device(pdev);
+       disable_intr(gdev);
+       release_iomaps(gdev);
+       pci_release_regions(pdev);
        kfree(gdev);
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
 }
 
 static struct pci_driver uio_pci_driver = {
diff --git a/include/linux/uio_pci_generic.h b/include/linux/uio_pci_generic.h
new file mode 100644
index 0000000..10716fc
--- /dev/null
+++ b/include/linux/uio_pci_generic.h
@@ -0,0 +1,36 @@
+/*
+ * include/linux/uio_pci_generic.h
+ *
+ * Userspace generic PCI IO driver.
+ *
+ * Licensed under the GPLv2 only.
+ */
+
+#ifndef _UIO_PCI_GENERIC_H_
+#define _UIO_PCI_GENERIC_H_
+
+#include <linux/ioctl.h>
+
+enum uio_int_mode {
+       UIO_INT_MODE_NONE,
+       UIO_INT_MODE_INTX,
+       UIO_INT_MODE_MSI,
+       UIO_INT_MODE_MSIX
+};
+
+/* bind the requested IRQ to the given eventfd */
+struct uio_pci_generic_irq_set {
+       int vec; /* index of the IRQ to connect to starting from 0 */
+       int fd;
+};
+
+#define UIO_PCI_GENERIC_BASE           0x86
+
+#define UIO_PCI_GENERIC_IRQ_SET                _IOW('I', UIO_PCI_GENERIC_BASE 
+ 1, \
+               struct uio_pci_generic_irq_set)
+#define UIO_PCI_GENERIC_IRQ_NUM_GET    _IOW('I', UIO_PCI_GENERIC_BASE + 2, \
+               uint32_t)
+#define UIO_PCI_GENERIC_INT_MODE_GET   _IOW('I', UIO_PCI_GENERIC_BASE + 3, \
+               uint32_t)
+
+#endif /* _UIO_PCI_GENERIC_H_ */
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to