This is a merge of igb_uio with MSI-X support through
eventfd (similar to VFIO). The driver requires a small change to the
upstream UIO driver to allow UIO drivers to support ioctls.

See:
http://marc.info/?l=linux-kernel&m=143197030217434&w=2
http://www.spinics.net/lists/kernel/msg1993359.html

Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
---
 config/common_linuxapp                    |   1 +
 lib/librte_eal/linuxapp/Makefile          |   3 +
 lib/librte_eal/linuxapp/uio_msi/Makefile  |  13 ++
 lib/librte_eal/linuxapp/uio_msi/uio_msi.c | 365 ++++++++++++++++++++++++++++++
 lib/librte_eal/linuxapp/uio_msi/uio_msi.h |  22 ++
 5 files changed, 404 insertions(+)
 create mode 100644 lib/librte_eal/linuxapp/uio_msi/Makefile
 create mode 100644 lib/librte_eal/linuxapp/uio_msi/uio_msi.c
 create mode 100644 lib/librte_eal/linuxapp/uio_msi/uio_msi.h

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0078dc9..8299efe 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -100,6 +100,7 @@ CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID=n
 CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n
 CONFIG_RTE_EAL_IGB_UIO=y
 CONFIG_RTE_EAL_VFIO=y
+CONFIG_RTE_EAL_UIO_MSI=y

 #
 # Special configurations in PCI Config Space for high performance
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index 8fcfdf6..d283952 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -34,6 +34,9 @@ include $(RTE_SDK)/mk/rte.vars.mk
 ifeq ($(CONFIG_RTE_EAL_IGB_UIO),y)
 DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += igb_uio
 endif
+ifeq ($(CONFIG_RTE_EAL_UIO_MSI),y)
+DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += uio_msi
+endif
 DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal
 ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += kni
diff --git a/lib/librte_eal/linuxapp/uio_msi/Makefile b/lib/librte_eal/linuxapp/uio_msi/Makefile
new file mode 100644
index 0000000..275174c
--- /dev/null
+++ b/lib/librte_eal/linuxapp/uio_msi/Makefile
@@ -0,0 +1,13 @@
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+MODULE = uio_msi
+MODULE_PATH = drivers/uio/uio_msi
+
+MODULE_CFLAGS += -I$(SRCDIR)
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+
+SRCS-y := uio_msi.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/uio_msi/uio_msi.c b/lib/librte_eal/linuxapp/uio_msi/uio_msi.c
new file mode 100644
index 0000000..7b1dcea
--- /dev/null
+++ b/lib/librte_eal/linuxapp/uio_msi/uio_msi.c
@@ -0,0 +1,365 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (c) 2015 by Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/eventfd.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/uio_driver.h>
+#include <linux/io.h>
+#include <linux/msi.h>
+#include <linux/version.h>
+
+#include "uio_msi.h"
+
+#define DRIVER_VERSION "0.1.0"
+#define NON_Q_VECTORS  1
+
+/* Per-vector state: which eventfd an MSI-X interrupt is forwarded to */
+struct uio_msi_irq_ctx {
+       struct eventfd_ctx *trigger;    /* eventfd bound to the vector, NULL if none */
+       char *name;                     /* name in /proc/interrupts */
+};
+
+/* Per-device state for one PCI function handled by this driver */
+struct uio_msi_pci_dev {
+       struct uio_info info;           /* UIO driver info */
+       struct pci_dev *pdev;           /* PCI device */
+       struct mutex mutex;             /* serialises open/release/ioctl */
+       int ref_cnt;                    /* references to device */
+       u16 num_vectors;                /* number of entries in msix[] and ctx[] */
+       struct msix_entry *msix;        /* MSI-X vector table */
+       struct uio_msi_irq_ctx *ctx;    /* per-vector eventfd state */
+};
+
+static unsigned int max_vectors = 33;  /* cap applied in uio_msi_probe() */
+module_param(max_vectors, uint, 0);
+MODULE_PARM_DESC(max_vectors, "Upper limit on # of MSI-X vectors used");
+
+/*
+ * Interrupt handler shared by every MSI-X vector.  @arg is the
+ * eventfd context that was passed to request_irq(); each interrupt
+ * adds 1 to that eventfd.
+ */
+static irqreturn_t uio_msi_irqhandler(int irq, void *arg)
+{
+       struct eventfd_ctx *evt = arg;
+
+       pr_devel("irq %u trigger %p\n", irq, evt);
+       eventfd_signal(evt, 1);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Bind MSI-X vector @vec to the eventfd behind @fd, replacing any
+ * eventfd already bound to that vector.  A negative @fd only removes
+ * the existing binding.
+ *
+ * Returns 0 on success, -ERANGE when @vec is not below
+ * udev->num_vectors, or the error reported by eventfd_ctx_fdget() or
+ * request_irq().  The previous binding is dropped before the new one
+ * is attempted, so it is gone even when an error is returned.
+ */
+static int set_irq_eventfd(struct uio_msi_pci_dev *udev, u32 vec, int fd)
+{
+       struct device *dev = &udev->pdev->dev;
+       struct uio_msi_irq_ctx *slot;
+       struct eventfd_ctx *evt;
+       int irq, rc;
+
+       if (vec >= udev->num_vectors) {
+               dev_notice(dev, "vec %u >= num_vec %u\n",
+                          vec, udev->num_vectors);
+               return -ERANGE;
+       }
+
+       slot = &udev->ctx[vec];
+       irq = udev->msix[vec].vector;
+
+       /* Tear down whatever was bound to this vector before */
+       if (slot->trigger) {
+               free_irq(irq, slot->trigger);
+               eventfd_ctx_put(slot->trigger);
+               slot->trigger = NULL;
+       }
+
+       /* A negative fd means "detach only" */
+       if (fd < 0)
+               return 0;
+
+       evt = eventfd_ctx_fdget(fd);
+       if (IS_ERR(evt)) {
+               rc = PTR_ERR(evt);
+               dev_notice(dev, "eventfd ctx get failed: %d\n", rc);
+               return rc;
+       }
+
+       rc = request_irq(irq, uio_msi_irqhandler, 0, slot->name, evt);
+       if (rc) {
+               dev_notice(dev, "request irq failed: %d\n", rc);
+               eventfd_ctx_put(evt);
+               return rc;
+       }
+
+       dev_dbg(dev, "map vector %u to fd %d trigger %p\n", vec, fd, evt);
+       slot->trigger = evt;
+
+       return 0;
+}
+
+/*
+ * ioctl callback installed in uio_info.ioctl.
+ *
+ * UIO_MSI_IRQ_SET copies a struct uio_msi_irq_set from user address
+ * @arg and applies it with udev->mutex held.  Returns the result of
+ * set_irq_eventfd(), -EFAULT if the copy from user space fails, or
+ * -EOPNOTSUPP for any other @cmd.
+ */
+static int
+uio_msi_ioctl(struct uio_info *info, unsigned int cmd, unsigned long arg)
+{
+       struct uio_msi_pci_dev *udev
+               = container_of(info, struct uio_msi_pci_dev, info);
+       struct uio_msi_irq_set req;
+       int rc;
+
+       if (cmd != UIO_MSI_IRQ_SET)
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+               return -EFAULT;
+
+       mutex_lock(&udev->mutex);
+       rc = set_irq_eventfd(udev, req.vec, req.fd);
+       mutex_unlock(&udev->mutex);
+
+       return rc;
+}
+
+/*
+ * open callback: opening the UIO device for the first time enables
+ * MSI-X with udev->num_vectors entries.
+ *
+ * Returns 0 on success or a negative errno.  The reference count is
+ * only taken when the open succeeds: a failed open is not followed by
+ * a release, so counting it would leave ref_cnt stuck above zero and
+ * no later open would ever enable MSI-X.
+ */
+static int
+uio_msi_open(struct uio_info *info, struct inode *inode)
+{
+       struct uio_msi_pci_dev *udev
+               = container_of(info, struct uio_msi_pci_dev, info);
+       int err = 0;
+
+       mutex_lock(&udev->mutex);
+       if (udev->ref_cnt == 0) {
+               err = pci_enable_msix(udev->pdev, udev->msix,
+                                     udev->num_vectors);
+               /*
+                * A positive value is the number of vectors that could
+                * have been allocated; MSI-X is still disabled, so turn
+                * it into a real error instead of returning it from open.
+                */
+               if (err > 0)
+                       err = -ENOSPC;
+       }
+       if (err == 0)
+               udev->ref_cnt++;
+       mutex_unlock(&udev->mutex);
+
+       return err;
+}
+
+/*
+ * release callback: drops one reference.  When the last reference
+ * goes away every vector that still has an eventfd bound is freed and
+ * MSI-X is disabled on the device.  Always returns 0.
+ */
+static int
+uio_msi_release(struct uio_info *info, struct inode *inode)
+{
+       struct uio_msi_pci_dev *udev
+               = container_of(info, struct uio_msi_pci_dev, info);
+       int vec;
+
+       mutex_lock(&udev->mutex);
+
+       /* Other openers remain: nothing to tear down yet */
+       if (--udev->ref_cnt != 0)
+               goto unlock;
+
+       for (vec = 0; vec < udev->num_vectors; vec++) {
+               struct uio_msi_irq_ctx *slot = &udev->ctx[vec];
+
+               if (slot->trigger) {
+                       free_irq(udev->msix[vec].vector, slot->trigger);
+                       eventfd_ctx_put(slot->trigger);
+                       slot->trigger = NULL;
+               }
+       }
+       pci_disable_msix(udev->pdev);
+
+unlock:
+       mutex_unlock(&udev->mutex);
+
+       return 0;
+}
+
+/*
+ * iounmap() every entry of the MAX_UIO_MAPS-long array @mem that has
+ * a non-NULL internal_addr.  The entries themselves are not modified.
+ */
+static void
+release_iomaps(struct uio_mem *mem)
+{
+       int idx;
+
+       for (idx = 0; idx < MAX_UIO_MAPS; idx++) {
+               if (!mem[idx].internal_addr)
+                       continue;
+
+               iounmap(mem[idx].internal_addr);
+       }
+}
+
+/*
+ * Describe the device's BARs in @info.
+ *
+ * Memory BARs are ioremap()ed and recorded in info->mem[] (at most
+ * MAX_UIO_MAPS of them); I/O port BARs are recorded in info->port[]
+ * (at most MAX_UIO_PORT_REGIONS).  BARs with a zero start or length,
+ * and BARs beyond those limits, are skipped.
+ *
+ * Returns 0 on success or -EINVAL if an ioremap() fails.  On failure
+ * every mapping made so far is undone and its internal_addr is reset
+ * to NULL, so a later release_iomaps() on the same array does not
+ * unmap it a second time.
+ */
+static int
+setup_maps(struct pci_dev *pdev, struct uio_info *info)
+{
+       int i, m = 0, p = 0;
+       static const char * const bar_names[] = {
+               "BAR0", "BAR1", "BAR2", "BAR3", "BAR4", "BAR5",
+       };
+
+       for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
+               /* resource_size_t: do not truncate 64-bit BARs on 32-bit kernels */
+               resource_size_t start = pci_resource_start(pdev, i);
+               resource_size_t len = pci_resource_len(pdev, i);
+               unsigned long flags = pci_resource_flags(pdev, i);
+
+               if (start == 0 || len == 0)
+                       continue;
+
+               if (flags & IORESOURCE_MEM) {
+                       void __iomem *addr;
+
+                       if (m >= MAX_UIO_MAPS)
+                               continue;
+
+                       addr = ioremap(start, len);
+                       if (addr == NULL)
+                               goto fail;
+
+                       info->mem[m].name = bar_names[i];
+                       info->mem[m].addr = start;
+                       info->mem[m].internal_addr = addr;
+                       info->mem[m].size = len;
+                       info->mem[m].memtype = UIO_MEM_PHYS;
+                       ++m;
+               } else if (flags & IORESOURCE_IO) {
+                       if (p >= MAX_UIO_PORT_REGIONS)
+                               continue;
+
+                       info->port[p].name = bar_names[i];
+                       info->port[p].start = start;
+                       info->port[p].size = len;
+                       info->port[p].porttype = UIO_PORT_X86;
+                       ++p;
+               }
+       }
+
+       return 0;
+
+ fail:
+       /* Undo the mappings made so far and forget their addresses */
+       while (m-- > 0) {
+               iounmap(info->mem[m].internal_addr);
+               info->mem[m].internal_addr = NULL;
+       }
+       return -EINVAL;
+}
+
+/*
+ * PCI probe callback: allocate per-device state, size the MSI-X
+ * tables, claim and map the BARs and register the device with UIO.
+ *
+ * The number of vectors is the device's MSI-X table size clamped to
+ * the max_vectors module parameter.  MSI-X itself is not enabled
+ * here; that happens in uio_msi_open().
+ *
+ * Returns 0 on success or a negative errno, in which case everything
+ * acquired so far has been released again.
+ */
+static int uio_msi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct uio_msi_pci_dev *udev;
+       int i, err, vectors;
+
+       udev = kzalloc(sizeof(*udev), GFP_KERNEL);
+       if (!udev)
+               return -ENOMEM;
+
+       err = pci_enable_device(pdev);
+       if (err != 0) {
+               dev_err(&pdev->dev, "cannot enable PCI device\n");
+               goto fail_free;
+       }
+
+       vectors = pci_msix_vec_count(pdev);
+       if (vectors < 0) {
+               dev_err(&pdev->dev, "device does not support MSI-X\n");
+               err = -EINVAL;
+               goto fail_disable;
+       }
+
+       /*
+        * Compare at full width: narrowing max_vectors to u16 before
+        * the comparison would turn e.g. 65536 into 0.
+        */
+       udev->num_vectors = min_t(unsigned int, vectors, max_vectors);
+
+       /* Every allocation failure below reports -ENOMEM */
+       err = -ENOMEM;
+
+       /* kcalloc() takes (count, element size, gfp flags), in that order */
+       udev->msix = kcalloc(udev->num_vectors, sizeof(*udev->msix),
+                            GFP_KERNEL);
+       if (!udev->msix)
+               goto fail_disable;
+
+       udev->ctx = kcalloc(udev->num_vectors, sizeof(*udev->ctx),
+                           GFP_KERNEL);
+       if (!udev->ctx)
+               goto fail_free_msix;
+
+       for (i = 0; i < udev->num_vectors; i++) {
+               udev->msix[i].entry = i;
+
+               udev->ctx[i].name = kasprintf(GFP_KERNEL,
+                                             KBUILD_MODNAME "[%d](%s)",
+                                             i, pci_name(pdev));
+               if (!udev->ctx[i].name)
+                       goto fail_free_ctx;
+       }
+
+       err = pci_request_regions(pdev, "uio_msi");
+       if (err != 0) {
+               dev_err(&pdev->dev, "Cannot request regions\n");
+               goto fail_free_ctx;
+       }
+
+       pci_set_master(pdev);
+
+       /*
+        * remap resources; setup_maps() unmaps whatever it mapped when
+        * it fails, so skip release_iomaps() on that path.
+        */
+       err = setup_maps(pdev, &udev->info);
+       if (err)
+               goto fail_release_regions;
+
+       /* fill uio infos */
+       udev->info.name = "uio_msi";
+       udev->info.version = DRIVER_VERSION;
+       udev->info.priv = udev;
+       udev->pdev = pdev;
+       udev->info.ioctl = uio_msi_ioctl;
+       udev->info.open = uio_msi_open;
+       udev->info.release = uio_msi_release;
+       udev->info.irq = UIO_IRQ_CUSTOM;
+       mutex_init(&udev->mutex);
+
+       /* register uio driver */
+       err = uio_register_device(&pdev->dev, &udev->info);
+       if (err != 0)
+               goto fail_release_iomem;
+
+       pci_set_drvdata(pdev, udev);
+       return 0;
+
+fail_release_iomem:
+       release_iomaps(udev->info.mem);
+fail_release_regions:
+       pci_release_regions(pdev);
+fail_free_ctx:
+       /* names not yet allocated are NULL (kcalloc), and kfree(NULL) is a no-op */
+       for (i = 0; i < udev->num_vectors; i++)
+               kfree(udev->ctx[i].name);
+       kfree(udev->ctx);
+fail_free_msix:
+       kfree(udev->msix);
+fail_disable:
+       pci_disable_device(pdev);
+fail_free:
+       kfree(udev);
+
+       return err;
+}
+
+/*
+ * PCI remove callback: unregister from UIO, unmap and release the
+ * BARs, free the per-vector tables and finally the device state.
+ *
+ * The drvdata stored by uio_msi_probe() is the struct uio_msi_pci_dev
+ * itself, so it is fetched with that type rather than as a
+ * struct uio_info, which only worked while info stayed the first member.
+ */
+static void uio_msi_remove(struct pci_dev *pdev)
+{
+       struct uio_msi_pci_dev *udev = pci_get_drvdata(pdev);
+       struct uio_info *info = &udev->info;
+       int i;
+
+       uio_unregister_device(info);
+       release_iomaps(info->mem);
+
+       pci_release_regions(pdev);
+       for (i = 0; i < udev->num_vectors; i++)
+               kfree(udev->ctx[i].name);
+       kfree(udev->ctx);
+       kfree(udev->msix);
+       pci_disable_device(pdev);
+
+       pci_set_drvdata(pdev, NULL);
+       kfree(udev);
+}
+
+static struct pci_driver uio_msi_pci_driver = {
+       .name = "uio_msi",              /* no .id_table is set in this initializer */
+       .probe = uio_msi_probe,         /* sets up state and registers with UIO */
+       .remove = uio_msi_remove,       /* unregisters and frees that state */
+};
+
+module_pci_driver(uio_msi_pci_driver);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Stephen Hemminger <stephen at networkplumber.org>");
+MODULE_DESCRIPTION("UIO driver for MSI-X PCI devices");
diff --git a/lib/librte_eal/linuxapp/uio_msi/uio_msi.h b/lib/librte_eal/linuxapp/uio_msi/uio_msi.h
new file mode 100644
index 0000000..297de00
--- /dev/null
+++ b/lib/librte_eal/linuxapp/uio_msi/uio_msi.h
@@ -0,0 +1,22 @@
+/*
+ * UIO_MSI API definition
+ *
+ * Copyright (c) 2015 by Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _UIO_PCI_MSI_H
+#define _UIO_PCI_MSI_H
+
+/* Argument of the UIO_MSI_IRQ_SET ioctl: ties one MSI-X vector to an eventfd */
+struct uio_msi_irq_set {
+       u32 vec;        /* MSI-X vector index; rejected with -ERANGE if too large */
+       int fd;         /* eventfd to signal; a negative value detaches the vector */
+};
+
+#define UIO_MSI_BASE   0x86
+#define UIO_MSI_IRQ_SET        _IOW('I', UIO_MSI_BASE+1, struct uio_msi_irq_set)
+
+#endif
-- 
2.1.4

Reply via email to