This is a merge of igb_uio with MSI-X support delivered through eventfd (similar to VFIO). The driver requires a small change to the upstream UIO driver so that UIO drivers can support ioctls.
See: http://marc.info/?l=linux-kernel&m=143197030217434&w=2 http://www.spinics.net/lists/kernel/msg1993359.html Signed-off-by: Stephen Hemminger <stephen at networkplumber.org> --- config/common_linuxapp | 1 + lib/librte_eal/linuxapp/Makefile | 3 + lib/librte_eal/linuxapp/uio_msi/Makefile | 13 ++ lib/librte_eal/linuxapp/uio_msi/uio_msi.c | 365 ++++++++++++++++++++++++++++++ lib/librte_eal/linuxapp/uio_msi/uio_msi.h | 22 ++ 5 files changed, 404 insertions(+) create mode 100644 lib/librte_eal/linuxapp/uio_msi/Makefile create mode 100644 lib/librte_eal/linuxapp/uio_msi/uio_msi.c create mode 100644 lib/librte_eal/linuxapp/uio_msi/uio_msi.h diff --git a/config/common_linuxapp b/config/common_linuxapp index 0078dc9..8299efe 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -100,6 +100,7 @@ CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID=n CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n CONFIG_RTE_EAL_IGB_UIO=y CONFIG_RTE_EAL_VFIO=y +CONFIG_RTE_EAL_UIO_MSI=y # # Special configurations in PCI Config Space for high performance diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile index 8fcfdf6..d283952 100644 --- a/lib/librte_eal/linuxapp/Makefile +++ b/lib/librte_eal/linuxapp/Makefile @@ -34,6 +34,9 @@ include $(RTE_SDK)/mk/rte.vars.mk ifeq ($(CONFIG_RTE_EAL_IGB_UIO),y) DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += igb_uio endif +ifeq ($(CONFIG_RTE_EAL_UIO_MSI),y) +DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += uio_msi +endif DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += kni diff --git a/lib/librte_eal/linuxapp/uio_msi/Makefile b/lib/librte_eal/linuxapp/uio_msi/Makefile new file mode 100644 index 0000000..275174c --- /dev/null +++ b/lib/librte_eal/linuxapp/uio_msi/Makefile @@ -0,0 +1,13 @@ + +include $(RTE_SDK)/mk/rte.vars.mk + +MODULE = uio_msi +MODULE_PATH = drivers/uio/uio_msi + +MODULE_CFLAGS += -I$(SRCDIR) +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include +MODULE_CFLAGS += -Winline -Wall 
+MODULE_CFLAGS += -Werror
+
+SRCS-y := uio_msi.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/uio_msi/uio_msi.c b/lib/librte_eal/linuxapp/uio_msi/uio_msi.c
new file mode 100644
index 0000000..7b1dcea
--- /dev/null
+++ b/lib/librte_eal/linuxapp/uio_msi/uio_msi.c
@@ -0,0 +1,365 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (c) 2015 by Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/eventfd.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/uio_driver.h>
+#include <linux/io.h>
+#include <linux/msi.h>
+#include <linux/version.h>
+
+#include "uio_msi.h"
+
+#define DRIVER_VERSION		"0.1.0"
+#define NON_Q_VECTORS		1
+
+/*
+ * Per-device state, allocated in uio_msi_probe().  It embeds the uio_info
+ * that is registered with the UIO core, so the callbacks recover it with
+ * container_of().  msix[] and ctx[] both hold num_vectors elements and are
+ * indexed by MSI-X vector number.
+ */
+struct uio_msi_pci_dev {
+	struct uio_info info;		/* UIO driver info */
+	struct pci_dev *pdev;		/* PCI device */
+	struct mutex mutex;		/* open/release/ioctl mutex */
+	int ref_cnt;			/* references to device */
+	u16 num_vectors;		/* How many MSI-X slots are used */
+	struct msix_entry *msix;	/* MSI-x vector table */
+	struct uio_msi_irq_ctx {
+		struct eventfd_ctx *trigger; /* MSI-x vector to eventfd */
+		char *name;		/* name in /proc/interrupts */
+	} *ctx;
+};
+
+/* Load-time cap; probe uses the smaller of this and the device's count. */
+static unsigned int max_vectors = 33;
+module_param(max_vectors, uint, 0);
+MODULE_PARM_DESC(max_vectors, "Upper limit on # of MSI-X vectors used");
+
+/*
+ * Interrupt handler shared by every MSI-X vector.
+ *
+ * @irq: Linux IRQ number that fired (only used for the debug print).
+ * @arg: the eventfd context that was passed to request_irq() as dev_id.
+ *
+ * Adds 1 to the eventfd counter and reports the interrupt as handled.
+ */
+static irqreturn_t uio_msi_irqhandler(int irq, void *arg)
+{
+	struct eventfd_ctx *trigger = arg;
+
+	/* irq is a signed int, so print it with %d */
+	pr_devel("irq %d trigger %p\n", irq, trigger);
+
+	eventfd_signal(trigger, 1);
+	return IRQ_HANDLED;
+}
+
+/* set the mapping between vector # and existing eventfd.
+ *
+ * Any handler already bound to @vec is freed and its eventfd reference is
+ * dropped before a new binding is made.  A negative @fd only removes the
+ * existing binding.  uio_msi_ioctl() calls this with udev->mutex held.
+ *
+ * Returns 0 on success, -ERANGE if @vec is not below udev->num_vectors, or
+ * the error reported by eventfd_ctx_fdget() / request_irq().
+ */
+static int set_irq_eventfd(struct uio_msi_pci_dev *udev, u32 vec, int fd)
+{
+	struct uio_msi_irq_ctx *ctx;
+	struct eventfd_ctx *trigger;
+	int irq, err;
+
+	if (vec >= udev->num_vectors) {
+		dev_notice(&udev->pdev->dev, "vec %u >= num_vec %u\n",
+			   vec, udev->num_vectors);
+		return -ERANGE;
+	}
+
+	irq = udev->msix[vec].vector;
+
+	/* Clean up existing irq mapping */
+	ctx = &udev->ctx[vec];
+	if (ctx->trigger) {
+		free_irq(irq, ctx->trigger);
+		eventfd_ctx_put(ctx->trigger);
+		ctx->trigger = NULL;
+	}
+
+	/* Passing -1 is used to disable interrupt */
+	if (fd < 0)
+		return 0;
+
+	trigger = eventfd_ctx_fdget(fd);
+	if (IS_ERR(trigger)) {
+		err = PTR_ERR(trigger);
+		dev_notice(&udev->pdev->dev,
+			   "eventfd ctx get failed: %d\n", err);
+		return err;
+	}
+
+	/* the eventfd context doubles as dev_id so free_irq() can match it */
+	err = request_irq(irq, uio_msi_irqhandler, 0, ctx->name, trigger);
+	if (err) {
+		dev_notice(&udev->pdev->dev,
+			   "request irq failed: %d\n", err);
+		eventfd_ctx_put(trigger);
+		return err;
+	}
+
+	dev_dbg(&udev->pdev->dev, "map vector %u to fd %d trigger %p\n",
+		vec, fd, trigger);
+	ctx->trigger = trigger;
+	return 0;
+}
+
+/*
+ * ioctl callback installed in uio_info.
+ *
+ * UIO_MSI_IRQ_SET copies a struct uio_msi_irq_set from userspace and binds
+ * (or unbinds) the requested vector under udev->mutex.  Returns -EFAULT if
+ * the copy fails and -EOPNOTSUPP for any other command.
+ */
+static int
+uio_msi_ioctl(struct uio_info *info, unsigned int cmd, unsigned long arg)
+{
+	struct uio_msi_pci_dev *udev
+		= container_of(info, struct uio_msi_pci_dev, info);
+	struct uio_msi_irq_set hdr;
+	int err;
+
+	switch (cmd) {
+	case UIO_MSI_IRQ_SET:
+		if (copy_from_user(&hdr, (void __user *)arg, sizeof(hdr)))
+			return -EFAULT;
+
+		mutex_lock(&udev->mutex);
+		err = set_irq_eventfd(udev, hdr.vec, hdr.fd);
+		mutex_unlock(&udev->mutex);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+	return err;
+}
+
+/*
+ * Opening the UIO device for first time enables MSI-X.
+ *
+ * The reference is counted only when the open succeeds, so a failed
+ * pci_enable_msix() is retried by the next open instead of leaving ref_cnt
+ * above zero with MSI-X still disabled.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int
+uio_msi_open(struct uio_info *info, struct inode *inode)
+{
+	struct uio_msi_pci_dev *udev
+		= container_of(info, struct uio_msi_pci_dev, info);
+	int err = 0;
+
+	mutex_lock(&udev->mutex);
+	if (udev->ref_cnt == 0) {
+		err = pci_enable_msix(udev->pdev, udev->msix,
+				      udev->num_vectors);
+		/*
+		 * A positive return is the number of vectors that could
+		 * have been allocated, not success; never hand it back as
+		 * the result of open().
+		 */
+		if (err > 0)
+			err = -ENOSPC;
+	}
+	if (err == 0)
+		udev->ref_cnt++;
+	mutex_unlock(&udev->mutex);
+
+	return err;
+}
+
+/*
+ * Last close of the UIO device releases/disables all IRQ's.
+ *
+ * Every vector that still has an eventfd bound gets its handler freed and
+ * its eventfd reference dropped before MSI-X is disabled.  Always returns 0.
+ */
+static int
+uio_msi_release(struct uio_info *info, struct inode *inode)
+{
+	struct uio_msi_pci_dev *udev
+		= container_of(info, struct uio_msi_pci_dev, info);
+
+	mutex_lock(&udev->mutex);
+	if (--udev->ref_cnt == 0) {
+		int i;
+
+		for (i = 0; i < udev->num_vectors; i++) {
+			struct uio_msi_irq_ctx *ctx = &udev->ctx[i];
+
+			if (!ctx->trigger)
+				continue;
+
+			free_irq(udev->msix[i].vector, ctx->trigger);
+			eventfd_ctx_put(ctx->trigger);
+			ctx->trigger = NULL;
+		}
+		pci_disable_msix(udev->pdev);
+	}
+	mutex_unlock(&udev->mutex);
+
+	return 0;
+}
+
+/*
+ * Unmap previously ioremap'd resources.
+ *
+ * @mem: first element of a MAX_UIO_MAPS-sized uio_mem array.
+ *
+ * Each unmapped slot has internal_addr reset to NULL, so calling this a
+ * second time on the same array does not unmap anything twice.
+ */
+static void
+release_iomaps(struct uio_mem *mem)
+{
+	int i;
+
+	for (i = 0; i < MAX_UIO_MAPS; i++, mem++) {
+		if (mem->internal_addr) {
+			iounmap(mem->internal_addr);
+			mem->internal_addr = NULL;
+		}
+	}
+}
+
+/*
+ * Describe the device's BARs in @info.
+ *
+ * Memory BARs are ioremap'd and recorded in info->mem[]; I/O port BARs are
+ * recorded in info->port[].  BARs with a zero start or length are skipped,
+ * as are BARs beyond MAX_UIO_MAPS / MAX_UIO_PORT_REGIONS.
+ *
+ * Returns 0 on success.  If an ioremap() fails, every mapping made by this
+ * call is undone (and its internal_addr cleared) and -EINVAL is returned.
+ */
+static int
+setup_maps(struct pci_dev *pdev, struct uio_info *info)
+{
+	int i, m = 0, p = 0, err;
+	static const char * const bar_names[] = {
+		"BAR0",	"BAR1",	"BAR2",	"BAR3",	"BAR4",	"BAR5",
+	};
+
+	for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
+		/* resource_size_t: unsigned long can truncate a 64-bit BAR */
+		resource_size_t start = pci_resource_start(pdev, i);
+		resource_size_t len = pci_resource_len(pdev, i);
+		unsigned long flags = pci_resource_flags(pdev, i);
+
+		if (start == 0 || len == 0)
+			continue;
+
+		if (flags & IORESOURCE_MEM) {
+			void __iomem *addr;
+
+			if (m >= MAX_UIO_MAPS)
+				continue;
+
+			addr = ioremap(start, len);
+			if (addr == NULL) {
+				err = -EINVAL;
+				goto fail;
+			}
+
+			info->mem[m].name = bar_names[i];
+			info->mem[m].addr = start;
+			info->mem[m].internal_addr = addr;
+			info->mem[m].size = len;
+			info->mem[m].memtype = UIO_MEM_PHYS;
+			++m;
+		} else if (flags & IORESOURCE_IO) {
+			if (p >= MAX_UIO_PORT_REGIONS)
+				continue;
+
+			info->port[p].name = bar_names[i];
+			info->port[p].start = start;
+			info->port[p].size = len;
+			info->port[p].porttype = UIO_PORT_X86;
+			++p;
+		}
+	}
+
+	return 0;
+ fail:
+	/* clear each pointer so a later release_iomaps() cannot unmap again */
+	for (i = 0; i < m; i++) {
+		iounmap(info->mem[i].internal_addr);
+		info->mem[i].internal_addr = NULL;
+	}
+	return err;
+}
+
+/*
+ * PCI probe: allocate the per-device state, size the MSI-X tables, claim
+ * and map the BARs, then register with the UIO core.
+ *
+ * MSI-X itself is not enabled here; that happens on first open.
+ *
+ * Returns 0 on success or a negative errno, in which case everything
+ * acquired so far has been released again.
+ */
+static int uio_msi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct uio_msi_pci_dev *udev;
+	int i, err, vectors;
+
+	udev = kzalloc(sizeof(*udev), GFP_KERNEL);
+	if (!udev)
+		return -ENOMEM;
+
+	err = pci_enable_device(pdev);
+	if (err != 0) {
+		dev_err(&pdev->dev, "cannot enable PCI device\n");
+		goto fail_free;
+	}
+
+	vectors = pci_msix_vec_count(pdev);
+	if (vectors < 0) {
+		dev_err(&pdev->dev, "device does not support MSI-X\n");
+		err = -EINVAL;
+		goto fail_disable;
+	}
+
+	/*
+	 * Compare as unsigned int: casting max_vectors to u16 before the
+	 * comparison would turn e.g. 65536 into 0.
+	 */
+	udev->num_vectors = min_t(unsigned int, vectors, max_vectors);
+
+	/* every failure up to pci_request_regions() is an allocation failure */
+	err = -ENOMEM;
+
+	/* kcalloc() takes (count, element size, gfp flags) in that order */
+	udev->msix = kcalloc(udev->num_vectors, sizeof(*udev->msix),
+			     GFP_KERNEL);
+	if (!udev->msix)
+		goto fail_disable;
+
+	udev->ctx = kcalloc(udev->num_vectors, sizeof(*udev->ctx),
+			    GFP_KERNEL);
+	if (!udev->ctx)
+		goto fail_free_msix;
+
+	for (i = 0; i < udev->num_vectors; i++) {
+		udev->msix[i].entry = i;
+
+		udev->ctx[i].name = kasprintf(GFP_KERNEL,
+					      KBUILD_MODNAME "[%d](%s)",
+					      i, pci_name(pdev));
+		if (!udev->ctx[i].name)
+			goto fail_free_ctx;
+	}
+
+	err = pci_request_regions(pdev, "uio_msi");
+	if (err != 0) {
+		dev_err(&pdev->dev, "Cannot request regions\n");
+		goto fail_free_ctx;
+	}
+
+	pci_set_master(pdev);
+
+	/*
+	 * remap resources; setup_maps() undoes its own mappings on failure,
+	 * so skip release_iomaps() to avoid unmapping them a second time
+	 */
+	err = setup_maps(pdev, &udev->info);
+	if (err)
+		goto fail_release_regions;
+
+	/* fill uio infos */
+	udev->info.name = "uio_msi";
+	udev->info.version = DRIVER_VERSION;
+	udev->info.priv = udev;
+	udev->pdev = pdev;
+	udev->info.ioctl = uio_msi_ioctl;
+	udev->info.open = uio_msi_open;
+	udev->info.release = uio_msi_release;
+	udev->info.irq = UIO_IRQ_CUSTOM;
+	mutex_init(&udev->mutex);
+
+	/* register uio driver */
+	err = uio_register_device(&pdev->dev, &udev->info);
+	if (err != 0)
+		goto fail_release_iomem;
+
+	pci_set_drvdata(pdev, udev);
+	return 0;
+
+fail_release_iomem:
+	release_iomaps(udev->info.mem);
+fail_release_regions:
+	pci_release_regions(pdev);
+fail_free_ctx:
+	/* ctx[] came from kcalloc(), so unset names are NULL and safe to kfree */
+	for (i = 0; i < udev->num_vectors; i++)
+		kfree(udev->ctx[i].name);
+	kfree(udev->ctx);
+fail_free_msix:
+	kfree(udev->msix);
+fail_disable:
+	pci_disable_device(pdev);
+fail_free:
+	kfree(udev);
+
+	return err;
+}
+
+/*
+ * PCI remove: unregister from UIO and release everything probe acquired.
+ *
+ * drvdata holds the uio_msi_pci_dev pointer stored by probe; it is read
+ * back as that type rather than relying on uio_info being the first member.
+ */
+static void uio_msi_remove(struct pci_dev *pdev)
+{
+	struct uio_msi_pci_dev *udev = pci_get_drvdata(pdev);
+	int i;
+
+	uio_unregister_device(&udev->info);
+	release_iomaps(udev->info.mem);
+
+	pci_release_regions(pdev);
+	for (i = 0; i < udev->num_vectors; i++)
+		kfree(udev->ctx[i].name);
+	kfree(udev->ctx);
+	kfree(udev->msix);
+	pci_disable_device(pdev);
+
+	pci_set_drvdata(pdev, NULL);
+	kfree(udev);
+}
+
+/* No id_table is set, so no device IDs are claimed statically. */
+static struct pci_driver uio_msi_pci_driver = {
+	.name = "uio_msi",
+	.probe = uio_msi_probe,
+	.remove = uio_msi_remove,
+};
+
+module_pci_driver(uio_msi_pci_driver);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Stephen Hemminger <stephen at networkplumber.org>");
+MODULE_DESCRIPTION("UIO driver for MSI-X PCI devices");
diff --git a/lib/librte_eal/linuxapp/uio_msi/uio_msi.h b/lib/librte_eal/linuxapp/uio_msi/uio_msi.h
new file mode 100644
index 0000000..297de00
--- /dev/null
+++ b/lib/librte_eal/linuxapp/uio_msi/uio_msi.h
@@ -0,0 +1,22 @@
+/*
+ * UIO_MSI API definition
+ *
+ * Copyright (c) 2015 by Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _UIO_PCI_MSI_H
+#define _UIO_PCI_MSI_H
+
+struct uio_msi_irq_set {
+	u32 vec;
+	int fd;
+};
+
+#define UIO_MSI_BASE	0x86
+#define UIO_MSI_IRQ_SET	_IOW('I', UIO_MSI_BASE+1, struct uio_msi_irq_set)
+
+#endif
-- 
2.1.4