Module Name: src Committed By: bouyer Date: Mon May 23 15:03:05 UTC 2022
Modified Files: src/sys/arch/x86/pci: msipic.c msipic.h pci_machdep.c pci_msi_machdep.c src/sys/arch/xen/include: intr.h pci_machdep.h src/sys/arch/xen/x86: pintr.c Log Message: Work in progress on MSI/MSI-X on Xen (MSI works on my hardware, more work needed for MSI-X): - Xen silently rejects 32-bit writes to MSI configuration registers (especially when setting PCI_MSI_CTL_MSI_ENABLE/PCI_MSIX_CTL_ENABLE); it expects 16-bit writes. So introduce a pci_conf_write16(), only available on XENPV (and working only for mode 1 without PCI_OVERRIDE_CONF_WRITE) and use it to enable MSI or MSI-X on XENPV. - For multi-MSI vectors, Xen allocates all of them in a single hypercall, so it's not convenient to do it at intr_establish() time. So do it at alloc() time and register the pirqs in the msipic structure. xen_pic_to_gsi() now just returns the values cached in the msipic. As a bonus, if the PHYSDEVOP_map_pirq hypercall fails we can fail the alloc() and we don't need the xen_pci_msi*_probe() hacks. options NO_PCI_MSI_MSIX is still on by default for XEN3_DOM0. To generate a diff of this commit: cvs rdiff -u -r1.25 -r1.26 src/sys/arch/x86/pci/msipic.c cvs rdiff -u -r1.3 -r1.4 src/sys/arch/x86/pci/msipic.h cvs rdiff -u -r1.89 -r1.90 src/sys/arch/x86/pci/pci_machdep.c cvs rdiff -u -r1.16 -r1.17 src/sys/arch/x86/pci/pci_msi_machdep.c cvs rdiff -u -r1.58 -r1.59 src/sys/arch/xen/include/intr.h cvs rdiff -u -r1.20 -r1.21 src/sys/arch/xen/include/pci_machdep.h cvs rdiff -u -r1.20 -r1.21 src/sys/arch/xen/x86/pintr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/x86/pci/msipic.c diff -u src/sys/arch/x86/pci/msipic.c:1.25 src/sys/arch/x86/pci/msipic.c:1.26 --- src/sys/arch/x86/pci/msipic.c:1.25 Fri Dec 11 09:22:20 2020 +++ src/sys/arch/x86/pci/msipic.c Mon May 23 15:03:05 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: msipic.c,v 1.25 2020/12/11 09:22:20 knakahara Exp $ */ +/* $NetBSD: msipic.c,v 1.26 2022/05/23 15:03:05 bouyer Exp $ */ /* * Copyright (c) 2015 Internet Initiative Japan Inc. @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: msipic.c,v 1.25 2020/12/11 09:22:20 knakahara Exp $"); +__KERNEL_RCSID(0, "$NetBSD: msipic.c,v 1.26 2022/05/23 15:03:05 bouyer Exp $"); #include "opt_intrdebug.h" @@ -282,6 +282,16 @@ msipic_destruct_common_msi_pic(struct pi msipic_release_common_msi_devid(msipic->mp_devid); mutex_exit(&msipic_list_lock); + if (msipic->mp_i.mp_xen_pirq != NULL) { + KASSERT(msipic->mp_i.mp_veccnt > 0); +#ifdef DIAGNOSTIC + for (int i = 0; i < msipic->mp_i.mp_veccnt; i++) { + KASSERT(msipic->mp_i.mp_xen_pirq[i] == 0); + } +#endif + kmem_free(msipic->mp_i.mp_xen_pirq, + sizeof(*msipic->mp_i.mp_xen_pirq) * msipic->mp_i.mp_veccnt); + } kmem_free(msipic, sizeof(*msipic)); kmem_free(msi_pic, sizeof(*msi_pic)); } @@ -421,7 +431,11 @@ msi_addroute(struct pic *pic, struct cpu } #endif /* !XENPV */ ctl |= PCI_MSI_CTL_MSI_ENABLE; +#ifdef XENPV + pci_conf_write16(pc, tag, off + PCI_MSI_CTL + 2, ctl >> 16); +#else pci_conf_write(pc, tag, off + PCI_MSI_CTL, ctl); +#endif } /* @@ -546,9 +560,9 @@ msix_addroute(struct pic *pic, struct cp pci_chipset_tag_t pc; struct pci_attach_args *pa; pcitag_t tag; +#ifndef XENPV bus_space_tag_t bstag; bus_space_handle_t bshandle; -#ifndef XENPV uint64_t entry_base; pcireg_t addr, data; #endif @@ -567,6 +581,7 @@ msix_addroute(struct pic *pic, struct cp err = pci_get_capability(pc, tag, PCI_CAP_MSIX, &off, NULL); KASSERT(err != 0); +#ifndef XENPV /* Disable MSI-X before writing MSI-X table */ ctl = pci_conf_read(pc, tag, off + PCI_MSIX_CTL); 
ctl &= ~PCI_MSIX_CTL_ENABLE; @@ -574,7 +589,6 @@ msix_addroute(struct pic *pic, struct cp bstag = pic->pic_msipic->mp_bstag; bshandle = pic->pic_msipic->mp_bshandle; -#ifndef XENPV entry_base = PCI_MSIX_TABLE_ENTRY_SIZE * msix_vec; /* @@ -597,12 +611,19 @@ msix_addroute(struct pic *pic, struct cp entry_base + PCI_MSIX_TABLE_ENTRY_ADDR_HI, 0); bus_space_write_4(bstag, bshandle, entry_base + PCI_MSIX_TABLE_ENTRY_DATA, data); -#endif /* !XENPV */ BUS_SPACE_WRITE_FLUSH(bstag, bshandle); +#endif /* !XENPV */ ctl = pci_conf_read(pc, tag, off + PCI_MSIX_CTL); + if (ctl & PCI_MSIX_CTL_FUNCMASK) { + ctl &= ~PCI_MSIX_CTL_FUNCMASK; + } ctl |= PCI_MSIX_CTL_ENABLE; +#ifdef XENPV + pci_conf_write16(pc, tag, off + PCI_MSIX_CTL + 2, ctl >> 16); +#else pci_conf_write(pc, tag, off + PCI_MSIX_CTL, ctl); +#endif } /* @@ -803,6 +824,11 @@ msipic_set_msi_vectors(struct pic *msi_p } msi_pic->pic_msipic->mp_i.mp_veccnt = count; +#ifdef XENPV + msi_pic->pic_msipic->mp_i.mp_xen_pirq = + kmem_zalloc(sizeof(*msi_pic->pic_msipic->mp_i.mp_xen_pirq) * count, + KM_SLEEP); +#endif return 0; } Index: src/sys/arch/x86/pci/msipic.h diff -u src/sys/arch/x86/pci/msipic.h:1.3 src/sys/arch/x86/pci/msipic.h:1.4 --- src/sys/arch/x86/pci/msipic.h:1.3 Mon May 4 15:55:56 2020 +++ src/sys/arch/x86/pci/msipic.h Mon May 23 15:03:05 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: msipic.h,v 1.3 2020/05/04 15:55:56 jdolecek Exp $ */ +/* $NetBSD: msipic.h,v 1.4 2022/05/23 15:03:05 bouyer Exp $ */ /* * Copyright (c) 2015 Internet Initiative Japan Inc. @@ -44,7 +44,8 @@ int msipic_get_devid(struct pic *); struct msipic_pci_info { int mp_bus, mp_dev, mp_fun; int mp_veccnt; /* The number of MSI/MSI-X vectors. 
*/ - uint32_t mp_table_base; /* MSI-X table location in memory space */ + uint32_t mp_table_base; /* Xen: MSI-X table location in memory space */ + int *mp_xen_pirq; /* Xen: pirq numbers */ }; const struct msipic_pci_info *msipic_get_pci_info(struct pic *); Index: src/sys/arch/x86/pci/pci_machdep.c diff -u src/sys/arch/x86/pci/pci_machdep.c:1.89 src/sys/arch/x86/pci/pci_machdep.c:1.90 --- src/sys/arch/x86/pci/pci_machdep.c:1.89 Fri Oct 15 18:51:38 2021 +++ src/sys/arch/x86/pci/pci_machdep.c Mon May 23 15:03:05 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: pci_machdep.c,v 1.89 2021/10/15 18:51:38 jmcneill Exp $ */ +/* $NetBSD: pci_machdep.c,v 1.90 2022/05/23 15:03:05 bouyer Exp $ */ /*- * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. @@ -73,7 +73,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pci_machdep.c,v 1.89 2021/10/15 18:51:38 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pci_machdep.c,v 1.90 2022/05/23 15:03:05 bouyer Exp $"); #include <sys/types.h> #include <sys/param.h> @@ -740,6 +740,44 @@ pci_conf_write(pci_chipset_tag_t pc, pci pci_conf_unlock(&ocl); } +#ifdef XENPV +void +pci_conf_write16(pci_chipset_tag_t pc, pcitag_t tag, int reg, uint16_t data) +{ + pci_chipset_tag_t ipc; + struct pci_conf_lock ocl; + int dev; + + KASSERT((reg & 0x1) == 0); + + for (ipc = pc; ipc != NULL; ipc = ipc->pc_super) { + if ((ipc->pc_present & PCI_OVERRIDE_CONF_WRITE) == 0) + continue; + panic("pci_conf_write16 and override"); + } + + pci_decompose_tag(pc, tag, NULL, &dev, NULL); + if (__predict_false(pci_mode == 2 && dev >= 16)) { + return; + } + + if (reg < 0) + return; + if (reg >= PCI_CONF_SIZE) { +#if NACPICA > 0 && !defined(NO_PCI_EXTENDED_CONFIG) + if (reg >= PCI_EXTCONF_SIZE) + return; + panic("pci_conf_write16 and reg >= PCI_CONF_SIZE"); +#endif + return; + } + + pci_conf_lock(&ocl, pci_conf_selector(tag, reg & ~0x3)); + outl(pci_conf_port(tag, reg & ~0x3) + (reg & 0x3), data); + pci_conf_unlock(&ocl); +} +#endif /* XENPV */ + void pci_mode_set(int mode) { 
Index: src/sys/arch/x86/pci/pci_msi_machdep.c diff -u src/sys/arch/x86/pci/pci_msi_machdep.c:1.16 src/sys/arch/x86/pci/pci_msi_machdep.c:1.17 --- src/sys/arch/x86/pci/pci_msi_machdep.c:1.16 Sun Dec 5 04:56:39 2021 +++ src/sys/arch/x86/pci/pci_msi_machdep.c Mon May 23 15:03:05 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: pci_msi_machdep.c,v 1.16 2021/12/05 04:56:39 msaitoh Exp $ */ +/* $NetBSD: pci_msi_machdep.c,v 1.17 2022/05/23 15:03:05 bouyer Exp $ */ /* * Copyright (c) 2015 Internet Initiative Japan Inc. @@ -34,7 +34,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pci_msi_machdep.c,v 1.16 2021/12/05 04:56:39 msaitoh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pci_msi_machdep.c,v 1.17 2022/05/23 15:03:05 bouyer Exp $"); #include "opt_intrdebug.h" #include "ioapic.h" @@ -175,14 +175,6 @@ pci_msi_alloc_common(pci_intr_handle_t * return EINVAL; } -#ifdef XENPV - if (xen_pci_msi_probe(msi_pic, *count)) { - DPRINTF(("xen_pci_msi_probe() failed\n")); - msipic_destruct_msi_pic(msi_pic); - return EINVAL; - } -#endif - vectors = NULL; while (*count > 0) { vectors = pci_msi_alloc_vectors(msi_pic, NULL, count); @@ -216,6 +208,15 @@ pci_msi_alloc_common(pci_intr_handle_t * } *ihps = vectors; +#ifdef XENPV + if (xen_map_msi_pirq(msi_pic, *count)) { + DPRINTF(("xen_map_msi_pirq() failed\n")); + pci_msi_free_vectors(msi_pic, vectors, *count); + msipic_destruct_msi_pic(msi_pic); + return EINVAL; + } +#endif + return 0; } #endif /* __HAVE_PCI_MSI_MSIX */ @@ -270,14 +271,6 @@ pci_msix_alloc_common(pci_intr_handle_t if (msix_pic == NULL) return EINVAL; -#ifdef XENPV - if (xen_pci_msi_probe(msix_pic, *count)) { - DPRINTF(("xen_pci_msi_probe() failed\n")); - msipic_destruct_msix_pic(msix_pic); - return EINVAL; - } -#endif - vectors = NULL; while (*count > 0) { vectors = pci_msi_alloc_vectors(msix_pic, table_indexes, count); @@ -311,6 +304,15 @@ pci_msix_alloc_common(pci_intr_handle_t } *ihps = vectors; + +#ifdef XENPV + if (xen_map_msix_pirq(msix_pic, *count)) { + 
DPRINTF(("xen_map_msi_pirq() failed\n")); + pci_msi_free_vectors(msix_pic, vectors, *count); + msipic_destruct_msix_pic(msix_pic); + return EINVAL; + } +#endif return 0; } @@ -340,6 +342,9 @@ x86_pci_msi_release_internal(pci_intr_ha if (pic == NULL) return; +#ifdef XENPV + xen_pci_msi_release(pic, count); +#endif pci_msi_free_vectors(pic, pihs, count); msipic_destruct_msi_pic(pic); } @@ -379,6 +384,9 @@ x86_pci_msix_release_internal(pci_intr_h if (pic == NULL) return; +#ifdef XENPV + xen_pci_msi_release(pic, count); +#endif pci_msi_free_vectors(pic, pihs, count); msipic_destruct_msix_pic(pic); } Index: src/sys/arch/xen/include/intr.h diff -u src/sys/arch/xen/include/intr.h:1.58 src/sys/arch/xen/include/intr.h:1.59 --- src/sys/arch/xen/include/intr.h:1.58 Sun Jul 19 14:27:07 2020 +++ src/sys/arch/xen/include/intr.h Mon May 23 15:03:05 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: intr.h,v 1.58 2020/07/19 14:27:07 jdolecek Exp $ */ +/* $NetBSD: intr.h,v 1.59 2022/05/23 15:03:05 bouyer Exp $ */ /* NetBSD intr.h,v 1.15 2004/10/31 10:39:34 yamt Exp */ /*- @@ -72,7 +72,9 @@ int xen_intr_biglock_wrapper(void *); #if defined(DOM0OPS) || NPCI > 0 int xen_pic_to_gsi(struct pic *, int); -int xen_pci_msi_probe(struct pic *, int); +int xen_map_msi_pirq(struct pic *, int); +int xen_map_msix_pirq(struct pic *, int); +void xen_pci_msi_release(struct pic *, int); #endif /* defined(DOM0OPS) || NPCI > 0 */ #ifdef MULTIPROCESSOR Index: src/sys/arch/xen/include/pci_machdep.h diff -u src/sys/arch/xen/include/pci_machdep.h:1.20 src/sys/arch/xen/include/pci_machdep.h:1.21 --- src/sys/arch/xen/include/pci_machdep.h:1.20 Tue Feb 12 07:58:26 2019 +++ src/sys/arch/xen/include/pci_machdep.h Mon May 23 15:03:05 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: pci_machdep.h,v 1.20 2019/02/12 07:58:26 cherry Exp $ */ +/* $NetBSD: pci_machdep.h,v 1.21 2022/05/23 15:03:05 bouyer Exp $ */ /* * Copyright (c) 2006 Manuel Bouyer. 
@@ -73,6 +73,10 @@ int xpci_enumerate_bus(struct pci_softc #define PCI_MACHDEP_ENUMERATE_BUS xpci_enumerate_bus #endif +#ifdef XENPV +void pci_conf_write16(pci_chipset_tag_t, pcitag_t, int, uint16_t); +#endif + /* functions provided to MI PCI */ int xen_pci_enumerate_bus(struct pci_softc *, const int *, Index: src/sys/arch/xen/x86/pintr.c diff -u src/sys/arch/xen/x86/pintr.c:1.20 src/sys/arch/xen/x86/pintr.c:1.21 --- src/sys/arch/xen/x86/pintr.c:1.20 Sat Aug 1 12:39:40 2020 +++ src/sys/arch/xen/x86/pintr.c Mon May 23 15:03:05 2022 @@ -103,7 +103,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pintr.c,v 1.20 2020/08/01 12:39:40 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pintr.c,v 1.21 2022/05/23 15:03:05 bouyer Exp $"); #include "opt_multiprocessor.h" #include "opt_xen.h" @@ -168,65 +168,119 @@ short irq2port[NR_EVENT_CHANNELS] = {0}; #if defined(DOM0OPS) || NPCI > 0 #ifdef __HAVE_PCI_MSI_MSIX -static int -xen_map_msi_pirq(struct pic *pic, int count, int *gsi) +int +xen_map_msi_pirq(struct pic *pic, int count) { struct physdev_map_pirq map_irq; - const struct msipic_pci_info *i = msipic_get_pci_info(pic); + const struct msipic_pci_info *msi_i = msipic_get_pci_info(pic); + int i; int ret; if (count == -1) - count = i->mp_veccnt; + count = msi_i->mp_veccnt; KASSERT(count > 0); + KASSERT(pic->pic_type == PIC_MSI); + memset(&map_irq, 0, sizeof(map_irq)); map_irq.domid = DOMID_SELF; map_irq.type = MAP_PIRQ_TYPE_MSI_SEG; map_irq.index = -1; map_irq.pirq = -1; - map_irq.bus = i->mp_bus; - map_irq.devfn = (i->mp_dev << 3) | i->mp_fun; + map_irq.bus = msi_i->mp_bus; + map_irq.devfn = (msi_i->mp_dev << 3) | msi_i->mp_fun; + aprint_debug("xen_map_msi_pirq bus %d devfn 0x%x (%d %d) entry_nr %d", + map_irq.bus, map_irq.devfn, msi_i->mp_dev, msi_i->mp_fun, + map_irq.entry_nr); map_irq.entry_nr = count; - if (pic->pic_type == PIC_MSI && i->mp_veccnt > 1) { + if (msi_i->mp_veccnt > 1) { map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI; - } else if (pic->pic_type == PIC_MSIX) 
{ - map_irq.table_base = i->mp_table_base; } ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); if (ret == 0) { KASSERT(map_irq.entry_nr == count); - *gsi = map_irq.pirq; + aprint_debug(" pirq(s)"); + for (i = 0; i < count; i++) { + msi_i->mp_xen_pirq[i] = map_irq.pirq + i; + aprint_debug(" %d", msi_i->mp_xen_pirq[i]); + } + aprint_debug("\n"); + } else { + aprint_debug(" fail\n"); } - return ret; } -/* - * Check if we can map MSI interrupt. The Xen call fails if VT-d is not - * available or disabled. - */ int -xen_pci_msi_probe(struct pic *pic, int count) +xen_map_msix_pirq(struct pic *pic, int count) { - int pirq, ret; + struct physdev_map_pirq map_irq; + const struct msipic_pci_info *msi_i = msipic_get_pci_info(pic); + int i; + int ret; - ret = xen_map_msi_pirq(pic, count, &pirq); + if (count == -1) + count = msi_i->mp_veccnt; + KASSERT(count > 0); - if (ret == 0) { + KASSERT(pic->pic_type == PIC_MSIX); + + memset(&map_irq, 0, sizeof(map_irq)); + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_MSI_SEG; + map_irq.index = -1; + map_irq.pirq = -1; + map_irq.bus = msi_i->mp_bus; + map_irq.devfn = (msi_i->mp_dev << 3) | msi_i->mp_fun; + aprint_debug("xen_map_msix_pirq bus %d devfn 0x%x (%d %d) count %d", + map_irq.bus, map_irq.devfn, msi_i->mp_dev, msi_i->mp_fun, + count); + + for (i = 0; i < count; i++) { + map_irq.entry_nr = i; + map_irq.pirq = -1; + aprint_debug(" map %d", i); + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (ret) { + aprint_debug(" fail\n"); + goto fail; + } + msi_i->mp_xen_pirq[i] = map_irq.pirq; + aprint_debug("->%d", msi_i->mp_xen_pirq[i]); + } + aprint_debug("\n"); + return 0; + +fail: + i--; + while(i >= 0) { struct physdev_unmap_pirq unmap_irq; unmap_irq.domid = DOMID_SELF; - unmap_irq.pirq = pirq; + unmap_irq.pirq = msi_i->mp_xen_pirq[i]; (void)HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); - } else { - aprint_debug("PHYSDEVOP_map_pirq() failed %d, MSI disabled\n", - ret); + 
msi_i->mp_xen_pirq[i] = 0; } - return ret; } + +void +xen_pci_msi_release(struct pic *pic, int count) +{ + const struct msipic_pci_info *msi_i = msipic_get_pci_info(pic); + KASSERT(count == msi_i->mp_veccnt); + for (int i = 0; i < count; i++) { + struct physdev_unmap_pirq unmap_irq; + unmap_irq.domid = DOMID_SELF; + unmap_irq.pirq = msi_i->mp_xen_pirq[i]; + + (void)HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); + msi_i->mp_xen_pirq[i] = 0; + } +} + #endif /* __HAVE_PCI_MSI_MSIX */ /* @@ -273,17 +327,25 @@ xen_pic_to_gsi(struct pic *pic, int pin) panic("physdev_op(PHYSDEVOP_alloc_irq_vector) %d" " fail %d", gsi, ret); } + aprint_debug("xen_pic_to_gsi %s pin %d gsi %d allocated %d\n", + (pic->pic_type == PIC_IOAPIC) ? "ioapic" : "i8259", + pin, gsi, irq_op.vector); + KASSERT(irq_op.vector == gsi); break; } case PIC_MSI: case PIC_MSIX: #ifdef __HAVE_PCI_MSI_MSIX - ret = xen_map_msi_pirq(pic, -1, &gsi); - if (ret != 0) - panic("physdev_op(PHYSDEVOP_map_pirq) MSI fail %d", - ret); + { + const struct msipic_pci_info *msi_i = msipic_get_pci_info(pic); + KASSERT(pin < msi_i->mp_veccnt); + gsi = msi_i->mp_xen_pirq[pin]; + aprint_debug("xen_pic_to_gsi %s pin %d gsi %d\n", + (pic->pic_type == PIC_MSI) ? "MSI" : "MSIX", + pin, gsi); break; + } #endif default: panic("unknown pic_type %d", pic->pic_type);