Author: imp
Date: Wed Aug 21 22:17:55 2019
New Revision: 351355
URL: https://svnweb.freebsd.org/changeset/base/351355

Log:
  Separate the pci attachment from the rest of nvme
  
  Nvme drives can be attached in a number of different ways. Separate out the PCI
  attachment so that we can have other attachment types, like ahci and various
  types of NVMeoF.
  
  Submitted by: cognet@

Added:
  head/sys/dev/nvme/nvme_pci.c   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/dev/nvme/nvme.c
  head/sys/dev/nvme/nvme_ctrlr.c
  head/sys/dev/nvme/nvme_private.h
  head/sys/modules/nvme/Makefile

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Wed Aug 21 21:05:15 2019        (r351354)
+++ head/sys/conf/files Wed Aug 21 22:17:55 2019        (r351355)
@@ -2483,6 +2483,7 @@ dev/nvme/nvme_ctrlr.c             optional nvme
 dev/nvme/nvme_ctrlr_cmd.c      optional nvme
 dev/nvme/nvme_ns.c             optional nvme
 dev/nvme/nvme_ns_cmd.c         optional nvme
+dev/nvme/nvme_pci.c            optional nvme pci
 dev/nvme/nvme_qpair.c          optional nvme
 dev/nvme/nvme_sim.c            optional nvme scbus
 dev/nvme/nvme_sysctl.c         optional nvme

Modified: head/sys/dev/nvme/nvme.c
==============================================================================
--- head/sys/dev/nvme/nvme.c    Wed Aug 21 21:05:15 2019        (r351354)
+++ head/sys/dev/nvme/nvme.c    Wed Aug 21 22:17:55 2019        (r351355)
@@ -36,9 +36,6 @@ __FBSDID("$FreeBSD$");
 
 #include <vm/uma.h>
 
-#include <dev/pci/pcireg.h>
-#include <dev/pci/pcivar.h>
-
 #include "nvme_private.h"
 
 struct nvme_consumer {
@@ -58,107 +55,8 @@ int32_t             nvme_retry_count;
 
 MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
 
-static int    nvme_probe(device_t);
-static int    nvme_attach(device_t);
-static int    nvme_detach(device_t);
-static int    nvme_shutdown(device_t);
+devclass_t nvme_devclass;
 
-static devclass_t nvme_devclass;
-
-static device_method_t nvme_pci_methods[] = {
-       /* Device interface */
-       DEVMETHOD(device_probe,     nvme_probe),
-       DEVMETHOD(device_attach,    nvme_attach),
-       DEVMETHOD(device_detach,    nvme_detach),
-       DEVMETHOD(device_shutdown,  nvme_shutdown),
-       { 0, 0 }
-};
-
-static driver_t nvme_pci_driver = {
-       "nvme",
-       nvme_pci_methods,
-       sizeof(struct nvme_controller),
-};
-
-DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, NULL, NULL);
-MODULE_VERSION(nvme, 1);
-MODULE_DEPEND(nvme, cam, 1, 1, 1);
-
-static struct _pcsid
-{
-       uint32_t        devid;
-       int             match_subdevice;
-       uint16_t        subdevice;
-       const char      *desc;
-       uint32_t        quirks;
-} pci_ids[] = {
-       { 0x01118086,           0, 0, "NVMe Controller"  },
-       { IDT32_PCI_ID,         0, 0, "IDT NVMe Controller (32 channel)"  },
-       { IDT8_PCI_ID,          0, 0, "IDT NVMe Controller (8 channel)" },
-       { 0x09538086,           1, 0x3702, "DC P3700 SSD" },
-       { 0x09538086,           1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
-       { 0x09538086,           1, 0x3704, "DC P3500 SSD [Add-in Card]" },
-       { 0x09538086,           1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
-       { 0x09538086,           1, 0x3709, "DC P3600 SSD [Add-in Card]" },
-       { 0x09538086,           1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
-       { 0x00031c58,           0, 0, "HGST SN100",     QUIRK_DELAY_B4_CHK_RDY 
},
-       { 0x00231c58,           0, 0, "WDC SN200",      QUIRK_DELAY_B4_CHK_RDY 
},
-       { 0x05401c5f,           0, 0, "Memblaze Pblaze4", 
QUIRK_DELAY_B4_CHK_RDY },
-       { 0xa821144d,           0, 0, "Samsung PM1725", QUIRK_DELAY_B4_CHK_RDY 
},
-       { 0xa822144d,           0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY 
},
-       { 0x01161179,           0, 0, "Toshiba XG5", QUIRK_DISABLE_TIMEOUT },
-       { 0x00000000,           0, 0, NULL  }
-};
-
-static int
-nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
-{
-       if (devid != ep->devid)
-               return 0;
-
-       if (!ep->match_subdevice)
-               return 1;
-
-       if (subdevice == ep->subdevice)
-               return 1;
-       else
-               return 0;
-}
-
-static int
-nvme_probe (device_t device)
-{
-       struct _pcsid   *ep;
-       uint32_t        devid;
-       uint16_t        subdevice;
-
-       devid = pci_get_devid(device);
-       subdevice = pci_get_subdevice(device);
-       ep = pci_ids;
-
-       while (ep->devid) {
-               if (nvme_match(devid, subdevice, ep))
-                       break;
-               ++ep;
-       }
-
-       if (ep->desc) {
-               device_set_desc(device, ep->desc);
-               return (BUS_PROBE_DEFAULT);
-       }
-
-#if defined(PCIS_STORAGE_NVM)
-       if (pci_get_class(device)    == PCIC_STORAGE &&
-           pci_get_subclass(device) == PCIS_STORAGE_NVM &&
-           pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) 
{
-               device_set_desc(device, "Generic NVMe Device");
-               return (BUS_PROBE_GENERIC);
-       }
-#endif
-
-       return (ENXIO);
-}
-
 static void
 nvme_init(void)
 {
@@ -181,7 +79,7 @@ nvme_uninit(void)
 
 SYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
 
-static int
+int
 nvme_shutdown(device_t dev)
 {
        struct nvme_controller  *ctrlr;
@@ -225,25 +123,12 @@ nvme_dump_completion(struct nvme_completion *cpl)
            cpl->cid, p, sc, sct, m, dnr);
 }
 
-static int
+int
 nvme_attach(device_t dev)
 {
        struct nvme_controller  *ctrlr = DEVICE2SOFTC(dev);
        int                     status;
-       struct _pcsid           *ep;
-       uint32_t                devid;
-       uint16_t                subdevice;
 
-       devid = pci_get_devid(dev);
-       subdevice = pci_get_subdevice(dev);
-       ep = pci_ids;
-       while (ep->devid) {
-               if (nvme_match(devid, subdevice, ep))
-                       break;
-               ++ep;
-       }
-       ctrlr->quirks = ep->quirks;
-
        status = nvme_ctrlr_construct(ctrlr, dev);
 
        if (status != 0) {
@@ -252,32 +137,8 @@ nvme_attach(device_t dev)
        }
 
        /*
-        * Some drives do not implement the completion timeout feature
-        * correctly. There's a WAR from the manufacturer to just disable it.
-        * The driver wouldn't respond correctly to a timeout anyway.
-        */
-       if (ep->quirks & QUIRK_DISABLE_TIMEOUT) {
-               int ptr;
-               uint16_t devctl2;
-
-               status = pci_find_cap(dev, PCIY_EXPRESS, &ptr);
-               if (status) {
-                       device_printf(dev, "Can't locate PCIe capability?");
-                       return (status);
-               }
-               devctl2 = pci_read_config(dev, ptr + PCIER_DEVICE_CTL2, 
sizeof(devctl2));
-               devctl2 |= PCIEM_CTL2_COMP_TIMO_DISABLE;
-               pci_write_config(dev, ptr + PCIER_DEVICE_CTL2, devctl2, 
sizeof(devctl2));
-       }
-
-       /*
-        * Enable busmastering so the completion status messages can
-        * be busmastered back to the host.
-        */
-       pci_enable_busmaster(dev);
-
-       /*
         * Reset controller twice to ensure we do a transition from cc.en==1
         *  to cc.en==0.  This is because we don't really know what status
         *  the controller was left in when boot handed off to OS.
         */
@@ -301,13 +162,12 @@ nvme_attach(device_t dev)
        return (0);
 }
 
-static int
+int
 nvme_detach (device_t dev)
 {
        struct nvme_controller  *ctrlr = DEVICE2SOFTC(dev);
 
        nvme_ctrlr_destruct(ctrlr, dev);
-       pci_disable_busmaster(dev);
        return (0);
 }
 

Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c      Wed Aug 21 21:05:15 2019        
(r351354)
+++ head/sys/dev/nvme/nvme_ctrlr.c      Wed Aug 21 22:17:55 2019        
(r351355)
@@ -42,50 +42,14 @@ __FBSDID("$FreeBSD$");
 #include <sys/uio.h>
 #include <sys/endian.h>
 
-#include <dev/pci/pcireg.h>
-#include <dev/pci/pcivar.h>
-
 #include "nvme_private.h"
 
 #define B4_CHK_RDY_DELAY_MS    2300            /* work around controller bug */
 
 static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
                                                struct nvme_async_event_request 
*aer);
-static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
 
 static int
-nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
-{
-
-       ctrlr->resource_id = PCIR_BAR(0);
-
-       ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
-           &ctrlr->resource_id, RF_ACTIVE);
-
-       if(ctrlr->resource == NULL) {
-               nvme_printf(ctrlr, "unable to allocate pci resource\n");
-               return (ENOMEM);
-       }
-
-       ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
-       ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
-       ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
-
-       /*
-        * The NVMe spec allows for the MSI-X table to be placed behind
-        *  BAR 4/5, separate from the control/doorbell registers.  Always
-        *  try to map this bar, because it must be mapped prior to calling
-        *  pci_alloc_msix().  If the table isn't behind BAR 4/5,
-        *  bus_alloc_resource() will just return NULL which is OK.
-        */
-       ctrlr->bar4_resource_id = PCIR_BAR(4);
-       ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, 
SYS_RES_MEMORY,
-           &ctrlr->bar4_resource_id, RF_ACTIVE);
-
-       return (0);
-}
-
-static int
 nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
 {
        struct nvme_qpair       *qpair;
@@ -876,9 +840,8 @@ nvme_ctrlr_start(void *ctrlr_arg)
         *  the number of I/O queues supported, so cannot reset
         *  the adminq again here.
         */
-       if (ctrlr->is_resetting) {
+       if (ctrlr->is_resetting)
                nvme_qpair_reset(&ctrlr->adminq);
-       }
 
        for (i = 0; i < ctrlr->num_io_queues; i++)
                nvme_qpair_reset(&ctrlr->ioq[i]);
@@ -1004,34 +967,6 @@ nvme_ctrlr_intx_handler(void *arg)
        nvme_mmio_write_4(ctrlr, intmc, 1);
 }
 
-static int
-nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
-{
-
-       ctrlr->msix_enabled = 0;
-       ctrlr->num_io_queues = 1;
-       ctrlr->num_cpus_per_ioq = mp_ncpus;
-       ctrlr->rid = 0;
-       ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
-           &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
-
-       if (ctrlr->res == NULL) {
-               nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
-               return (ENOMEM);
-       }
-
-       bus_setup_intr(ctrlr->dev, ctrlr->res,
-           INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
-           ctrlr, &ctrlr->tag);
-
-       if (ctrlr->tag == NULL) {
-               nvme_printf(ctrlr, "unable to setup intx handler\n");
-               return (ENOMEM);
-       }
-
-       return (0);
-}
-
 static void
 nvme_pt_done(void *arg, const struct nvme_completion *cpl)
 {
@@ -1177,88 +1112,6 @@ static struct cdevsw nvme_ctrlr_cdevsw = {
        .d_ioctl =      nvme_ctrlr_ioctl
 };
 
-static void
-nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
-{
-       device_t        dev;
-       int             per_cpu_io_queues;
-       int             min_cpus_per_ioq;
-       int             num_vectors_requested, num_vectors_allocated;
-       int             num_vectors_available;
-
-       dev = ctrlr->dev;
-       min_cpus_per_ioq = 1;
-       TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
-
-       if (min_cpus_per_ioq < 1) {
-               min_cpus_per_ioq = 1;
-       } else if (min_cpus_per_ioq > mp_ncpus) {
-               min_cpus_per_ioq = mp_ncpus;
-       }
-
-       per_cpu_io_queues = 1;
-       TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
-
-       if (per_cpu_io_queues == 0) {
-               min_cpus_per_ioq = mp_ncpus;
-       }
-
-       ctrlr->force_intx = 0;
-       TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
-
-       /*
-        * FreeBSD currently cannot allocate more than about 190 vectors at
-        *  boot, meaning that systems with high core count and many devices
-        *  requesting per-CPU interrupt vectors will not get their full
-        *  allotment.  So first, try to allocate as many as we may need to
-        *  understand what is available, then immediately release them.
-        *  Then figure out how many of those we will actually use, based on
-        *  assigning an equal number of cores to each I/O queue.
-        */
-
-       /* One vector for per core I/O queue, plus one vector for admin queue. 
*/
-       num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1);
-       if (pci_alloc_msix(dev, &num_vectors_available) != 0) {
-               num_vectors_available = 0;
-       }
-       pci_release_msi(dev);
-
-       if (ctrlr->force_intx || num_vectors_available < 2) {
-               nvme_ctrlr_configure_intx(ctrlr);
-               return;
-       }
-
-       /*
-        * Do not use all vectors for I/O queues - one must be saved for the
-        *  admin queue.
-        */
-       ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
-           howmany(mp_ncpus, num_vectors_available - 1));
-
-       ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq);
-       num_vectors_requested = ctrlr->num_io_queues + 1;
-       num_vectors_allocated = num_vectors_requested;
-
-       /*
-        * Now just allocate the number of vectors we need.  This should
-        *  succeed, since we previously called pci_alloc_msix()
-        *  successfully returning at least this many vectors, but just to
-        *  be safe, if something goes wrong just revert to INTx.
-        */
-       if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
-               nvme_ctrlr_configure_intx(ctrlr);
-               return;
-       }
-
-       if (num_vectors_allocated < num_vectors_requested) {
-               pci_release_msi(dev);
-               nvme_ctrlr_configure_intx(ctrlr);
-               return;
-       }
-
-       ctrlr->msix_enabled = 1;
-}
-
 int
 nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
 {
@@ -1274,11 +1127,6 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, de
 
        mtx_init(&ctrlr->lock, "nvme ctrlr lock", NULL, MTX_DEF);
 
-       status = nvme_ctrlr_allocate_bar(ctrlr);
-
-       if (status != 0)
-               return (status);
-
        /*
         * Software emulators may set the doorbell stride to something
         *  other than zero, but this driver is not set up to handle that.
@@ -1308,8 +1156,6 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, de
        ctrlr->enable_aborts = 0;
        TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
 
-       nvme_ctrlr_setup_interrupts(ctrlr);
-
        ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
        if (nvme_ctrlr_construct_admin_qpair(ctrlr) != 0)
                return (ENXIO);
@@ -1394,9 +1240,6 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, dev
        if (ctrlr->res)
                bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
                    rman_get_rid(ctrlr->res), ctrlr->res);
-
-       if (ctrlr->msix_enabled)
-               pci_release_msi(dev);
 
        if (ctrlr->bar4_resource != NULL) {
                bus_release_resource(dev, SYS_RES_MEMORY,

Added: head/sys/dev/nvme/nvme_pci.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/dev/nvme/nvme_pci.c        Wed Aug 21 22:17:55 2019        
(r351355)
@@ -0,0 +1,333 @@
+/*-
+ * Copyright (C) 2012-2016 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "nvme_private.h"
+
+static int    nvme_pci_probe(device_t);
+static int    nvme_pci_attach(device_t);
+static int    nvme_pci_detach(device_t);
+
+static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
+
+static device_method_t nvme_pci_methods[] = {
+       /* Device interface */
+       DEVMETHOD(device_probe,     nvme_pci_probe),
+       DEVMETHOD(device_attach,    nvme_pci_attach),
+       DEVMETHOD(device_detach,    nvme_pci_detach),
+       DEVMETHOD(device_shutdown,  nvme_shutdown),
+       { 0, 0 }
+};
+
+static driver_t nvme_pci_driver = {
+       "nvme",
+       nvme_pci_methods,
+       sizeof(struct nvme_controller),
+};
+
+DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, NULL, 0);
+MODULE_VERSION(nvme_pci, 1);
+
+static struct _pcsid
+{
+       uint32_t        devid;
+       int             match_subdevice;
+       uint16_t        subdevice;
+       const char      *desc;
+       uint32_t        quirks;
+} pci_ids[] = {
+       { 0x01118086,           0, 0, "NVMe Controller"  },
+       { IDT32_PCI_ID,         0, 0, "IDT NVMe Controller (32 channel)"  },
+       { IDT8_PCI_ID,          0, 0, "IDT NVMe Controller (8 channel)" },
+       { 0x09538086,           1, 0x3702, "DC P3700 SSD" },
+       { 0x09538086,           1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
+       { 0x09538086,           1, 0x3704, "DC P3500 SSD [Add-in Card]" },
+       { 0x09538086,           1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
+       { 0x09538086,           1, 0x3709, "DC P3600 SSD [Add-in Card]" },
+       { 0x09538086,           1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
+       { 0x00031c58,           0, 0, "HGST SN100",     QUIRK_DELAY_B4_CHK_RDY 
},
+       { 0x00231c58,           0, 0, "WDC SN200",      QUIRK_DELAY_B4_CHK_RDY 
},
+       { 0x05401c5f,           0, 0, "Memblaze Pblaze4", 
QUIRK_DELAY_B4_CHK_RDY },
+       { 0xa821144d,           0, 0, "Samsung PM1725", QUIRK_DELAY_B4_CHK_RDY 
},
+       { 0xa822144d,           0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY 
},
+       { 0x00000000,           0, 0, NULL  }
+};
+
+
+static int
+nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
+{
+       if (devid != ep->devid)
+               return 0;
+
+       if (!ep->match_subdevice)
+               return 1;
+
+       if (subdevice == ep->subdevice)
+               return 1;
+       else
+               return 0;
+}
+
+static int
+nvme_pci_probe (device_t device)
+{
+       struct nvme_controller *ctrlr = DEVICE2SOFTC(device);
+       struct _pcsid   *ep;
+       uint32_t        devid;
+       uint16_t        subdevice;
+
+       devid = pci_get_devid(device);
+       subdevice = pci_get_subdevice(device);
+       ep = pci_ids;
+
+       while (ep->devid) {
+               if (nvme_match(devid, subdevice, ep))
+                       break;
+               ++ep;
+       }
+       if (ep->devid)
+               ctrlr->quirks = ep->quirks;
+
+       if (ep->desc) {
+               device_set_desc(device, ep->desc);
+               return (BUS_PROBE_DEFAULT);
+       }
+
+#if defined(PCIS_STORAGE_NVM)
+       if (pci_get_class(device)    == PCIC_STORAGE &&
+           pci_get_subclass(device) == PCIS_STORAGE_NVM &&
+           pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) 
{
+               device_set_desc(device, "Generic NVMe Device");
+               return (BUS_PROBE_GENERIC);
+       }
+#endif
+
+       return (ENXIO);
+}
+
+static int
+nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
+{
+
+       ctrlr->resource_id = PCIR_BAR(0);
+
+       ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
+           &ctrlr->resource_id, RF_ACTIVE);
+
+       if(ctrlr->resource == NULL) {
+               nvme_printf(ctrlr, "unable to allocate pci resource\n");
+               return (ENOMEM);
+       }
+
+       ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
+       ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
+       ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
+
+       /*
+        * The NVMe spec allows for the MSI-X table to be placed behind
+        *  BAR 4/5, separate from the control/doorbell registers.  Always
+        *  try to map this bar, because it must be mapped prior to calling
+        *  pci_alloc_msix().  If the table isn't behind BAR 4/5,
+        *  bus_alloc_resource() will just return NULL which is OK.
+        */
+       ctrlr->bar4_resource_id = PCIR_BAR(4);
+       ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, 
SYS_RES_MEMORY,
+           &ctrlr->bar4_resource_id, RF_ACTIVE);
+
+       return (0);
+}
+
+static int
+nvme_pci_attach(device_t dev)
+{
+       struct nvme_controller*ctrlr = DEVICE2SOFTC(dev);
+       int status;
+
+       ctrlr->dev = dev;
+       status = nvme_ctrlr_allocate_bar(ctrlr);
+       if (status != 0)
+               goto bad;
+       pci_enable_busmaster(dev);
+       nvme_ctrlr_setup_interrupts(ctrlr);
+       return nvme_attach(dev);
+bad:
+       if (ctrlr->resource != NULL) {
+               bus_release_resource(dev, SYS_RES_MEMORY,
+                   ctrlr->resource_id, ctrlr->resource);
+       }
+
+       if (ctrlr->bar4_resource != NULL) {
+               bus_release_resource(dev, SYS_RES_MEMORY,
+                   ctrlr->bar4_resource_id, ctrlr->bar4_resource);
+       }
+
+       if (ctrlr->tag)
+               bus_teardown_intr(dev, ctrlr->res, ctrlr->tag);
+
+       if (ctrlr->res)
+               bus_release_resource(dev, SYS_RES_IRQ,
+                   rman_get_rid(ctrlr->res), ctrlr->res);
+
+       if (ctrlr->msix_enabled)
+               pci_release_msi(dev);
+
+       return status;
+}
+
+static int
+nvme_pci_detach(device_t dev)
+{
+       struct nvme_controller*ctrlr = DEVICE2SOFTC(dev);
+
+       if (ctrlr->msix_enabled)
+               pci_release_msi(dev);
+       pci_disable_busmaster(dev);
+       return (nvme_detach(dev));
+}
+
+static int
+nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
+{
+
+       ctrlr->msix_enabled = 0;
+       ctrlr->num_io_queues = 1;
+       ctrlr->num_cpus_per_ioq = mp_ncpus;
+       ctrlr->rid = 0;
+       ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
+           &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
+
+       if (ctrlr->res == NULL) {
+               nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
+               return (ENOMEM);
+       }
+
+       bus_setup_intr(ctrlr->dev, ctrlr->res,
+           INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
+           ctrlr, &ctrlr->tag);
+
+       if (ctrlr->tag == NULL) {
+               nvme_printf(ctrlr, "unable to setup intx handler\n");
+               return (ENOMEM);
+       }
+
+       return (0);
+}
+
+static void
+nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
+{
+       device_t        dev;
+       int             per_cpu_io_queues;
+       int             min_cpus_per_ioq;
+       int             num_vectors_requested, num_vectors_allocated;
+       int             num_vectors_available;
+
+       dev = ctrlr->dev;
+       min_cpus_per_ioq = 1;
+       TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
+
+       if (min_cpus_per_ioq < 1) {
+               min_cpus_per_ioq = 1;
+       } else if (min_cpus_per_ioq > mp_ncpus) {
+               min_cpus_per_ioq = mp_ncpus;
+       }
+
+       per_cpu_io_queues = 1;
+       TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
+
+       if (per_cpu_io_queues == 0) {
+               min_cpus_per_ioq = mp_ncpus;
+       }
+
+       ctrlr->force_intx = 0;
+       TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
+
+       /*
+        * FreeBSD currently cannot allocate more than about 190 vectors at
+        *  boot, meaning that systems with high core count and many devices
+        *  requesting per-CPU interrupt vectors will not get their full
+        *  allotment.  So first, try to allocate as many as we may need to
+        *  understand what is available, then immediately release them.
+        *  Then figure out how many of those we will actually use, based on
+        *  assigning an equal number of cores to each I/O queue.
+        */
+
+       /* One vector for per core I/O queue, plus one vector for admin queue. 
*/
+       num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1);
+       if (pci_alloc_msix(dev, &num_vectors_available) != 0) {
+               num_vectors_available = 0;
+       }
+       pci_release_msi(dev);
+
+       if (ctrlr->force_intx || num_vectors_available < 2) {
+               nvme_ctrlr_configure_intx(ctrlr);
+               return;
+       }
+
+       /*
+        * Do not use all vectors for I/O queues - one must be saved for the
+        *  admin queue.
+        */
+       ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
+           howmany(mp_ncpus, num_vectors_available - 1));
+
+       ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq);
+       num_vectors_requested = ctrlr->num_io_queues + 1;
+       num_vectors_allocated = num_vectors_requested;
+
+       /*
+        * Now just allocate the number of vectors we need.  This should
+        *  succeed, since we previously called pci_alloc_msix()
+        *  successfully returning at least this many vectors, but just to
+        *  be safe, if something goes wrong just revert to INTx.
+        */
+       if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
+               nvme_ctrlr_configure_intx(ctrlr);
+               return;
+       }
+
+       if (num_vectors_allocated < num_vectors_requested) {
+               pci_release_msi(dev);
+               nvme_ctrlr_configure_intx(ctrlr);
+               return;
+       }
+
+       ctrlr->msix_enabled = 1;
+}

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h    Wed Aug 21 21:05:15 2019        
(r351354)
+++ head/sys/dev/nvme/nvme_private.h    Wed Aug 21 22:17:55 2019        
(r351355)
@@ -37,6 +37,7 @@
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
+#include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/rman.h>
 #include <sys/systm.h>
@@ -122,6 +123,8 @@ struct nvme_completion_poll_status {
        int                     done;
 };
 
+extern devclass_t nvme_devclass;
+
 #define NVME_REQUEST_VADDR     1
 #define NVME_REQUEST_NULL      2 /* For requests with no payload. */
 #define NVME_REQUEST_UIO       3
@@ -438,6 +441,10 @@ void       nvme_sysctl_initialize_ctrlr(struct 
nvme_controll
 
 void   nvme_dump_command(struct nvme_command *cmd);
 void   nvme_dump_completion(struct nvme_completion *cpl);
+
+int    nvme_attach(device_t dev);
+int    nvme_shutdown(device_t dev);
+int    nvme_detach(device_t dev);
 
 static __inline void
 nvme_single_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)

Modified: head/sys/modules/nvme/Makefile
==============================================================================
--- head/sys/modules/nvme/Makefile      Wed Aug 21 21:05:15 2019        
(r351354)
+++ head/sys/modules/nvme/Makefile      Wed Aug 21 22:17:55 2019        
(r351355)
@@ -9,6 +9,7 @@ SRCS =  nvme.c                  \
        nvme_ctrlr_cmd.c        \
        nvme_ns.c               \
        nvme_ns_cmd.c           \
+       nvme_pci.c              \
        nvme_qpair.c            \
        nvme_sim.c              \
        nvme_sysctl.c           \
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to