The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=5c59cec2d5e141de54bbc737477a8e498685186a

commit 5c59cec2d5e141de54bbc737477a8e498685186a
Author:     John Baldwin <j...@freebsd.org>
AuthorDate: 2025-07-09 14:17:41 +0000
Commit:     John Baldwin <j...@freebsd.org>
CommitDate: 2025-07-09 14:19:45 +0000

    nvmf: Auto-reconnect periodically after a disconnect
    
    Use a timer in the nvmf(4) driver to periodically trigger a devctl
    "RECONNECT" notification.  A rule in the /etc/devd/nvmf.conf file
    invokes "nvmecontrol reconnect nvmeX" upon each notification.  This
    differs from iSCSI, which uses a dedicated daemon (iscsid(8)) that
    waits inside a custom ioctl for an iSCSI initiator event to occur,
    but I think this design might be simpler.
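
    For example (the nvme0 unit name here is purely illustrative), a
    disconnected device should show up on the devctl(4) stream as an
    event along the lines of:

        !system=nvme subsystem=controller type=RECONNECT name="nvme0"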
    
    Similar to nvme-cli, the interval between reconnection attempts is
    specified in seconds by the --reconnect-delay argument to the connect
    and reconnect commands.  Note that nvme-cli uses -c as the short
    option for this flag, but that letter was already taken, so
    nvmecontrol uses -r instead.  The default is 10 seconds to match
    Linux.
    
    In addition, a second timeout can be used to force a full detach of a
    disconnected nvmeX device after the controller loss timeout expires.
    This timeout is specified in seconds by the --ctrl-loss-tmo/-l
    options (identical to nvme-cli).  The default is 600 seconds.
    
    Either of these timers can be disabled by setting the timer to 0.  In
    that case, the associated action (devctl notifications or full detach)
    will not occur after a disconnect.
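
    For example, to connect with a 5 second reconnect interval and a
    5 minute controller loss timeout (the target address and subsystem
    NQN below are placeholders):

        nvmecontrol connect -r 5 -l 300 192.0.2.1:4420 nqn.2016-06.io.example:ctl

    while "-r 0 -l 0" would leave a disconnected device in place without
    issuing any reconnect notifications.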
    
    Note that this adds a dedicated taskqueue for nvmf tasks instead of
    using taskqueue_thread, as the controller loss task could deadlock
    waiting for the completion of other tasks queued to taskqueue_thread.
    (Specifically, tearing down the CAM SIM can trigger
    destroy_dev_sched_cb(), which then waits for the callback to run, but
    the callback is scheduled to run in a task on taskqueue_thread.
    Possibly, destroy_dev_sched should use a dedicated taskqueue.)
    
    Reviewed by:    imp (earlier version)
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D50222
---
 lib/libnvmf/libnvmf.h          |   6 ++-
 lib/libnvmf/nvmf_host.c        |  21 ++++++--
 sbin/devd/Makefile             |   5 ++
 sbin/devd/devd.conf.5          |   4 +-
 sbin/devd/nvmf.conf            |   7 +++
 sbin/nvmecontrol/connect.c     |  11 +++-
 sbin/nvmecontrol/nvmecontrol.8 |  31 ++++++++++-
 sbin/nvmecontrol/reconnect.c   |  17 +++++-
 sys/dev/nvmf/host/nvmf.c       | 119 +++++++++++++++++++++++++++++++++++++++--
 sys/dev/nvmf/host/nvmf_var.h   |   6 +++
 sys/dev/nvmf/nvmf.h            |  11 ++++
 11 files changed, 221 insertions(+), 17 deletions(-)

diff --git a/lib/libnvmf/libnvmf.h b/lib/libnvmf/libnvmf.h
index 9840e190a24f..7cdd7e433455 100644
--- a/lib/libnvmf/libnvmf.h
+++ b/lib/libnvmf/libnvmf.h
@@ -342,7 +342,8 @@ int nvmf_host_request_queues(struct nvmf_qpair *qp, u_int requested,
  */
 int    nvmf_handoff_host(const struct nvme_discovery_log_entry *dle,
     const char *hostnqn, struct nvmf_qpair *admin_qp, u_int num_queues,
-    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata);
+    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata,
+    uint32_t reconnect_delay, uint32_t controller_loss_timeout);
 
 /*
  * Disconnect an active host association previously handed off to the
@@ -370,7 +371,8 @@ int nvmf_reconnect_params(int fd, nvlist_t **nvlp);
  */
 int    nvmf_reconnect_host(int fd, const struct nvme_discovery_log_entry *dle,
     const char *hostnqn, struct nvmf_qpair *admin_qp, u_int num_queues,
-    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata);
+    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata,
+    uint32_t reconnect_delay, uint32_t controller_loss_timeout);
 
 /*
  * Fetch connection status from an existing kernel host.
diff --git a/lib/libnvmf/nvmf_host.c b/lib/libnvmf/nvmf_host.c
index 89cdd5c6bb70..3266f8898296 100644
--- a/lib/libnvmf/nvmf_host.c
+++ b/lib/libnvmf/nvmf_host.c
@@ -792,7 +792,8 @@ static int
 prepare_queues_for_handoff(struct nvmf_ioc_nv *nv,
     const struct nvme_discovery_log_entry *dle, const char *hostnqn,
     struct nvmf_qpair *admin_qp, u_int num_queues,
-    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
+    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata,
+    uint32_t reconnect_delay, uint32_t controller_loss_timeout)
 {
        const struct nvmf_association *na = admin_qp->nq_association;
        nvlist_t *nvl, *nvl_qp, *nvl_rparams;
@@ -820,6 +821,9 @@ prepare_queues_for_handoff(struct nvmf_ioc_nv *nv,
        nvlist_add_string(nvl_rparams, "hostnqn", hostnqn);
        nvlist_add_number(nvl_rparams, "num_io_queues", num_queues);
        nvlist_add_number(nvl_rparams, "kato", admin_qp->nq_kato);
+       nvlist_add_number(nvl_rparams, "reconnect_delay", reconnect_delay);
+       nvlist_add_number(nvl_rparams, "controller_loss_timeout",
+           controller_loss_timeout);
        nvlist_add_number(nvl_rparams, "io_qsize", io_queues[0]->nq_qsize);
        nvlist_add_bool(nvl_rparams, "sq_flow_control",
            na->na_params.sq_flow_control);
@@ -842,6 +846,9 @@ prepare_queues_for_handoff(struct nvmf_ioc_nv *nv,
        nvl = nvlist_create(0);
        nvlist_add_number(nvl, "trtype", na->na_trtype);
        nvlist_add_number(nvl, "kato", admin_qp->nq_kato);
+       nvlist_add_number(nvl, "reconnect_delay", reconnect_delay);
+       nvlist_add_number(nvl, "controller_loss_timeout",
+           controller_loss_timeout);
        nvlist_move_nvlist(nvl, "rparams", nvl_rparams);
 
        /* First, the admin queue. */
@@ -872,7 +879,8 @@ prepare_queues_for_handoff(struct nvmf_ioc_nv *nv,
 int
 nvmf_handoff_host(const struct nvme_discovery_log_entry *dle,
     const char *hostnqn, struct nvmf_qpair *admin_qp, u_int num_queues,
-    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
+    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata,
+    uint32_t reconnect_delay, uint32_t controller_loss_timeout)
 {
        struct nvmf_ioc_nv nv;
        u_int i;
@@ -885,7 +893,8 @@ nvmf_handoff_host(const struct nvme_discovery_log_entry *dle,
        }
 
        error = prepare_queues_for_handoff(&nv, dle, hostnqn, admin_qp,
-           num_queues, io_queues, cdata);
+           num_queues, io_queues, cdata, reconnect_delay,
+           controller_loss_timeout);
        if (error != 0)
                goto out;
 
@@ -981,14 +990,16 @@ nvmf_reconnect_params(int fd, nvlist_t **nvlp)
 int
 nvmf_reconnect_host(int fd, const struct nvme_discovery_log_entry *dle,
     const char *hostnqn, struct nvmf_qpair *admin_qp, u_int num_queues,
-    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
+    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata,
+    uint32_t reconnect_delay, uint32_t controller_loss_timeout)
 {
        struct nvmf_ioc_nv nv;
        u_int i;
        int error;
 
        error = prepare_queues_for_handoff(&nv, dle, hostnqn, admin_qp,
-           num_queues, io_queues, cdata);
+           num_queues, io_queues, cdata, reconnect_delay,
+           controller_loss_timeout);
        if (error != 0)
                goto out;
 
diff --git a/sbin/devd/Makefile b/sbin/devd/Makefile
index 4ff0187a5a22..5d5721d16884 100644
--- a/sbin/devd/Makefile
+++ b/sbin/devd/Makefile
@@ -46,6 +46,11 @@ HYPERV+=     hyperv.conf
 HYPERVPACKAGE= hyperv-tools
 .endif
 
+CONFGROUPS+=           NVME
+NVMEDIR=               ${DEVDDIR}
+NVME+=                 nvmf.conf
+NVMEPACKAGE=           nvme-tools
+
 .if ${MK_USB} != "no"
 DEVD+= uath.conf ulpt.conf
 .endif
diff --git a/sbin/devd/devd.conf.5 b/sbin/devd/devd.conf.5
index 4dbd7338edb1..baf4b9d3a183 100644
--- a/sbin/devd/devd.conf.5
+++ b/sbin/devd/devd.conf.5
@@ -38,7 +38,7 @@
 .\" ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 .\" SOFTWARE.
 .\"
-.Dd July 8, 2025
+.Dd July 9, 2025
 .Dt DEVD.CONF 5
 .Os
 .Sh NAME
@@ -517,6 +517,8 @@ and
 representing the start of a controller reset, the successful completion of a
 controller reset, or a timeout while waiting for the controller to reset,
 respectively.
+.It Li nvme Ta Li controller Ta Li RECONNECT Ta
+An NVMe over Fabrics host has disconnected and is requesting a reconnect.
 .El
 .Pp
 .Bl -column "SYSTEM" "SUBSYSTEM" "SHUTDOWN-THRESHOLD" -compact
diff --git a/sbin/devd/nvmf.conf b/sbin/devd/nvmf.conf
new file mode 100644
index 000000000000..eaf3ebe86cec
--- /dev/null
+++ b/sbin/devd/nvmf.conf
@@ -0,0 +1,7 @@
+# Attempt to reconnect NVMeoF host devices when requested
+notify 100 {
+       match "system" "nvme";
+       match "subsystem" "controller";
+       match "type" "RECONNECT";
+       action "nvmecontrol reconnect $name";
+};
diff --git a/sbin/nvmecontrol/connect.c b/sbin/nvmecontrol/connect.c
index c1d5d2cbaf5a..3d6d12bf2c48 100644
--- a/sbin/nvmecontrol/connect.c
+++ b/sbin/nvmecontrol/connect.c
@@ -31,6 +31,8 @@ static struct options {
        const char      *subnqn;
        const char      *hostnqn;
        uint32_t        kato;
+       uint32_t        reconnect_delay;
+       uint32_t        controller_loss_timeout;
        uint16_t        num_io_queues;
        uint16_t        queue_size;
        bool            data_digests;
@@ -43,6 +45,8 @@ static struct options {
        .subnqn = NULL,
        .hostnqn = NULL,
        .kato = NVMF_KATO_DEFAULT / 1000,
+       .reconnect_delay = NVMF_DEFAULT_RECONNECT_DELAY,
+       .controller_loss_timeout = NVMF_DEFAULT_CONTROLLER_LOSS,
        .num_io_queues = 1,
        .queue_size = 0,
        .data_digests = false,
@@ -107,7 +111,7 @@ connect_nvm_controller(enum nvmf_trtype trtype, int adrfam, const char *address,
        }
 
        error = nvmf_handoff_host(dle, hostnqn, admin, opt.num_io_queues, io,
-           &cdata);
+           &cdata, opt.reconnect_delay, opt.controller_loss_timeout);
        if (error != 0) {
                warnc(error, "Failed to handoff queues to kernel");
                free(io);
@@ -259,6 +263,11 @@ static const struct opts connect_opts[] = {
            "Number of entries in each I/O queue"),
        OPT("keep-alive-tmo", 'k', arg_uint32, opt, kato,
            "Keep Alive timeout (in seconds)"),
+       OPT("reconnect-delay", 'r', arg_uint32, opt, reconnect_delay,
+           "Delay between reconnect attempts after connection loss "
+           "(in seconds)"),
+       OPT("ctrl-loss-tmo", 'l', arg_uint32, opt, controller_loss_timeout,
+           "Controller loss timeout after connection loss (in seconds)"),
        OPT("hostnqn", 'q', arg_string, opt, hostnqn,
            "Host NQN"),
        OPT("flow_control", 'F', arg_none, opt, flow_control,
diff --git a/sbin/nvmecontrol/nvmecontrol.8 b/sbin/nvmecontrol/nvmecontrol.8
index d886b60a2545..624a0c93719b 100644
--- a/sbin/nvmecontrol/nvmecontrol.8
+++ b/sbin/nvmecontrol/nvmecontrol.8
@@ -33,7 +33,7 @@
 .\"
 .\" Author: Jim Harris <jimhar...@freebsd.org>
 .\"
-.Dd April 29, 2025
+.Dd July 9, 2025
 .Dt NVMECONTROL 8
 .Os
 .Sh NAME
@@ -216,6 +216,8 @@
 .Op Fl c Ar cntl-id
 .Op Fl i Ar queues
 .Op Fl k Ar seconds
+.Op Fl l Ar seconds
+.Op Fl r Ar seconds
 .Op Fl t Ar transport
 .Op Fl q Ar HostNQN
 .Op Fl Q Ar entries
@@ -226,6 +228,8 @@
 .Op Fl FGg
 .Op Fl i Ar queues
 .Op Fl k Ar seconds
+.Op Fl l Ar seconds
+.Op Fl r Ar seconds
 .Op Fl t Ar transport
 .Op Fl q Ar HostNQN
 .Op Fl Q Ar entries
@@ -241,6 +245,8 @@
 .Op Fl FGg
 .Op Fl i Ar queues
 .Op Fl k Ar seconds
+.Op Fl l Ar seconds
+.Op Fl r Ar seconds
 .Op Fl t Ar transport
 .Op Fl q Ar HostNQN
 .Op Fl Q Ar entries
@@ -786,6 +792,29 @@ The default is 1.
 .It Fl k Ar seconds
 Keep Alive timer duration in seconds.
 The default is 120.
+.It Fl l Ar seconds
+Controller Loss timer duration in seconds.
+The default is 600.
+.Pp
+This timer starts when an association is lost with a remote I/O controller
+and is cancelled when a new association is established.
+If the timer expires, the controller device is deleted.
+A setting of zero disables this timer.
+.It Fl r Ar seconds
+Reconnect timer duration in seconds.
+The default is 10.
+.Pp
+When an association is lost with a remote I/O controller,
+the controller device will request reconnection via periodic
+.Xr devctl 4
+notifications until either a new association is established or the controller
+device is deleted.
+This timer sets the interval between each
+.Xr devctl 4
+notification.
+Note that the first notification is triggered immediately after an association
+is lost.
+A setting of zero disables this timer.
 .It Fl t Ar transport
 Transport to use.
 The default is
diff --git a/sbin/nvmecontrol/reconnect.c b/sbin/nvmecontrol/reconnect.c
index adf1edac662b..06af40624177 100644
--- a/sbin/nvmecontrol/reconnect.c
+++ b/sbin/nvmecontrol/reconnect.c
@@ -27,6 +27,8 @@ static struct options {
        const char      *transport;
        const char      *hostnqn;
        uint32_t        kato;
+       uint32_t        reconnect_delay;
+       uint32_t        controller_loss_timeout;
        uint16_t        num_io_queues;
        uint16_t        queue_size;
        bool            data_digests;
@@ -37,6 +39,8 @@ static struct options {
        .transport = "tcp",
        .hostnqn = NULL,
        .kato = NVMF_KATO_DEFAULT / 1000,
+       .reconnect_delay = NVMF_DEFAULT_RECONNECT_DELAY,
+       .controller_loss_timeout = NVMF_DEFAULT_CONTROLLER_LOSS,
        .num_io_queues = 1,
        .queue_size = 0,
        .data_digests = false,
@@ -59,6 +63,7 @@ static int
 reconnect_nvm_controller(int fd, const struct nvmf_association_params *aparams,
     enum nvmf_trtype trtype, int adrfam, const char *address, const char *port,
     uint16_t cntlid, const char *subnqn, const char *hostnqn, uint32_t kato,
+    uint32_t reconnect_delay, uint32_t controller_loss_timeout,
     u_int num_io_queues, u_int queue_size,
     const struct nvme_discovery_log_entry *dle)
 {
@@ -88,7 +93,7 @@ reconnect_nvm_controller(int fd, const struct nvmf_association_params *aparams,
        }
 
        error = nvmf_reconnect_host(fd, dle, hostnqn, admin, num_io_queues, io,
-           &cdata);
+           &cdata, reconnect_delay, controller_loss_timeout);
        if (error != 0) {
                warnc(error, "Failed to handoff queues to kernel");
                free(io);
@@ -137,7 +142,8 @@ reconnect_by_address(int fd, const nvlist_t *rparams, const char *addr)
 
        error = reconnect_nvm_controller(fd, &aparams, trtype, AF_UNSPEC,
            address, port, le16toh(dle->cntlid), subnqn, hostnqn,
-           opt.kato * 1000, opt.num_io_queues, opt.queue_size, NULL);
+           opt.kato * 1000, opt.reconnect_delay, opt.controller_loss_timeout,
+           opt.num_io_queues, opt.queue_size, NULL);
        free(subnqn);
        free(tofree);
        return (error);
@@ -196,6 +202,8 @@ reconnect_by_params(int fd, const nvlist_t *rparams)
            address, port, le16toh(dle->cntlid), dle->subnqn,
            nvlist_get_string(rparams, "hostnqn"),
            dnvlist_get_number(rparams, "kato", 0),
+           dnvlist_get_number(rparams, "reconnect_delay", 0),
+           dnvlist_get_number(rparams, "controller_loss_timeout", 0),
            nvlist_get_number(rparams, "num_io_queues"),
            nvlist_get_number(rparams, "io_qsize"), dle);
        free(subnqn);
@@ -291,6 +299,11 @@ static const struct opts reconnect_opts[] = {
            "Number of entries in each I/O queue"),
        OPT("keep-alive-tmo", 'k', arg_uint32, opt, kato,
            "Keep Alive timeout (in seconds)"),
+       OPT("reconnect-delay", 'r', arg_uint32, opt, reconnect_delay,
+           "Delay between reconnect attempts after connection loss "
+           "(in seconds)"),
+       OPT("ctrl-loss-tmo", 'l', arg_uint32, opt, controller_loss_timeout,
+           "Controller loss timeout after connection loss (in seconds)"),
        OPT("hostnqn", 'q', arg_string, opt, hostnqn,
            "Host NQN"),
        OPT("flow_control", 'F', arg_none, opt, flow_control,
diff --git a/sys/dev/nvmf/host/nvmf.c b/sys/dev/nvmf/host/nvmf.c
index dbdd4568bdf1..1ac0d142443b 100644
--- a/sys/dev/nvmf/host/nvmf.c
+++ b/sys/dev/nvmf/host/nvmf.c
@@ -27,6 +27,7 @@
 #include <dev/nvmf/host/nvmf_var.h>
 
 static struct cdevsw nvmf_cdevsw;
+static struct taskqueue *nvmf_tq;
 
 bool nvmf_fail_disconnect = false;
 SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
@@ -34,7 +35,10 @@ SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
 
 MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");
 
+static void    nvmf_controller_loss_task(void *arg, int pending);
 static void    nvmf_disconnect_task(void *arg, int pending);
+static void    nvmf_request_reconnect(struct nvmf_softc *sc);
+static void    nvmf_request_reconnect_task(void *arg, int pending);
 static void    nvmf_shutdown_pre_sync(void *arg, int howto);
 static void    nvmf_shutdown_post_sync(void *arg, int howto);
 
@@ -294,6 +298,9 @@ nvmf_establish_connection(struct nvmf_softc *sc, nvlist_t *nvl)
        admin = nvlist_get_nvlist(nvl, "admin");
        io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
        kato = dnvlist_get_number(nvl, "kato", 0);
+       sc->reconnect_delay = dnvlist_get_number(nvl, "reconnect_delay", 0);
+       sc->controller_loss_timeout = dnvlist_get_number(nvl,
+           "controller_loss_timeout", 0);
 
        /* Setup the admin queue. */
        sc->admin = nvmf_init_qp(sc, trtype, admin, "admin queue", 0);
@@ -504,6 +511,10 @@ nvmf_attach(device_t dev)
        callout_init(&sc->ka_tx_timer, 1);
        sx_init(&sc->connection_lock, "nvmf connection");
        TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);
+       TIMEOUT_TASK_INIT(nvmf_tq, &sc->controller_loss_task, 0,
+           nvmf_controller_loss_task, sc);
+       TIMEOUT_TASK_INIT(nvmf_tq, &sc->request_reconnect_task, 0,
+           nvmf_request_reconnect_task, sc);
 
        oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ioq",
@@ -603,7 +614,9 @@ out:
 
        nvmf_destroy_aer(sc);
 
-       taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
+       taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
+       taskqueue_drain_timeout(nvmf_tq, &sc->controller_loss_task);
+       taskqueue_drain(nvmf_tq, &sc->disconnect_task);
        sx_destroy(&sc->connection_lock);
        nvlist_destroy(sc->rparams);
        free(sc->cdata, M_NVMF);
@@ -613,7 +626,7 @@ out:
 void
 nvmf_disconnect(struct nvmf_softc *sc)
 {
-       taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
+       taskqueue_enqueue(nvmf_tq, &sc->disconnect_task);
 }
 
 static void
@@ -676,6 +689,74 @@ nvmf_disconnect_task(void *arg, int pending __unused)
        nvmf_destroy_qp(sc->admin);
        sc->admin = NULL;
 
+       if (sc->reconnect_delay != 0)
+               nvmf_request_reconnect(sc);
+       if (sc->controller_loss_timeout != 0)
+               taskqueue_enqueue_timeout(nvmf_tq,
+                   &sc->controller_loss_task, sc->controller_loss_timeout *
+                   hz);
+
+       sx_xunlock(&sc->connection_lock);
+}
+
+static void
+nvmf_controller_loss_task(void *arg, int pending)
+{
+       struct nvmf_softc *sc = arg;
+       device_t dev;
+       int error;
+
+       bus_topo_lock();
+       sx_xlock(&sc->connection_lock);
+       if (sc->admin != NULL || sc->detaching) {
+               /* Reconnected or already detaching. */
+               sx_xunlock(&sc->connection_lock);
+               bus_topo_unlock();
+               return;
+       }
+
+       sc->controller_timedout = true;
+       sx_xunlock(&sc->connection_lock);
+
+       /*
+        * XXX: Doing this from here is a bit ugly.  We don't have an
+        * extra reference on `dev` but bus_topo_lock should block any
+        * concurrent device_delete_child invocations.
+        */
+       dev = sc->dev;
+       error = device_delete_child(root_bus, dev);
+       if (error != 0)
+               device_printf(dev,
+                   "failed to detach after controller loss: %d\n", error);
+       bus_topo_unlock();
+}
+
+static void
+nvmf_request_reconnect(struct nvmf_softc *sc)
+{
+       char buf[64];
+
+       sx_assert(&sc->connection_lock, SX_LOCKED);
+
+       snprintf(buf, sizeof(buf), "name=\"%s\"", device_get_nameunit(sc->dev));
+       devctl_notify("nvme", "controller", "RECONNECT", buf);
+       taskqueue_enqueue_timeout(nvmf_tq, &sc->request_reconnect_task,
+           sc->reconnect_delay * hz);
+}
+
+static void
+nvmf_request_reconnect_task(void *arg, int pending)
+{
+       struct nvmf_softc *sc = arg;
+
+       sx_xlock(&sc->connection_lock);
+       if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
+               /* Reconnected or already detaching. */
+               sx_xunlock(&sc->connection_lock);
+               return;
+       }
+
+       nvmf_request_reconnect(sc);
        sx_xunlock(&sc->connection_lock);
 }
 
@@ -699,7 +780,7 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
        }
 
        sx_xlock(&sc->connection_lock);
-       if (sc->admin != NULL || sc->detaching) {
+       if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
                error = EBUSY;
                goto out;
        }
@@ -745,6 +826,9 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
        nvmf_reconnect_sim(sc);
 
        nvmf_rescan_all_ns(sc);
+
+       taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, NULL);
+       taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, NULL);
 out:
        sx_xunlock(&sc->connection_lock);
        nvlist_destroy(nvl);
@@ -852,7 +936,21 @@ nvmf_detach(device_t dev)
        }
        free(sc->io, M_NVMF);
 
-       taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
+       taskqueue_drain(nvmf_tq, &sc->disconnect_task);
+       if (taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task,
+           NULL) != 0)
+               taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
+
+       /*
+        * Don't cancel/drain the controller loss task if that task
+        * has fired and is triggering the detach.
+        */
+       if (!sc->controller_timedout) {
+               if (taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task,
+                   NULL) != 0)
+                       taskqueue_drain_timeout(nvmf_tq,
+                           &sc->controller_loss_task);
+       }
 
        if (sc->admin != NULL)
                nvmf_destroy_qp(sc->admin);
@@ -1154,14 +1252,25 @@ static struct cdevsw nvmf_cdevsw = {
 static int
 nvmf_modevent(module_t mod, int what, void *arg)
 {
+       int error;
+
        switch (what) {
        case MOD_LOAD:
-               return (nvmf_ctl_load());
+               error = nvmf_ctl_load();
+               if (error != 0)
+                       return (error);
+
+               nvmf_tq = taskqueue_create("nvmf", M_WAITOK | M_ZERO,
+                   taskqueue_thread_enqueue, &nvmf_tq);
+               taskqueue_start_threads(&nvmf_tq, 1, PWAIT, "nvmf taskq");
+               return (0);
        case MOD_QUIESCE:
                return (0);
        case MOD_UNLOAD:
                nvmf_ctl_unload();
                destroy_dev_drain(&nvmf_cdevsw);
+               if (nvmf_tq != NULL)
+                       taskqueue_free(nvmf_tq);
                return (0);
        default:
                return (EOPNOTSUPP);
diff --git a/sys/dev/nvmf/host/nvmf_var.h b/sys/dev/nvmf/host/nvmf_var.h
index e45a31f413a4..606245b3969c 100644
--- a/sys/dev/nvmf/host/nvmf_var.h
+++ b/sys/dev/nvmf/host/nvmf_var.h
@@ -75,9 +75,15 @@ struct nvmf_softc {
        struct callout ka_rx_timer;
        sbintime_t ka_rx_sbt;
 
+       struct timeout_task request_reconnect_task;
+       struct timeout_task controller_loss_task;
+       uint32_t reconnect_delay;
+       uint32_t controller_loss_timeout;
+
        struct sx connection_lock;
        struct task disconnect_task;
        bool detaching;
+       bool controller_timedout;
 
        u_int num_aer;
        struct nvmf_aer *aer;
diff --git a/sys/dev/nvmf/nvmf.h b/sys/dev/nvmf/nvmf.h
index d4e7b1511e9d..9b2b4c1dea40 100644
--- a/sys/dev/nvmf/nvmf.h
+++ b/sys/dev/nvmf/nvmf.h
@@ -26,6 +26,13 @@
 
 #define        NVMF_NN                 (1024)
 
+/*
+ * Default timeouts for Fabrics hosts.  These match values used by
+ * Linux.
+ */
+#define        NVMF_DEFAULT_RECONNECT_DELAY    10
+#define        NVMF_DEFAULT_CONTROLLER_LOSS    600
+
 /*
  * (data, size) is the userspace buffer for a packed nvlist.
  *
@@ -68,6 +75,8 @@ struct nvmf_ioc_nv {
  *
  * number                      trtype
  * number                      kato    (optional)
+ * number                       reconnect_delay (optional)
+ * number                       controller_loss_timeout (optional)
  * qpair handoff nvlist                admin
  * qpair handoff nvlist array  io
  * binary                      cdata   struct nvme_controller_data
@@ -81,6 +90,8 @@ struct nvmf_ioc_nv {
  * string                      hostnqn
  * number                      num_io_queues
  * number                      kato    (optional)
+ * number                       reconnect_delay (optional)
+ * number                       controller_loss_timeout (optional)
  * number                      io_qsize
  * bool                                sq_flow_control
  *
