Author: jimharris
Date: Tue Mar 26 21:58:38 2013
New Revision: 248767
URL: http://svnweb.freebsd.org/changeset/base/248767

Log:
  Add the ability to internally mark a controller as failed, if it is unable to
  start or reset.  Also add a notifier for NVMe consumers for controller fail
  conditions and plumb this notifier for nvd(4) to destroy the associated
  GEOM disks when a failure occurs.
  
  This requires a bit of work to cover the races when a consumer is sending
  I/O requests to a controller that is transitioning to the failed state.  To
  help cover this condition, add a task to defer completion of I/Os submitted
  to a failed controller, so that the consumer will still always receive its
  completions in a different context than the submission.
  
  Sponsored by: Intel
  Reviewed by:  carl

Modified:
  head/sys/dev/nvd/nvd.c
  head/sys/dev/nvme/nvme.c
  head/sys/dev/nvme/nvme.h
  head/sys/dev/nvme/nvme_ctrlr.c
  head/sys/dev/nvme/nvme_private.h
  head/sys/dev/nvme/nvme_qpair.c

Modified: head/sys/dev/nvd/nvd.c
==============================================================================
--- head/sys/dev/nvd/nvd.c      Tue Mar 26 21:48:41 2013        (r248766)
+++ head/sys/dev/nvd/nvd.c      Tue Mar 26 21:58:38 2013        (r248767)
@@ -49,6 +49,7 @@ static void *nvd_new_disk(struct nvme_na
 static void destroy_geom_disk(struct nvd_disk *ndisk);
 
 static void *nvd_new_controller(struct nvme_controller *ctrlr);
+static void nvd_controller_fail(void *ctrlr);
 
 static int nvd_load(void);
 static void nvd_unload(void);
@@ -118,7 +119,7 @@ nvd_load()
        TAILQ_INIT(&disk_head);
 
        consumer_handle = nvme_register_consumer(nvd_new_disk,
-           nvd_new_controller, NULL);
+           nvd_new_controller, NULL, nvd_controller_fail);
 
        return (consumer_handle != NULL ? 0 : -1);
 }
@@ -351,3 +352,22 @@ destroy_geom_disk(struct nvd_disk *ndisk
 
        mtx_destroy(&ndisk->bioqlock);
 }
+
+static void
+nvd_controller_fail(void *ctrlr_arg)
+{
+       struct nvd_controller   *ctrlr = ctrlr_arg;
+       struct nvd_disk         *disk;
+
+       while (!TAILQ_EMPTY(&ctrlr->disk_head)) {
+               disk = TAILQ_FIRST(&ctrlr->disk_head);
+               TAILQ_REMOVE(&disk_head, disk, global_tailq);
+               TAILQ_REMOVE(&ctrlr->disk_head, disk, ctrlr_tailq);
+               destroy_geom_disk(disk);
+               free(disk, M_NVD);
+       }
+
+       TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
+       free(ctrlr, M_NVD);
+}
+

Modified: head/sys/dev/nvme/nvme.c
==============================================================================
--- head/sys/dev/nvme/nvme.c    Tue Mar 26 21:48:41 2013        (r248766)
+++ head/sys/dev/nvme/nvme.c    Tue Mar 26 21:58:38 2013        (r248767)
@@ -44,6 +44,7 @@ struct nvme_consumer {
        nvme_cons_ns_fn_t       ns_fn;
        nvme_cons_ctrlr_fn_t    ctrlr_fn;
        nvme_cons_async_fn_t    async_fn;
+       nvme_cons_fail_fn_t     fail_fn;
 };
 
 struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];
@@ -349,9 +350,23 @@ nvme_notify_async_consumers(struct nvme_
        }
 }
 
+void
+nvme_notify_fail_consumers(struct nvme_controller *ctrlr)
+{
+       struct nvme_consumer    *cons;
+       uint32_t                i;
+
+       for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
+               cons = &nvme_consumer[i];
+               if (cons->id != INVALID_CONSUMER_ID && cons->fail_fn != NULL)
+                       cons->fail_fn(ctrlr->cons_cookie[i]);
+       }
+}
+
 struct nvme_consumer *
 nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn,
-                      nvme_cons_async_fn_t async_fn)
+                      nvme_cons_async_fn_t async_fn,
+                      nvme_cons_fail_fn_t fail_fn)
 {
        int i;
 
@@ -365,6 +380,7 @@ nvme_register_consumer(nvme_cons_ns_fn_t
                        nvme_consumer[i].ns_fn = ns_fn;
                        nvme_consumer[i].ctrlr_fn = ctrlr_fn;
                        nvme_consumer[i].async_fn = async_fn;
+                       nvme_consumer[i].fail_fn = fail_fn;
 
                        nvme_notify_consumer(&nvme_consumer[i]);
                        return (&nvme_consumer[i]);

Modified: head/sys/dev/nvme/nvme.h
==============================================================================
--- head/sys/dev/nvme/nvme.h    Tue Mar 26 21:48:41 2013        (r248766)
+++ head/sys/dev/nvme/nvme.h    Tue Mar 26 21:58:38 2013        (r248767)
@@ -733,6 +733,7 @@ typedef void *(*nvme_cons_ns_fn_t)(struc
 typedef void *(*nvme_cons_ctrlr_fn_t)(struct nvme_controller *);
 typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *,
                                     uint32_t, void *, uint32_t);
+typedef void (*nvme_cons_fail_fn_t)(void *);
 
 enum nvme_namespace_flags {
        NVME_NS_DEALLOCATE_SUPPORTED    = 0x1,
@@ -769,7 +770,8 @@ int nvme_ns_cmd_flush(struct nvme_namesp
 /* Registration functions */
 struct nvme_consumer * nvme_register_consumer(nvme_cons_ns_fn_t    ns_fn,
                                               nvme_cons_ctrlr_fn_t ctrlr_fn,
-                                              nvme_cons_async_fn_t async_fn);
+                                              nvme_cons_async_fn_t async_fn,
+                                              nvme_cons_fail_fn_t  fail_fn);
 void           nvme_unregister_consumer(struct nvme_consumer *consumer);
 
 /* Controller helper functions */

Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c      Tue Mar 26 21:48:41 2013        
(r248766)
+++ head/sys/dev/nvme/nvme_ctrlr.c      Tue Mar 26 21:58:38 2013        
(r248767)
@@ -311,6 +311,45 @@ nvme_ctrlr_construct_io_qpairs(struct nv
        return (0);
 }
 
+static void
+nvme_ctrlr_fail(struct nvme_controller *ctrlr)
+{
+       int i;
+
+       ctrlr->is_failed = TRUE;
+       nvme_qpair_fail(&ctrlr->adminq);
+       for (i = 0; i < ctrlr->num_io_queues; i++)
+               nvme_qpair_fail(&ctrlr->ioq[i]);
+       nvme_notify_fail_consumers(ctrlr);
+}
+
+void
+nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr,
+    struct nvme_request *req)
+{
+
+       mtx_lock(&ctrlr->fail_req_lock);
+       STAILQ_INSERT_TAIL(&ctrlr->fail_req, req, stailq);
+       mtx_unlock(&ctrlr->fail_req_lock);
+       taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->fail_req_task);
+}
+
+static void
+nvme_ctrlr_fail_req_task(void *arg, int pending)
+{
+       struct nvme_controller  *ctrlr = arg;
+       struct nvme_request     *req;
+
+       mtx_lock(&ctrlr->fail_req_lock);
+       while (!STAILQ_EMPTY(&ctrlr->fail_req)) {
+               req = STAILQ_FIRST(&ctrlr->fail_req);
+               STAILQ_REMOVE_HEAD(&ctrlr->fail_req, stailq);
+               nvme_qpair_manual_complete_request(req->qpair, req,
+                   NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
+       }
+       mtx_unlock(&ctrlr->fail_req_lock);
+}
+
 static int
 nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr)
 {
@@ -426,8 +465,12 @@ nvme_ctrlr_reset(struct nvme_controller 
 
        cmpset = atomic_cmpset_32(&ctrlr->is_resetting, 0, 1);
 
-       if (cmpset == 0)
-               /* Controller is already resetting. */
+       if (cmpset == 0 || ctrlr->is_failed)
+               /*
+                * Controller is already resetting or has failed.  Return
+                *  immediately since there is no need to kick off another
+                *  reset in these cases.
+                */
                return;
 
        taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
@@ -745,17 +788,25 @@ nvme_ctrlr_start(void *ctrlr_arg)
 
        nvme_admin_qpair_enable(&ctrlr->adminq);
 
-       if (nvme_ctrlr_identify(ctrlr) != 0)
+       if (nvme_ctrlr_identify(ctrlr) != 0) {
+               nvme_ctrlr_fail(ctrlr);
                return;
+       }
 
-       if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
+       if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) {
+               nvme_ctrlr_fail(ctrlr);
                return;
+       }
 
-       if (nvme_ctrlr_create_qpairs(ctrlr) != 0)
+       if (nvme_ctrlr_create_qpairs(ctrlr) != 0) {
+               nvme_ctrlr_fail(ctrlr);
                return;
+       }
 
-       if (nvme_ctrlr_construct_namespaces(ctrlr) != 0)
+       if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) {
+               nvme_ctrlr_fail(ctrlr);
                return;
+       }
 
        nvme_ctrlr_configure_aer(ctrlr);
        nvme_ctrlr_configure_int_coalescing(ctrlr);
@@ -802,6 +853,8 @@ nvme_ctrlr_reset_task(void *arg, int pen
        pause("nvmereset", hz / 10);
        if (status == 0)
                nvme_ctrlr_start(ctrlr);
+       else
+               nvme_ctrlr_fail(ctrlr);
 
        atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
 }
@@ -998,12 +1051,18 @@ intx:
 
        ctrlr->cdev->si_drv1 = (void *)ctrlr;
 
-       TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
        ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
            taskqueue_thread_enqueue, &ctrlr->taskqueue);
        taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
 
        ctrlr->is_resetting = 0;
+       TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
+
+       TASK_INIT(&ctrlr->fail_req_task, 0, nvme_ctrlr_fail_req_task, ctrlr);
+       mtx_init(&ctrlr->fail_req_lock, "nvme ctrlr fail req lock", NULL,
+           MTX_DEF);
+       STAILQ_INIT(&ctrlr->fail_req);
+       ctrlr->is_failed = FALSE;
 
        return (0);
 }

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h    Tue Mar 26 21:48:41 2013        
(r248766)
+++ head/sys/dev/nvme/nvme_private.h    Tue Mar 26 21:58:38 2013        
(r248767)
@@ -85,7 +85,7 @@ MALLOC_DECLARE(M_NVME);
  */
 #define NVME_IO_ENTRIES                (256)
 #define NVME_IO_TRACKERS       (128)
-#define NVME_MIN_IO_TRACKERS   (16)
+#define NVME_MIN_IO_TRACKERS   (4)
 #define NVME_MAX_IO_TRACKERS   (1024)
 
 /*
@@ -119,6 +119,7 @@ extern int32_t              nvme_retry_count;
 struct nvme_request {
 
        struct nvme_command             cmd;
+       struct nvme_qpair               *qpair;
        void                            *payload;
        uint32_t                        payload_size;
        boolean_t                       timeout;
@@ -250,7 +251,9 @@ struct nvme_controller {
        struct intr_config_hook config_hook;
        uint32_t                ns_identified;
        uint32_t                queues_created;
+
        struct task             reset_task;
+       struct task             fail_req_task;
        struct taskqueue        *taskqueue;
 
        /* For shared legacy interrupt. */
@@ -293,6 +296,10 @@ struct nvme_controller {
 
        uint32_t                is_resetting;
 
+       struct mtx                      fail_req_lock;
+       boolean_t                       is_failed;
+       STAILQ_HEAD(, nvme_request)     fail_req;
+
 #ifdef CHATHAM2
        uint64_t                chatham_size;
        uint64_t                chatham_lbas;
@@ -403,6 +410,8 @@ void        nvme_ctrlr_submit_admin_request(str
                                        struct nvme_request *req);
 void   nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
                                     struct nvme_request *req);
+void   nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr,
+                                      struct nvme_request *req);
 
 void   nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
                             uint16_t vector, uint32_t num_entries,
@@ -414,6 +423,11 @@ void       nvme_qpair_process_completions(stru
 void   nvme_qpair_submit_request(struct nvme_qpair *qpair,
                                  struct nvme_request *req);
 void   nvme_qpair_reset(struct nvme_qpair *qpair);
+void   nvme_qpair_fail(struct nvme_qpair *qpair);
+void   nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
+                                          struct nvme_request *req,
+                                          uint32_t sct, uint32_t sc,
+                                          boolean_t print_on_error);
 
 void   nvme_admin_qpair_enable(struct nvme_qpair *qpair);
 void   nvme_admin_qpair_disable(struct nvme_qpair *qpair);
@@ -484,5 +498,6 @@ void        nvme_notify_async_consumers(struct 
                                    const struct nvme_completion *async_cpl,
                                    uint32_t log_page_id, void *log_page_buffer,
                                    uint32_t log_page_size);
+void   nvme_notify_fail_consumers(struct nvme_controller *ctrlr);
 
 #endif /* __NVME_PRIVATE_H__ */

Modified: head/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- head/sys/dev/nvme/nvme_qpair.c      Tue Mar 26 21:48:41 2013        
(r248766)
+++ head/sys/dev/nvme/nvme_qpair.c      Tue Mar 26 21:58:38 2013        
(r248767)
@@ -50,7 +50,6 @@ nvme_completion_is_retry(const struct nv
        case NVME_SCT_GENERIC:
                switch (cpl->status.sc) {
                case NVME_SC_ABORTED_BY_REQUEST:
-                       return (1);
                case NVME_SC_NAMESPACE_NOT_READY:
                        if (cpl->status.dnr)
                                return (0);
@@ -155,7 +154,7 @@ nvme_qpair_complete_tracker(struct nvme_
 
 static void
 nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
-    struct nvme_tracker *tr, uint32_t sct, uint32_t sc,
+    struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
     boolean_t print_on_error)
 {
        struct nvme_completion  cpl;
@@ -165,10 +164,37 @@ nvme_qpair_manual_complete_tracker(struc
        cpl.cid = tr->cid;
        cpl.status.sct = sct;
        cpl.status.sc = sc;
+       cpl.status.dnr = dnr;
        nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
 }
 
 void
+nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
+    struct nvme_request *req, uint32_t sct, uint32_t sc,
+    boolean_t print_on_error)
+{
+       struct nvme_completion  cpl;
+       boolean_t               error;
+
+       memset(&cpl, 0, sizeof(cpl));
+       cpl.sqid = qpair->id;
+       cpl.status.sct = sct;
+       cpl.status.sc = sc;
+
+       error = nvme_completion_is_error(&cpl);
+
+       if (error && print_on_error) {
+               nvme_dump_completion(&cpl);
+               nvme_dump_command(&req->cmd);
+       }
+
+       if (req->cb_fn)
+               req->cb_fn(req->cb_arg, &cpl);
+
+       nvme_free_request(req);
+}
+
+void
 nvme_qpair_process_completions(struct nvme_qpair *qpair)
 {
        struct nvme_tracker     *tr;
@@ -363,7 +389,7 @@ nvme_admin_qpair_abort_aers(struct nvme_
        while (tr != NULL) {
                if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
                        nvme_qpair_manual_complete_tracker(qpair, tr,
-                           NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION,
+                           NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, 0,
                            FALSE);
                        tr = TAILQ_FIRST(&qpair->outstanding_tr);
                } else {
@@ -406,7 +432,7 @@ nvme_abort_complete(void *arg, const str
                 */
                printf("abort command failed, aborting command manually\n");
                nvme_qpair_manual_complete_tracker(tr->qpair, tr,
-                   NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
+                   NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 0, TRUE);
        }
 }
 
@@ -476,17 +502,32 @@ _nvme_qpair_submit_request(struct nvme_q
        mtx_assert(&qpair->lock, MA_OWNED);
 
        tr = TAILQ_FIRST(&qpair->free_tr);
+       req->qpair = qpair;
 
        if (tr == NULL || !qpair->is_enabled) {
                /*
                 * No tracker is available, or the qpair is disabled due to
-                *  an in-progress controller-level reset.
-                *
-                * Put the request on the qpair's request queue to be processed
-                *  when a tracker frees up via a command completion or when
-                *  the controller reset is completed.
+                *  an in-progress controller-level reset or controller
+                *  failure.
                 */
-               STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
+
+               if (qpair->ctrlr->is_failed) {
+                       /*
+                        * The controller has failed.  Post the request to a
+                        *  task where it will be aborted, so that we do not
+                        *  invoke the request's callback in the context
+                        *  of the submission.
+                        */
+                       nvme_ctrlr_post_failed_request(qpair->ctrlr, req);
+               } else {
+                       /*
+                        * Put the request on the qpair's request queue to be
+                        *  processed when a tracker frees up via a command
+                        *  completion or when the controller reset is
+                        *  completed.
+                        */
+                       STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
+               }
                return;
        }
 
@@ -574,7 +615,7 @@ nvme_io_qpair_enable(struct nvme_qpair *
                device_printf(qpair->ctrlr->dev,
                    "aborting outstanding i/o\n");
                nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
-                   NVME_SC_ABORTED_BY_REQUEST, TRUE);
+                   NVME_SC_ABORTED_BY_REQUEST, 0, TRUE);
        }
 
        mtx_lock(&qpair->lock);
@@ -622,3 +663,41 @@ nvme_io_qpair_disable(struct nvme_qpair 
 
        nvme_qpair_disable(qpair);
 }
+
+void
+nvme_qpair_fail(struct nvme_qpair *qpair)
+{
+       struct nvme_tracker             *tr;
+       struct nvme_request             *req;
+
+       mtx_lock(&qpair->lock);
+
+       while (!STAILQ_EMPTY(&qpair->queued_req)) {
+               req = STAILQ_FIRST(&qpair->queued_req);
+               STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
+               device_printf(qpair->ctrlr->dev,
+                   "failing queued i/o\n");
+               mtx_unlock(&qpair->lock);
+               nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
+                   NVME_SC_ABORTED_BY_REQUEST, TRUE);
+               mtx_lock(&qpair->lock);
+       }
+
+       /* Manually abort each outstanding I/O. */
+       while (!TAILQ_EMPTY(&qpair->outstanding_tr)) {
+               tr = TAILQ_FIRST(&qpair->outstanding_tr);
+               /*
+                * Do not remove the tracker.  The abort_tracker path will
+                *  do that for us.
+                */
+               device_printf(qpair->ctrlr->dev,
+                   "failing outstanding i/o\n");
+               mtx_unlock(&qpair->lock);
+               nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
+                   NVME_SC_ABORTED_BY_REQUEST, 1 /* do not retry */, TRUE);
+               mtx_lock(&qpair->lock);
+       }
+
+       mtx_unlock(&qpair->lock);
+}
+
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to