Author: jimharris
Date: Tue Mar 26 20:32:57 2013
New Revision: 248754
URL: http://svnweb.freebsd.org/changeset/base/248754

Log:
  By default, always escalate to controller reset when an I/O times out.
  
  While aborts are typically cleaner than a full controller reset, many times
  an I/O timeout indicates other controller-level issues where aborts may not
  work.  NVMe drivers for other operating systems are also defaulting to
  controller reset rather than aborts for timed out I/O.
  
  Sponsored by: Intel
  Reviewed by:  carl

Modified:
  head/sys/dev/nvme/nvme_ctrlr.c
  head/sys/dev/nvme/nvme_private.h
  head/sys/dev/nvme/nvme_qpair.c

Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c      Tue Mar 26 20:32:46 2013        
(r248753)
+++ head/sys/dev/nvme/nvme_ctrlr.c      Tue Mar 26 20:32:57 2013        
(r248754)
@@ -422,12 +422,8 @@ nvme_ctrlr_hw_reset(struct nvme_controll
 void
 nvme_ctrlr_reset(struct nvme_controller *ctrlr)
 {
-       int status;
 
-       status = nvme_ctrlr_hw_reset(ctrlr);
-       DELAY(100*1000);
-       if (status == 0)
-               taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->restart_task);
+       taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
 }
 
 static int
@@ -686,11 +682,24 @@ err:
 }
 
 static void
-nvme_ctrlr_restart_task(void *arg, int pending)
+nvme_ctrlr_reset_task(void *arg, int pending)
 {
-       struct nvme_controller *ctrlr = arg;
+       struct nvme_controller  *ctrlr = arg;
+       int                     status;
 
-       nvme_ctrlr_start(ctrlr);
+       device_printf(ctrlr->dev, "resetting controller");
+       status = nvme_ctrlr_hw_reset(ctrlr);
+       /*
+        * Use pause instead of DELAY, so that we yield to any nvme interrupt
+        *  handlers on this CPU that were blocked on a qpair lock. We want
+        *  all nvme interrupts completed before proceeding with restarting the
+        *  controller.
+        *
+        * XXX - any way to guarantee the interrupt handlers have quiesced?
+        */
+       pause("nvmereset", hz / 10);
+       if (status == 0)
+               nvme_ctrlr_start(ctrlr);
 }
 
 static void
@@ -841,6 +850,9 @@ nvme_ctrlr_construct(struct nvme_control
        ctrlr->force_intx = 0;
        TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
 
+       ctrlr->enable_aborts = 0;
+       TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
+
        ctrlr->msix_enabled = 1;
 
        if (ctrlr->force_intx) {
@@ -879,7 +891,7 @@ intx:
 
        ctrlr->cdev->si_drv1 = (void *)ctrlr;
 
-       TASK_INIT(&ctrlr->restart_task, 0, nvme_ctrlr_restart_task, ctrlr);
+       TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
        ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
            taskqueue_thread_enqueue, &ctrlr->taskqueue);
        taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h    Tue Mar 26 20:32:46 2013        
(r248753)
+++ head/sys/dev/nvme/nvme_private.h    Tue Mar 26 20:32:57 2013        
(r248754)
@@ -230,6 +230,7 @@ struct nvme_controller {
 
        uint32_t                msix_enabled;
        uint32_t                force_intx;
+       uint32_t                enable_aborts;
 
        uint32_t                num_io_queues;
        boolean_t               per_cpu_io_queues;
@@ -239,7 +240,7 @@ struct nvme_controller {
        uint32_t                ns_identified;
        uint32_t                queues_created;
        uint32_t                num_start_attempts;
-       struct task             restart_task;
+       struct task             reset_task;
        struct taskqueue        *taskqueue;
 
        /* For shared legacy interrupt. */

Modified: head/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- head/sys/dev/nvme/nvme_qpair.c      Tue Mar 26 20:32:46 2013        
(r248753)
+++ head/sys/dev/nvme/nvme_qpair.c      Tue Mar 26 20:32:57 2013        
(r248754)
@@ -460,21 +460,20 @@ nvme_timeout(void *arg)
        struct nvme_controller  *ctrlr = qpair->ctrlr;
        union csts_register     csts;
 
+       /* Read csts to get value of cfs - controller fatal status. */
        csts.raw = nvme_mmio_read_4(ctrlr, csts);
-       if (csts.bits.cfs == 1) {
+       device_printf(ctrlr->dev, "i/o timeout, csts.cfs=%d\n", csts.bits.cfs);
+       nvme_dump_command(&tr->req->cmd);
+
+       if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
                /*
-                * The controller is reporting fatal status.  Don't bother
-                *  trying to abort the timed out command - proceed
-                *  immediately to a controller-level reset.
+                * If aborts are enabled, only use them if the controller is
+                *  not reporting fatal status.
                 */
-               device_printf(ctrlr->dev,
-                   "controller reports fatal status, resetting...\n");
+               nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
+                   nvme_abort_complete, tr);
+       } else
                nvme_ctrlr_reset(ctrlr);
-               return;
-       }
-
-       nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
-           nvme_abort_complete, tr);
 }
 
 void
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to