Author: jimharris Date: Tue Mar 26 20:32:57 2013 New Revision: 248754 URL: http://svnweb.freebsd.org/changeset/base/248754
Log: By default, always escalate to controller reset when an I/O times out. While aborts are typically cleaner than a full controller reset, many times an I/O timeout indicates other controller-level issues where aborts may not work. NVMe drivers for other operating systems are also defaulting to controller reset rather than aborts for timed out I/O. Sponsored by: Intel Reviewed by: carl Modified: head/sys/dev/nvme/nvme_ctrlr.c head/sys/dev/nvme/nvme_private.h head/sys/dev/nvme/nvme_qpair.c Modified: head/sys/dev/nvme/nvme_ctrlr.c ============================================================================== --- head/sys/dev/nvme/nvme_ctrlr.c Tue Mar 26 20:32:46 2013 (r248753) +++ head/sys/dev/nvme/nvme_ctrlr.c Tue Mar 26 20:32:57 2013 (r248754) @@ -422,12 +422,8 @@ nvme_ctrlr_hw_reset(struct nvme_controll void nvme_ctrlr_reset(struct nvme_controller *ctrlr) { - int status; - status = nvme_ctrlr_hw_reset(ctrlr); - DELAY(100*1000); - if (status == 0) - taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->restart_task); + taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task); } static int @@ -686,11 +682,24 @@ err: } static void -nvme_ctrlr_restart_task(void *arg, int pending) +nvme_ctrlr_reset_task(void *arg, int pending) { - struct nvme_controller *ctrlr = arg; + struct nvme_controller *ctrlr = arg; + int status; - nvme_ctrlr_start(ctrlr); + device_printf(ctrlr->dev, "resetting controller"); + status = nvme_ctrlr_hw_reset(ctrlr); + /* + * Use pause instead of DELAY, so that we yield to any nvme interrupt + * handlers on this CPU that were blocked on a qpair lock. We want + * all nvme interrupts completed before proceeding with restarting the + * controller. + * + * XXX - any way to guarantee the interrupt handlers have quiesced? + */ + pause("nvmereset", hz / 10); + if (status == 0) + nvme_ctrlr_start(ctrlr); } static void @@ -841,6 +850,9 @@ nvme_ctrlr_construct(struct nvme_control ctrlr->force_intx = 0; TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx); + ctrlr->enable_aborts = 0; + TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts); + ctrlr->msix_enabled = 1; if (ctrlr->force_intx) { @@ -879,7 +891,7 @@ intx: ctrlr->cdev->si_drv1 = (void *)ctrlr; - TASK_INIT(&ctrlr->restart_task, 0, nvme_ctrlr_restart_task, ctrlr); + TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr); ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK, taskqueue_thread_enqueue, &ctrlr->taskqueue); taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq"); Modified: head/sys/dev/nvme/nvme_private.h ============================================================================== --- head/sys/dev/nvme/nvme_private.h Tue Mar 26 20:32:46 2013 (r248753) +++ head/sys/dev/nvme/nvme_private.h Tue Mar 26 20:32:57 2013 (r248754) @@ -230,6 +230,7 @@ struct nvme_controller { uint32_t msix_enabled; uint32_t force_intx; + uint32_t enable_aborts; uint32_t num_io_queues; boolean_t per_cpu_io_queues; @@ -239,7 +240,7 @@ struct nvme_controller { uint32_t ns_identified; uint32_t queues_created; uint32_t num_start_attempts; - struct task restart_task; + struct task reset_task; struct taskqueue *taskqueue; /* For shared legacy interrupt. */ Modified: head/sys/dev/nvme/nvme_qpair.c ============================================================================== --- head/sys/dev/nvme/nvme_qpair.c Tue Mar 26 20:32:46 2013 (r248753) +++ head/sys/dev/nvme/nvme_qpair.c Tue Mar 26 20:32:57 2013 (r248754) @@ -460,21 +460,20 @@ nvme_timeout(void *arg) struct nvme_controller *ctrlr = qpair->ctrlr; union csts_register csts; + /* Read csts to get value of cfs - controller fatal status. */ csts.raw = nvme_mmio_read_4(ctrlr, csts); - if (csts.bits.cfs == 1) { + device_printf(ctrlr->dev, "i/o timeout, csts.cfs=%d\n", csts.bits.cfs); + nvme_dump_command(&tr->req->cmd); + + if (ctrlr->enable_aborts && csts.bits.cfs == 0) { /* - * The controller is reporting fatal status. Don't bother - * trying to abort the timed out command - proceed - * immediately to a controller-level reset. + * If aborts are enabled, only use them if the controller is + * not reporting fatal status. */ - device_printf(ctrlr->dev, - "controller reports fatal status, resetting...\n"); + nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id, + nvme_abort_complete, tr); + } else nvme_ctrlr_reset(ctrlr); - return; - } - - nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id, - nvme_abort_complete, tr); } void _______________________________________________ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"