[PATCH] scsi: Do not call do_div() with a 64-bit divisor
do_div() is meant for divisions of 64-bit number by 32-bit numbers. Passing 64-bit divisor types caused issues in the past on 32-bit platforms, cfr. commit ea077b1b96e073eac5c3c5590529e964767fc5f7 ("m68k: Truncate base in do_div()"). As scsi_device.sector_size is unsigned (int), factor should be unsigned int, too. Signed-off-by: Geert Uytterhoeven --- drivers/scsi/sd.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5693f6d7eddb..d6645c70cceb 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1607,7 +1607,7 @@ static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) end_lba <<= 1; } else { /* be careful ... don't want any overflows */ - u64 factor = scmd->device->sector_size / 512; + unsigned int factor = scmd->device->sector_size / 512; do_div(start_lba, factor); do_div(end_lba, factor); } -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 11/11] pm80xx : gpio feature support for motherboard controllers
Hi Jack, We wanted to control the GPIO in the HBA only. Bsg interface gets created only for enclosure or expander. For our HBA, bsg interface will not be created since it does not have an enclosure target inside. That's why we wanted to use IOCTL. Please advise. Best Regards, Viswas G -Original Message- From: Jack Wang [mailto:xjtu...@gmail.com] Sent: Tuesday, October 29, 2013 3:49 PM To: Viswas G Cc: linux-scsi@vger.kernel.org; Sangeetha Gnanasekaran; Nikith Ganigarakoppal; Anand Kumar Santhanam Subject: Re: [PATCH 11/11] pm80xx : gpio feature support for motherboard controllers Hi Viswas, As ioctl interface is not welcome for new feature, that's why we removed ioctl interface when pm8001 accepted into mainline. I suggest you use bsg interface for this, see sas_host_smp.c for details. Regards, Jack On 10/22/2013 02:20 PM, Viswas G wrote: > > Signed-off-by: Viswas G > --- > drivers/scsi/pm8001/pm8001_ctl.c | 248 > - > drivers/scsi/pm8001/pm8001_ctl.h | 55 > drivers/scsi/pm8001/pm8001_init.c | 37 ++ > drivers/scsi/pm8001/pm8001_sas.h | 30 + > drivers/scsi/pm8001/pm80xx_hwi.c | 106 > drivers/scsi/pm8001/pm80xx_hwi.h | 49 +++ > 6 files changed, 524 insertions(+), 1 deletions(-) > > diff --git a/drivers/scsi/pm8001/pm8001_ctl.c > b/drivers/scsi/pm8001/pm8001_ctl.c > index 247cb1c..3c9ca21 100644 > --- a/drivers/scsi/pm8001/pm8001_ctl.c > +++ b/drivers/scsi/pm8001/pm8001_ctl.c > @@ -40,7 +40,8 @@ > #include > #include > #include "pm8001_sas.h" > -#include "pm8001_ctl.h" > + > +int pm8001_major = -1; > > /* scsi host attributes */ > > @@ -759,3 +760,248 @@ struct device_attribute *pm8001_host_attrs[] = { > NULL, > }; > > +/** > + * pm8001_open - open the configuration file > + * @inode: inode being opened > + * @file: file handle attached > + * > + * Called when the configuration device is opened. Does the needed > + * set up on the handle and then returns > + * > + */ > +static int pm8001_open(struct inode *inode, struct file *file) { > + struct pm8001_hba_info *pm8001_ha; > + unsigned minor_number = iminor(inode); > + int err = -ENODEV; > + > + list_for_each_entry(pm8001_ha, &hba_list, list) { > + if (pm8001_ha->id == minor_number) { > + file->private_data = pm8001_ha; > + err = 0; > + break; > + } > + } > + > + return err; > +} > + > +/** > + * pm8001_close - close the configuration file > + * @inode: inode being opened > + * @file: file handle attached > + * > + * Called when the configuration device is closed. Does the needed > + * set up on the handle and then returns > + * > + */ > +static int pm8001_close(struct inode *inode, struct file *file) { > + return 0; > +} > + > +static long pm8001_info_ioctl(struct pm8001_hba_info *pm8001_ha, > + unsigned long arg) { > + u32 ret = 0; > + struct ioctl_info_buffer info_buf; > + union main_cfg_table main_tbl = pm8001_ha->main_cfg_tbl; > + > + strcpy(info_buf.information.sz_name, DRV_NAME); > + > + info_buf.information.usmajor_revision = DRV_MAJOR; > + info_buf.information.usminor_revision = DRV_MINOR; > + info_buf.information.usbuild_revision = DRV_BUILD; > + if (pm8001_ha->chip_id == chip_8001) { > + info_buf.information.maxoutstandingIO = > + main_tbl.pm8001_tbl.max_out_io; > + info_buf.information.maxdevices = > + (main_tbl.pm8001_tbl.max_sgl >> 16) & 0x; > + } else { > + info_buf.information.maxoutstandingIO = > + main_tbl.pm80xx_tbl.max_out_io; > + info_buf.information.maxdevices = > + (main_tbl.pm80xx_tbl.max_sgl >> 16) & 0x; > + } > + info_buf.header.return_code = ADPT_IOCTL_CALL_SUCCESS; > + > + if (copy_to_user((void *)arg, (void *)&info_buf, > +sizeof(struct ioctl_info_buffer))) { > + ret = ADPT_IOCTL_CALL_FAILED; > + } > + > + return ret; > +} > + > +static long pm8001_gpio_ioctl(struct pm8001_hba_info *pm8001_ha, > + unsigned long arg) { > + struct gpio_buffer buffer; > + struct pm8001_gpio *payload; > + struct gpio_ioctl_resp *gpio_resp; > + DECLARE_COMPLETION_ONSTACK(completion); > + unsigned long timeout; > + u32 ret = 0, operation; > + > + mutex_lock(&pm8001_ha->ioctl_mutex); > + > + if (copy_from_user(&buffer, (struct gpio_buffer *)arg, > + sizeof(struct gpio_buffer))) { > + ret = ADPT_IOCTL_CALL_FAILED; > + goto exit; > + } > + > + pm8001_ha->ioctl_completion = &completion; > + payload = &buffer.gpio_payload; > + operation = payload->oper
Re: [PATCH 11/11] pm80xx : gpio feature support for motherboard controllers
On 11/04/2013 11:13 AM, Viswas G wrote: > Hi Jack, > > We wanted to control the GPIO in the HBA only. Bsg interface gets created > only for enclosure or expander. > > For our HBA, bsg interface will not be created since it does not have an > enclosure target inside. That's why we wanted to use IOCTL. Please advise. > > Best Regards, > Viswas G > Hi Viswas, No, bsg interface also support HBA. Here is two example output from LSI mpt2sas: smp_rep_manufacturer /dev/bsg/sas_host0 Report manufacturer response: SAS-1.1 format: 0 vendor identification: LSI product identification: Virtual SMP Port product revision level: smp_read_gpio /dev/bsg/sas_host0 Read GPIO register response: GPIO_CFG[0]: version: 0 GPIO enable: 1 cfg register count: 2 gp register count: 1 supported drive count: 16 Regards, Jack > -Original Message- > From: Jack Wang [mailto:xjtu...@gmail.com] > Sent: Tuesday, October 29, 2013 3:49 PM > To: Viswas G > Cc: linux-scsi@vger.kernel.org; Sangeetha Gnanasekaran; Nikith > Ganigarakoppal; Anand Kumar Santhanam > Subject: Re: [PATCH 11/11] pm80xx : gpio feature support for motherboard > controllers > > Hi Viswas, > > As ioctl interface is not welcome for new feature, that's why we removed > ioctl interface when pm8001 accepted into mainline. > > I suggest you use bsg interface for this, see sas_host_smp.c for details. > > Regards, > Jack > > On 10/22/2013 02:20 PM, Viswas G wrote: >> >> Signed-off-by: Viswas G >> --- >> drivers/scsi/pm8001/pm8001_ctl.c | 248 >> - >> drivers/scsi/pm8001/pm8001_ctl.h | 55 >> drivers/scsi/pm8001/pm8001_init.c | 37 ++ >> drivers/scsi/pm8001/pm8001_sas.h | 30 + >> drivers/scsi/pm8001/pm80xx_hwi.c | 106 >> drivers/scsi/pm8001/pm80xx_hwi.h | 49 +++ >> 6 files changed, 524 insertions(+), 1 deletions(-) >> >> diff --git a/drivers/scsi/pm8001/pm8001_ctl.c >> b/drivers/scsi/pm8001/pm8001_ctl.c >> index 247cb1c..3c9ca21 100644 >> --- a/drivers/scsi/pm8001/pm8001_ctl.c >> +++ b/drivers/scsi/pm8001/pm8001_ctl.c >> @@ -40,7 +40,8 @@ >> #include >> #include >> #include "pm8001_sas.h" >> -#include "pm8001_ctl.h" >> + >> +int pm8001_major = -1; >> >> /* scsi host attributes */ >> >> @@ -759,3 +760,248 @@ struct device_attribute *pm8001_host_attrs[] = { >> NULL, >> }; >> >> +/** >> + * pm8001_open - open the configuration file >> + * @inode: inode being opened >> + * @file: file handle attached >> + * >> + * Called when the configuration device is opened. Does the needed >> + * set up on the handle and then returns >> + * >> + */ >> +static int pm8001_open(struct inode *inode, struct file *file) { >> + struct pm8001_hba_info *pm8001_ha; >> + unsigned minor_number = iminor(inode); >> + int err = -ENODEV; >> + >> + list_for_each_entry(pm8001_ha, &hba_list, list) { >> + if (pm8001_ha->id == minor_number) { >> + file->private_data = pm8001_ha; >> + err = 0; >> + break; >> + } >> + } >> + >> + return err; >> +} >> + >> +/** >> + * pm8001_close - close the configuration file >> + * @inode: inode being opened >> + * @file: file handle attached >> + * >> + * Called when the configuration device is closed. Does the needed >> + * set up on the handle and then returns >> + * >> + */ >> +static int pm8001_close(struct inode *inode, struct file *file) { >> + return 0; >> +} >> + >> +static long pm8001_info_ioctl(struct pm8001_hba_info *pm8001_ha, >> + unsigned long arg) { >> + u32 ret = 0; >> + struct ioctl_info_buffer info_buf; >> + union main_cfg_table main_tbl = pm8001_ha->main_cfg_tbl; >> + >> + strcpy(info_buf.information.sz_name, DRV_NAME); >> + >> + info_buf.information.usmajor_revision = DRV_MAJOR; >> + info_buf.information.usminor_revision = DRV_MINOR; >> + info_buf.information.usbuild_revision = DRV_BUILD; >> + if (pm8001_ha->chip_id == chip_8001) { >> + info_buf.information.maxoutstandingIO = >> + main_tbl.pm8001_tbl.max_out_io; >> + info_buf.information.maxdevices = >> + (main_tbl.pm8001_tbl.max_sgl >> 16) & 0x; >> + } else { >> + info_buf.information.maxoutstandingIO = >> + main_tbl.pm80xx_tbl.max_out_io; >> + info_buf.information.maxdevices = >> + (main_tbl.pm80xx_tbl.max_sgl >> 16) & 0x; >> + } >> + info_buf.header.return_code = ADPT_IOCTL_CALL_SUCCESS; >> + >> + if (copy_to_user((void *)arg, (void *)&info_buf, >> +sizeof(struct ioctl_info_buffer))) { >> + ret = ADPT_IOCTL_CALL_FAILED; >> + } >> + >> + return ret; >> +} >> + >> +static long pm8001_gpio_ioctl(struct pm8001_hba_info *pm800
Re: [PATCH 2/3] scsi: improved eh timeout handler
On 10/31/2013 04:49 PM, Christoph Hellwig wrote: > Looks reasonable to me, but a few minor nitpicks: > >> +spin_lock_irqsave(sdev->host->host_lock, flags); >> +if (scsi_host_eh_past_deadline(sdev->host)) { > > I don't have the implementation of scsi_host_eh_past_deadline in my > local tree, but do we really need the host lock for it? > Yes. The eh_deadline variable might be set from an interrupt context or from userland, so we need to protect access to it. >> +int >> +scsi_abort_command(struct scsi_cmnd *scmd) > > Seems like this should be static and not exported in the current version > of the code? > Yep. Will be doing so. Cheers, Hannes -- Dr. Hannes Reinecke zSeries & Storage h...@suse.de +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/4] scsi: Set the minimum valid value of 'eh_deadline' as 0
From: Ren Mingxin The former minimum valid value of 'eh_deadline' is 1s, which means the earliest occasion to shorten EH is 1 second later since a command is failed or timed out. But if we want to skip EH steps ASAP, we have to wait until the first EH step is finished. If the duration of the first EH step is long, this waiting time is excruciating. So, it is necessary to accept 0 as the minimum valid value for 'eh_deadline'. According to my test, with Hannes' patchset 'New EH command timeout handler' as well, the minimum IO time is improved from 73s (eh_deadline = 1) to 43s(eh_deadline = 0) when commands are timed out by disabling RSCN and target port. Another thing: scsi_finish_command() should be invoked if scsi_eh_scmd_add() is returned on failure - let EH finish those commands. Signed-off-by: Ren Mingxin Signed-off-by: Hannes Reinecke --- drivers/scsi/hosts.c | 16 drivers/scsi/scsi_error.c | 40 +++- drivers/scsi/scsi_sysfs.c | 36 +--- 3 files changed, 64 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index f334859..81529ba 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -316,11 +316,11 @@ static void scsi_host_dev_release(struct device *dev) kfree(shost); } -static unsigned int shost_eh_deadline; +static int shost_eh_deadline = -1; -module_param_named(eh_deadline, shost_eh_deadline, uint, S_IRUGO|S_IWUSR); +module_param_named(eh_deadline, shost_eh_deadline, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(eh_deadline, -"SCSI EH timeout in seconds (should be between 1 and 2^32-1)"); +"SCSI EH timeout in seconds (should be between 0 and 2^31-1)"); static struct device_type scsi_host_type = { .name = "scsi_host", @@ -394,7 +394,15 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->unchecked_isa_dma = sht->unchecked_isa_dma; shost->use_clustering = sht->use_clustering; shost->ordered_tag = sht->ordered_tag; - shost->eh_deadline = shost_eh_deadline * HZ; + if (shost_eh_deadline == -1) + shost->eh_deadline = -1; + else if ((ulong) shost_eh_deadline * HZ > INT_MAX) { + printk(KERN_WARNING "scsi%d: eh_deadline %u exceeds the " + "maximum, setting to %u\n", + shost->host_no, shost_eh_deadline, INT_MAX / HZ); + shost->eh_deadline = INT_MAX; + } else + shost->eh_deadline = shost_eh_deadline * HZ; if (sht->supported_mode == MODE_UNKNOWN) /* means we didn't set it ... default to INITIATOR */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 6a137fa..2487293 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -91,7 +91,7 @@ EXPORT_SYMBOL_GPL(scsi_schedule_eh); static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) { - if (!shost->last_reset || !shost->eh_deadline) + if (!shost->last_reset || shost->eh_deadline == -1) return 0; if (time_before(jiffies, @@ -129,29 +129,43 @@ scmd_eh_abort_handler(struct work_struct *work) rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd); if (rtn == SUCCESS) { scmd->result |= DID_TIME_OUT << 16; - if (!scsi_noretry_cmd(scmd) && + spin_lock_irqsave(sdev->host->host_lock, flags); + if (scsi_host_eh_past_deadline(sdev->host)) { + spin_unlock_irqrestore(sdev->host->host_lock, + flags); + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "scmd %p eh timeout, " + "not retrying aborted " + "command\n", scmd)); + } else if (!scsi_noretry_cmd(scmd) && (++scmd->retries <= scmd->allowed)) { + spin_unlock_irqrestore(sdev->host->host_lock, + flags); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_WARNING, scmd, "scmd %p retry " "aborted command\n", scmd)); scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); + return; } else { + spin_unlock_irqrestore(sdev->host->host_lock, + flags);
[PATCH 4/4] scsi: Update documentation
The documentation has gone out-of-sync, so update it to the current status. Signed-off-by: Hannes Reinecke --- Documentation/scsi/scsi_eh.txt | 69 +++-- Documentation/scsi/scsi_mid_low_api.txt | 9 - drivers/scsi/scsi.c | 6 +-- 3 files changed, 48 insertions(+), 36 deletions(-) diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt index 6ff16b6..a0c8511 100644 --- a/Documentation/scsi/scsi_eh.txt +++ b/Documentation/scsi/scsi_eh.txt @@ -42,20 +42,14 @@ discussion. Once LLDD gets hold of a scmd, either the LLDD will complete the command by calling scsi_done callback passed from midlayer when -invoking hostt->queuecommand() or SCSI midlayer will time it out. +invoking hostt->queuecommand() or the block layer will time it out. [1-2-1] Completing a scmd w/ scsi_done For all non-EH commands, scsi_done() is the completion callback. It -does the following. - - 1. Delete timeout timer. If it fails, it means that timeout timer -has expired and is going to finish the command. Just return. - - 2. Link scmd to per-cpu scsi_done_q using scmd->en_entry - - 3. Raise SCSI_SOFTIRQ +just calls blk_complete_request() to delete the block layer timer and +raise SCSI_SOFTIRQ SCSI_SOFTIRQ handler scsi_softirq calls scsi_decide_disposition() to determine what to do with the command. scsi_decide_disposition() @@ -64,10 +58,12 @@ with the command. - SUCCESS scsi_finish_command() is invoked for the command. The - function does some maintenance choirs and notify completion by - calling scmd->done() callback, which, for fs requests, would - be HLD completion callback - sd:sd_rw_intr, sr:rw_intr, - st:st_intr. + function does some maintenance chores and then calls + scsi_io_completion() to finish the I/O. + scsi_io_completion() then notifies the block layer on + the completed request by calling blk_end_request and + friends or figures out what to do with the remainder + of the data in case of an error. - NEEDS_RETRY - ADD_TO_MLQUEUE @@ -86,33 +82,45 @@ function 1. invokes optional hostt->eh_timed_out() callback. Return value can be one of -- EH_HANDLED - This indicates that eh_timed_out() dealt with the timeout. The - scmd is passed to __scsi_done() and thus linked into per-cpu - scsi_done_q. Normal command completion described in [1-2-1] - follows. +- BLK_EH_HANDLED + This indicates that eh_timed_out() dealt with the timeout. + The command is passed back to the block layer and completed + via __blk_complete_requests(). + + *NOTE* After returning BLK_EH_HANDLED the SCSI layer is + assumed to be finished with the command, and no other + functions from the SCSI layer will be called. So this + should typically only be returned if the eh_timed_out() + handler raced with normal completion. -- EH_RESET_TIMER +- BLK_EH_RESET_TIMER This indicates that more time is required to finish the command. Timer is restarted. This action is counted as a retry and only allowed scmd->allowed + 1(!) times. Once the - limit is reached, action for EH_NOT_HANDLED is taken instead. + limit is reached, action for BLK_EH_NOT_HANDLED is taken instead. - *NOTE* This action is racy as the LLDD could finish the scmd - after the timeout has expired but before it's added back. In - such cases, scsi_done() would think that timeout has occurred - and return without doing anything. We lose completion and the - command will time out again. - -- EH_NOT_HANDLED - This is the same as when eh_timed_out() callback doesn't exist. +- BLK_EH_NOT_HANDLED +eh_timed_out() callback did not handle the command. Step #2 is taken. + 2. If the host supports asynchronous completion (as indicated by the +no_async_abort setting in the host template) scsi_abort_command() +is invoked to schedule an asynchrous abort. If that fails +Step #3 is taken. + 2. scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD) is invoked for the command. See [1-3] for more information. +[1-3] Asynchronous command aborts + + After a timeout occurs a command abort is scheduled from + scsi_abort_command(). If the abort is successful the command + will either be retried (if the number of retries is not exhausted) + or terminated with DID_TIME_OUT. + Otherwise scsi_eh_scmd_add() is invoked for the command. + See [1-4] for more information. -[1-3] How EH takes over +[1-4] How EH takes over scmds enter EH via scsi_eh_scmd_add(), which does the following. @@ -320,7 +328,8 @@ scmd->allowed. <> - This action is taken for each timed out command. + This action is taken for each timed out command when + no_async_abort is enabled in the host template. hostt->eh_abort_handler() i
[PATCH 1/4] scsi: Fix erratic device offline during EH
From: James Bottomley Commit 18a4d0a22ed6c54b67af7718c305cd010f09ddf8 (Handle disk devices which can not process medium access commands) was introduced to offline any device which cannot process medium access commands. However, commit 3eef6257de48ff84a5d98ca533685df8a3beaeb8 (Reduce error recovery time by reducing use of TURs) reduced the number of TURs by sending it only on the first failing command, which might or might not be a medium access command. So in combination this results in an erratic device offlining during EH; if the command where the TUR was sent upon happens to be a medium access command the device will be set offline, if not everything proceeds as normal. This patch moves the check to the final test, eliminating this problem. Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley --- drivers/scsi/scsi_error.c | 26 +- drivers/scsi/sd.c | 26 -- include/scsi/scsi_driver.h | 2 +- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index e8bee9f..67c0014 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -941,12 +941,6 @@ retry: scsi_eh_restore_cmnd(scmd, &ses); - if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { - struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); - if (sdrv->eh_action) - rtn = sdrv->eh_action(scmd, cmnd, cmnd_size, rtn); - } - return rtn; } @@ -964,6 +958,16 @@ static int scsi_request_sense(struct scsi_cmnd *scmd) return scsi_send_eh_cmnd(scmd, NULL, 0, scmd->device->eh_timeout, ~0); } +static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn) +{ + if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { + struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); + if (sdrv->eh_action) + rtn = sdrv->eh_action(scmd, rtn); + } + return rtn; +} + /** * scsi_eh_finish_cmd - Handle a cmd that eh is finished with. * @scmd: Original SCSI cmd that eh has finished. @@ -1142,7 +1146,9 @@ static int scsi_eh_test_devices(struct list_head *cmd_list, list_for_each_entry_safe(scmd, next, cmd_list, eh_entry) if (scmd->device == sdev) { - if (finish_cmds) + if (finish_cmds && + (try_stu || +scsi_eh_action(scmd, SUCCESS) == SUCCESS)) scsi_eh_finish_cmd(scmd, done_q); else list_move_tail(&scmd->eh_entry, work_q); @@ -1283,7 +1289,8 @@ static int scsi_eh_stu(struct Scsi_Host *shost, !scsi_eh_tur(stu_scmd)) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { - if (scmd->device == sdev) + if (scmd->device == sdev && + scsi_eh_action(scmd, SUCCESS) == SUCCESS) scsi_eh_finish_cmd(scmd, done_q); } } @@ -1350,7 +1357,8 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, !scsi_eh_tur(bdr_scmd)) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { - if (scmd->device == sdev) + if (scmd->device == sdev && + scsi_eh_action(scmd, rtn) != FAILED) scsi_eh_finish_cmd(scmd, done_q); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fd874b8..1929838 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -110,7 +110,7 @@ static int sd_suspend_runtime(struct device *); static int sd_resume(struct device *); static void sd_rescan(struct device *); static int sd_done(struct scsi_cmnd *); -static int sd_eh_action(struct scsi_cmnd *, unsigned char *, int, int); +static int sd_eh_action(struct scsi_cmnd *, int); static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer); static void scsi_disk_release(struct device *cdev); static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *); @@ -1551,23 +1551,23 @@ static const struct block_device_operations sd_fops = { /** * sd_eh_action - error handling callback * @scmd: sd-issued command that has failed - * @eh_cmnd: The command that was sent during
[PATCHv9 0/4] New EH command timeout handler
Hi all, this patchset implements a new SCSI EH command timeout handler which will be sending command aborts inline without actually engaging SCSI EH. SCSI EH will only be invoked if command abort fails. In addition the commands will be returned directly if the command abort succeeded, cutting down recovery times dramatically. With the original SCSI EH I got: # time dd if=/dev/zero of=/dev/dm-2 bs=4k count=4k oflag=direct 4096+0 records in 4096+0 records out 16777216 bytes (17 MB) copied, 142.652 s, 118 kB/s real2m22.657s user0m0.013s sys 0m0.145s With this patchset I got: # time dd if=/dev/zero of=/dev/dm-2 bs=4k count=4k oflag=direct 4096+0 records in 4096+0 records out 16777216 bytes (17 MB) copied, 52.1579 s, 322 kB/s real0m52.163s user0m0.012s sys 0m0.145s Test was to disable RSCN on the target port, disable the target port, and then start the 'dd' command as indicated. Changes to the original version: - Use a private list in scsi_eh_abort_handler to avoid list starvation (pointed out by Joern Engel) - Terminate command aborts when the first abort fails - Do not attempt command aborts if the host is already in recovery or if the device is removed. - Flush abort workqueue if the device is removed. Changes to v2: - Removed eh_entry initialisation - Convert to per-command workqueue Changes to v3: - Use delayed_work - Enable new eh timeout handler for virtio, SAS, and FC - Modify logging messages to include scmd pointer Changes to v4: - Remove stubs when enabling new eh timeout handler for other drivers Changes to v5: - Enable new eh timeout handler per default - Update documentation Changes to v6: - Include changes from James Bottomley for erratic device offline patch - Rearrange patches - Update SCSI midlayer documentation Changes to v7: - Merge obsolete patch Changes to v8: - Include fixes from hch - Include patch from Ren Mingxin to set eh_deadline to '0' Hannes Reinecke (2): scsi: improved eh timeout handler scsi: Update documentation James Bottomley (1): scsi: Fix erratic device offline during EH Ren Mingxin (1): scsi: Set the minimum valid value of 'eh_deadline' as 0 Documentation/scsi/scsi_eh.txt | 69 ++- Documentation/scsi/scsi_mid_low_api.txt | 9 +- drivers/scsi/hosts.c| 16 ++- drivers/scsi/scsi.c | 9 +- drivers/scsi/scsi_error.c | 199 +++- drivers/scsi/scsi_priv.h| 2 + drivers/scsi/scsi_sysfs.c | 36 -- drivers/scsi/sd.c | 26 ++--- include/scsi/scsi_cmnd.h| 1 + include/scsi/scsi_driver.h | 2 +- include/scsi/scsi_host.h| 5 + 11 files changed, 282 insertions(+), 92 deletions(-) -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/4] scsi: improved eh timeout handler
When a command runs into a timeout we need to send an 'ABORT TASK' TMF. This is typically done by the 'eh_abort_handler' LLDD callback. Conceptually, however, this function is a normal SCSI command, so there is no need to enter the error handler. This patch implements a new scsi_abort_command() function which invokes an asynchronous function scsi_eh_abort_handler() to abort the commands via the usual 'eh_abort_handler'. If abort succeeds the command is either retried or terminated, depending on the number of allowed retries. However, 'eh_eflags' records the abort, so if the retry would fail again the command is pushed onto the error handler without trying to abort it (again); it'll be cleared up from SCSI EH. Signed-off-by: Hannes Reinecke Cc: Ren Mingxin Cc: Christoph Hellwig --- drivers/scsi/scsi.c | 3 + drivers/scsi/scsi_error.c | 151 ++ drivers/scsi/scsi_priv.h | 2 + include/scsi/scsi_cmnd.h | 1 + include/scsi/scsi_host.h | 5 ++ 5 files changed, 149 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index fe0bcb1..2b04a57 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -297,6 +297,7 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask) cmd->device = dev; INIT_LIST_HEAD(&cmd->list); + INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler); spin_lock_irqsave(&dev->list_lock, flags); list_add_tail(&cmd->list, &dev->cmd_list); spin_unlock_irqrestore(&dev->list_lock, flags); @@ -353,6 +354,8 @@ void scsi_put_command(struct scsi_cmnd *cmd) list_del_init(&cmd->list); spin_unlock_irqrestore(&cmd->device->list_lock, flags); + cancel_delayed_work(&cmd->abort_work); + __scsi_put_command(cmd->device->host, cmd, &sdev->sdev_gendev); } EXPORT_SYMBOL(scsi_put_command); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 67c0014..6a137fa 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -53,6 +53,8 @@ static void scsi_eh_done(struct scsi_cmnd *scmd); #define HOST_RESET_SETTLE_TIME (10) static int scsi_eh_try_stu(struct scsi_cmnd *scmd); +static int scsi_try_to_abort_cmd(struct scsi_host_template *, +struct scsi_cmnd *); /* called with shost->host_lock held */ void scsi_eh_wakeup(struct Scsi_Host *shost) @@ -100,6 +102,116 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) } /** + * scmd_eh_abort_handler - Handle command aborts + * @work: command to be aborted. + */ +void +scmd_eh_abort_handler(struct work_struct *work) +{ + struct scsi_cmnd *scmd = + container_of(work, struct scsi_cmnd, abort_work.work); + struct scsi_device *sdev = scmd->device; + unsigned long flags; + int rtn; + + spin_lock_irqsave(sdev->host->host_lock, flags); + if (scsi_host_eh_past_deadline(sdev->host)) { + spin_unlock_irqrestore(sdev->host->host_lock, flags); + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "scmd %p eh timeout, not aborting\n", + scmd)); + } else { + spin_unlock_irqrestore(sdev->host->host_lock, flags); + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "aborting command %p\n", scmd)); + rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd); + if (rtn == SUCCESS) { + scmd->result |= DID_TIME_OUT << 16; + if (!scsi_noretry_cmd(scmd) && + (++scmd->retries <= scmd->allowed)) { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_WARNING, scmd, + "scmd %p retry " + "aborted command\n", scmd)); + scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); + } else { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_WARNING, scmd, + "scmd %p finish " + "aborted command\n", scmd)); + scsi_finish_command(scmd); + } + return; + } + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "scmd %p abort failed, rtn %d\n", + scmd, rtn)); + } + + if (scsi_eh_scmd_add(scmd, 0)) { + SCSI_LOG_ERROR_RECOV
Re: [PATCH 2/3] scsi: improved eh timeout handler
On Mon, 2013-11-04 at 14:36 +0100, Hannes Reinecke wrote: > On 10/31/2013 04:49 PM, Christoph Hellwig wrote: > > Looks reasonable to me, but a few minor nitpicks: > > > >> + spin_lock_irqsave(sdev->host->host_lock, flags); > >> + if (scsi_host_eh_past_deadline(sdev->host)) { > > > > I don't have the implementation of scsi_host_eh_past_deadline in my > > local tree, but do we really need the host lock for it? > > > Yes. The eh_deadline variable might be set from an interrupt context > or from userland, so we need to protect access to it. That's not really true. on all our supported architectures 32 bit reads/writes are atomic, which means that if one CPU writes a word at the same time another reads one, the reader is guaranteed to see either the old or the new data. Given the expense of lock cache line bouncing on the newer architectures, we really want to avoid a spinlock where possible. In this case, the problem with the implementation is that the writer might set eh_deadline to zero, but this is fixable in scsi_host_eh_past_deadline() by checking for zero before and after the time_before (for the zero to non-zero and non-zero to zero cases). James -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] scsi: improved eh timeout handler
On 11/04/2013 03:25 PM, James Bottomley wrote: > On Mon, 2013-11-04 at 14:36 +0100, Hannes Reinecke wrote: >> On 10/31/2013 04:49 PM, Christoph Hellwig wrote: >>> Looks reasonable to me, but a few minor nitpicks: >>> + spin_lock_irqsave(sdev->host->host_lock, flags); + if (scsi_host_eh_past_deadline(sdev->host)) { >>> >>> I don't have the implementation of scsi_host_eh_past_deadline in my >>> local tree, but do we really need the host lock for it? >>> >> Yes. The eh_deadline variable might be set from an interrupt context >> or from userland, so we need to protect access to it. > > That's not really true. on all our supported architectures 32 bit > reads/writes are atomic, which means that if one CPU writes a word at > the same time another reads one, the reader is guaranteed to see either > the old or the new data. Given the expense of lock cache line bouncing > on the newer architectures, we really want to avoid a spinlock where > possible. > > In this case, the problem with the implementation is that the writer > might set eh_deadline to zero, but this is fixable in > scsi_host_eh_past_deadline() by checking for zero before and after the > time_before (for the zero to non-zero and non-zero to zero cases). > IE you mean something like that attached patch? Cheers, Hannes -- Dr. Hannes Reinecke zSeries & Storage h...@suse.de +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 6a137fa..8abf7ba 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -94,8 +94,10 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) if (!shost->last_reset || !shost->eh_deadline) return 0; + /* Double check eh_deadline to catch atomic updates */ if (time_before(jiffies, - shost->last_reset + shost->eh_deadline)) + shost->last_reset + shost->eh_deadline) && + shost->eh_deadline) return 0; return 1; @@ -114,15 +116,12 @@ scmd_eh_abort_handler(struct work_struct *work) unsigned long flags; int rtn; - spin_lock_irqsave(sdev->host->host_lock, flags); if (scsi_host_eh_past_deadline(sdev->host)) { - spin_unlock_irqrestore(sdev->host->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "scmd %p eh timeout, not aborting\n", scmd)); } else { - spin_unlock_irqrestore(sdev->host->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "aborting command %p\n", scmd)); @@ -1140,16 +1139,13 @@ int scsi_eh_get_sense(struct list_head *work_q, continue; shost = scmd->device->host; - spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_eh_past_deadline(shost)) { - spin_unlock_irqrestore(shost->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "skip %s, past eh deadline\n", __func__)); break; } - spin_unlock_irqrestore(shost->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd, "%s: requesting sense\n", current->comm)); @@ -1242,19 +1238,15 @@ static int scsi_eh_test_devices(struct list_head *cmd_list, sdev = scmd->device; if (!try_stu) { - spin_lock_irqsave(sdev->host->host_lock, flags); if (scsi_host_eh_past_deadline(sdev->host)) { /* Push items back onto work_q */ list_splice_init(cmd_list, work_q); -spin_unlock_irqrestore(sdev->host->host_lock, - flags); SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, sdev->host, "skip %s, past eh deadline", __func__)); break; } - spin_unlock_irqrestore(sdev->host->host_lock, flags); } finish_cmds = !scsi_device_online(scmd->device) || @@ -1301,9 +1293,7 @@ static int scsi_eh_abort_cmds(struct list_head *work_q, if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD)) continue; shost = scmd->device->host; - spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_eh_past_deadline(shost)) { - spin_unlock_irqrestore(shost->host_lock, flags); list_splice_init(&check_list, work_q); SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, @@ -1311,7 +1301,6 @@ static int scsi_eh_abort_cmds(struct list_head *work_q, __func__)); return list_empty(work_q); } - spin_unlock_irqrestore(shost->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:" "0x%p\n", current->comm, scmd)); @@ -1378,16 +1367,13 @@ static int scsi_eh_stu(struct Scsi_Host *shost, unsigned long flags; shost_for_each_device(sdev, shost) { - spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_eh_past_deadline(shost)) { - spin_unlock_irqrestore(shost->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "skip %s, past eh deadline\n",
Re: [PATCH 2/3] scsi: improved eh timeout handler
On Mon, 2013-11-04 at 15:46 +0100, Hannes Reinecke wrote: > On 11/04/2013 03:25 PM, James Bottomley wrote: > > On Mon, 2013-11-04 at 14:36 +0100, Hannes Reinecke wrote: > >> On 10/31/2013 04:49 PM, Christoph Hellwig wrote: > >>> Looks reasonable to me, but a few minor nitpicks: > >>> > +spin_lock_irqsave(sdev->host->host_lock, flags); > +if (scsi_host_eh_past_deadline(sdev->host)) { > >>> > >>> I don't have the implementation of scsi_host_eh_past_deadline in my > >>> local tree, but do we really need the host lock for it? > >>> > >> Yes. The eh_deadline variable might be set from an interrupt context > >> or from userland, so we need to protect access to it. > > > > That's not really true. on all our supported architectures 32 bit > > reads/writes are atomic, which means that if one CPU writes a word at > > the same time another reads one, the reader is guaranteed to see either > > the old or the new data. Given the expense of lock cache line bouncing > > on the newer architectures, we really want to avoid a spinlock where > > possible. > > > > In this case, the problem with the implementation is that the writer > > might set eh_deadline to zero, but this is fixable in > > scsi_host_eh_past_deadline() by checking for zero before and after the > > time_before (for the zero to non-zero and non-zero to zero cases). > > > IE you mean something like that attached patch? Yes (except that there should be a comment explaining why we do the read twice), I think the cost of the extra read check is much less than the spinlock on all of our platforms. James -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 2/2] block,scsi: convert and handle ERR_PTR from blk_get_request
On 10/31/2013 03:50 PM, Joe Lawrence wrote: > The blk_get_request function may fail in low-memory conditions or during > device removal (even if __GFP_WAIT is set). To distinguish between these > errors, modify the blk_get_request call stack to return the appropriate > ERR_PTR. Verify that all callers check the return status and consider > IS_ERR instead of a simple NULL pointer check. > > Signed-off-by: Joe Lawrence > --- <> > diff --git a/drivers/scsi/osd/osd_initiator.c > b/drivers/scsi/osd/osd_initiator.c > index aa66361ed44b..0250efda647b 100644 > --- a/drivers/scsi/osd/osd_initiator.c > +++ b/drivers/scsi/osd/osd_initiator.c > @@ -1567,8 +1567,8 @@ static struct request *_make_request(struct > request_queue *q, bool has_write, > struct request *req; > > req = blk_get_request(q, has_write ? WRITE : READ, flags); > - if (unlikely(!req)) > - return ERR_PTR(-ENOMEM); > + if (unlikely(IS_ERR(req))) Just a nit IS_ERR already has an unlikely so it can be dropped here. (No bigy) ACK-by: Boaz Harrosh > + return req; > > return req; > } <> Thanks Boaz -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] scsi: improved eh timeout handler
On 11/04/2013 03:50 PM, James Bottomley wrote: > On Mon, 2013-11-04 at 15:46 +0100, Hannes Reinecke wrote: >> On 11/04/2013 03:25 PM, James Bottomley wrote: >>> On Mon, 2013-11-04 at 14:36 +0100, Hannes Reinecke wrote: On 10/31/2013 04:49 PM, Christoph Hellwig wrote: > Looks reasonable to me, but a few minor nitpicks: > >> +spin_lock_irqsave(sdev->host->host_lock, flags); >> +if (scsi_host_eh_past_deadline(sdev->host)) { > > I don't have the implementation of scsi_host_eh_past_deadline in my > local tree, but do we really need the host lock for it? > Yes. The eh_deadline variable might be set from an interrupt context or from userland, so we need to protect access to it. >>> >>> That's not really true. on all our supported architectures 32 bit >>> reads/writes are atomic, which means that if one CPU writes a word at >>> the same time another reads one, the reader is guaranteed to see either >>> the old or the new data. Given the expense of lock cache line bouncing >>> on the newer architectures, we really want to avoid a spinlock where >>> possible. >>> >>> In this case, the problem with the implementation is that the writer >>> might set eh_deadline to zero, but this is fixable in >>> scsi_host_eh_past_deadline() by checking for zero before and after the >>> time_before (for the zero to non-zero and non-zero to zero cases). >>> >> IE you mean something like that attached patch? > > Yes (except that there should be a comment explaining why we do the read > twice), I think the cost of the extra read check is much less than the > spinlock on all of our platforms. > So, this is what I've ended up with; sadly I had to use 'volatile' here which checkpatch doesn't like. I _could_ move eh_deadline to be atomic, that would avoid the 'volatile' setting. Feels like an overkill, though. Cheers, Hannes -- Dr. Hannes Reinecke zSeries & Storage h...@suse.de +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) >From 283f1b50e833fad969323531ccd0ce889a5e4044 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 4 Nov 2013 16:23:36 +0100 Subject: [PATCH 3/5] scsi: Unlock accesses to eh_deadline 32bit accesses are guaranteed to be atomic, so we can remove the spinlock when checking for eh_deadline. We only need to make sure to catch any updates which might happened during the call to time_before(); if so we just recheck with the correct value. Signed-off-by: Hannes Reinecke --- drivers/scsi/scsi_error.c | 54 +++ 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 7eecbb5..d122e89 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -91,13 +91,26 @@ EXPORT_SYMBOL_GPL(scsi_schedule_eh); static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) { - if (!shost->last_reset || !shost->eh_deadline) - return 0; + volatile int eh_deadline; - if (time_before(jiffies, - shost->last_reset + shost->eh_deadline)) +recheck: + eh_deadline = shost->eh_deadline; + if (!shost->last_reset || !eh_deadline) return 0; + /* + * 32bit accesses are guaranteed to be atomic + * (on all supported architectures), so instead + * of using a spinlock we can as well double check + * if eh_deadline has been modified after the + * time_before call; if so we need to recheck + * with the correct values. + */ + if (time_before(jiffies, shost->last_reset + eh_deadline)) { + if (eh_deadline != shost->eh_deadline) + goto recheck; + return 0; + } return 1; } @@ -111,18 +124,14 @@ scmd_eh_abort_handler(struct work_struct *work) struct scsi_cmnd *scmd = container_of(work, struct scsi_cmnd, abort_work.work); struct scsi_device *sdev = scmd->device; - unsigned long flags; int rtn; - spin_lock_irqsave(sdev->host->host_lock, flags); if (scsi_host_eh_past_deadline(sdev->host)) { - spin_unlock_irqrestore(sdev->host->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "scmd %p eh timeout, not aborting\n", scmd)); } else { - spin_unlock_irqrestore(sdev->host->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "aborting command %p\n", scmd)); @@ -1132,7 +1141,6 @@ int scsi_eh_get_sense(struct list_head *work_q, struct scsi_cmnd *scmd, *next; struct Scsi_Host *shost; int rtn; - unsigned long flags; list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) || @@ -1140,16 +1148,13 @@ int scsi_eh_get_sense(struct list_head *work_q, continue; shost = scmd->device->host; - spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_eh_past_deadline(shost)) { - spin_unlock_irqrestore(shost->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3,
Re: scsi-mq + open-iscsi support patches..?
On Sat, 2013-11-02 at 16:10 +, Jayamohan Kallickal wrote: > >> On a related note, some iscsi vendor has been hitting a crash with > >> your tree. > > >I got an email from Jayamohan recently, but the OOPs did not appear to be > >scsi-mq related.. > > I do see the crash on my Ubuntu VM running on VirtualBox but don't see the > crash on a Standalone system. > Attaching the screenshot > Hi Jayamohan, Care to enable the virtual serial port for this VM in order to grab the full dmesg output..? Also, an idea of what SATA emulation is being used here would be helpful as well. (eg: lspci -vvv for the ATA device on the running system) --nab -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] libata: use sleep instead of standby command
The ATA SLEEP mode saves some more power than SUSPEND, and has basically the same recovery time, so use it instead. Signed-off-by: Phillip Susi --- drivers/ata/libata-scsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 97a0cef..79b75fd 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1362,8 +1362,8 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) system_entering_hibernation()) goto skip; - /* Issue ATA STANDBY IMMEDIATE command */ - tf->command = ATA_CMD_STANDBYNOW1; + /* Issue ATA SLEEP command */ + tf->command = ATA_CMD_SLEEP; } /* -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] libata: avoid waking disk to check power
When a disk is in SLEEP mode it can not respond to commands, including the CHECK POWER command. Instead of waking up the sleeping disk, fake the reply to the CHECK POWER command to indicate the disk is in standby mode. This prevents udisks from waking up sleeping disks when it polls to see if they are awake or not before trying to read their smart status. Signed-off-by: Phillip Susi --- drivers/ata/libata-core.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 83b1a9f..573d151 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5084,6 +5084,13 @@ void ata_qc_issue(struct ata_queued_cmd *qc) /* if device is sleeping, schedule reset and abort the link */ if (unlikely(qc->dev->flags & ATA_DFLAG_SLEEPING)) { + if (unlikely(qc->tf.command == ATA_CMD_CHK_POWER)) + { + /* fake reply to avoid waking drive */ + qc->result_tf.nsect = 0; + ata_qc_complete(qc); + return; + } link->eh_info.action |= ATA_EH_RESET; ata_ehi_push_desc(&link->eh_info, "waking up from sleep"); ata_link_abort(link); -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] scsi: improved eh timeout handler
On Mon, 2013-11-04 at 16:43 +0100, Hannes Reinecke wrote: > On 11/04/2013 03:50 PM, James Bottomley wrote: > > On Mon, 2013-11-04 at 15:46 +0100, Hannes Reinecke wrote: > >> On 11/04/2013 03:25 PM, James Bottomley wrote: > >>> On Mon, 2013-11-04 at 14:36 +0100, Hannes Reinecke wrote: > On 10/31/2013 04:49 PM, Christoph Hellwig wrote: > > Looks reasonable to me, but a few minor nitpicks: > > > >> + spin_lock_irqsave(sdev->host->host_lock, flags); > >> + if (scsi_host_eh_past_deadline(sdev->host)) { > > > > I don't have the implementation of scsi_host_eh_past_deadline in my > > local tree, but do we really need the host lock for it? > > > Yes. The eh_deadline variable might be set from an interrupt context > or from userland, so we need to protect access to it. > >>> > >>> That's not really true. on all our supported architectures 32 bit > >>> reads/writes are atomic, which means that if one CPU writes a word at > >>> the same time another reads one, the reader is guaranteed to see either > >>> the old or the new data. Given the expense of lock cache line bouncing > >>> on the newer architectures, we really want to avoid a spinlock where > >>> possible. > >>> > >>> In this case, the problem with the implementation is that the writer > >>> might set eh_deadline to zero, but this is fixable in > >>> scsi_host_eh_past_deadline() by checking for zero before and after the > >>> time_before (for the zero to non-zero and non-zero to zero cases). > >>> > >> IE you mean something like that attached patch? > > > > Yes (except that there should be a comment explaining why we do the read > > twice), I think the cost of the extra read check is much less than the > > spinlock on all of our platforms. > > > So, this is what I've ended up with; sadly I had to use 'volatile' > here which checkpatch doesn't like. Why? Volatile has no real meaning on a local variable. You can just do an ordinary eh_deadline = shost->eh_deadline; and it will see either the before or after value. > I _could_ move eh_deadline to be atomic, that would avoid the > 'volatile' setting. Feels like an overkill, though. Please dump the recheck loop and just check for zero again. The race is acceptable because we're not trying to mediate it in a meaningful way. As long as the result is consistent with either the before or after value, that's fine. James -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] libata: use sleep instead of standby command
+linux-ide On 11/05/2013 08:52 AM, Phillip Susi wrote: > The ATA SLEEP mode saves some more power than SUSPEND, and > has basically the same recovery time, so use it instead. I suppose this is mainly for runtime PM? Since for system suspend/hibernation, the disk and its controller will be powered off anyway. Best regards, Aaron > > Signed-off-by: Phillip Susi > --- > drivers/ata/libata-scsi.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c > index 97a0cef..79b75fd 100644 > --- a/drivers/ata/libata-scsi.c > +++ b/drivers/ata/libata-scsi.c > @@ -1362,8 +1362,8 @@ static unsigned int ata_scsi_start_stop_xlat(struct > ata_queued_cmd *qc) >system_entering_hibernation()) > goto skip; > > - /* Issue ATA STANDBY IMMEDIATE command */ > - tf->command = ATA_CMD_STANDBYNOW1; > + /* Issue ATA SLEEP command */ > + tf->command = ATA_CMD_SLEEP; > } > > /* > -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Disk wakeup on resume
-BEGIN PGP SIGNED MESSAGE- Hash: SHA512 I can not figure out what is waking up disks on resume from suspend. I thought it was sd.c, and setting manage_start_stop = 0 should stop that. It does stop the message printed saying it is being started, yet the disk is still started, and this makes the resume take nearly 10 seconds. So it seems sd_resume()'s attempt to start the disk is pointless and redundant, and something else is starting up the disk. Oddly, if I unbind the sd driver ( echo 1 > /sys/block/sdx/device/delete ), then after a resume the disk remains off ( and spins up again on rescan ). I don't see why the kernel needs to delay completing the resume until after the disks have spun up when they will automatically spin up when accessed, which may be never ( thus they should be left spun down ), but I can not find the culprit causing this. -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.14 (GNU/Linux) Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/ iQEcBAEBCgAGBQJSeFnWAAoJEJrBOlT6nu758QwIAJAhlQ5o6qDG9fm98w0j7MY9 OYp/NTd/nO4LGPGaTob/JCyuMDPMtXCOdueLxStlqnBLnPgfANZWD+VSSbsm/uae pzvjihe6uU+mTCbPW0MNPEiFpH2mfVY8NMhrAEI/n0j9fjgd7E33NI2fedymEDPk LcgdOtdQP3VSMMs6pjPRpkpVBJG1g3Y6XPLV8IFj8RPz6lUj3k5qj45lsgf//Am8 5vD8SRoZZd7bZghqOCeUU2I/1kntKI5qz4cDUQq6wTVwqAuhLbpjFMOk5lX3gg/h Q25FGU6Rc8nMyLw4eLbRA8Tvh2aLX5anPkdJqkb132ulbmqP7b8hjUZ4qtNQ/wA= =JmIT -END PGP SIGNATURE- -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] libata: avoid waking disk to check power
+linux-ide On 11/05/2013 08:53 AM, Phillip Susi wrote: > When a disk is in SLEEP mode it can not respond to commands, > including the CHECK POWER command. Instead of waking up the > sleeping disk, fake the reply to the CHECK POWER command to > indicate the disk is in standby mode. This prevents udisks > from waking up sleeping disks when it polls to see if they > are awake or not before trying to read their smart status. If the disk entered sleep mode due to runtime PM, then udisks can easily tell by checking the device's runtime status and not send out the query. But if the disk entered sleep mode due to other reason, the patch may be necessary. So what's your scenario? Best regards, Aaron > > Signed-off-by: Phillip Susi > --- > drivers/ata/libata-core.c | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c > index 83b1a9f..573d151 100644 > --- a/drivers/ata/libata-core.c > +++ b/drivers/ata/libata-core.c > @@ -5084,6 +5084,13 @@ void ata_qc_issue(struct ata_queued_cmd *qc) > > /* if device is sleeping, schedule reset and abort the link */ > if (unlikely(qc->dev->flags & ATA_DFLAG_SLEEPING)) { > + if (unlikely(qc->tf.command == ATA_CMD_CHK_POWER)) > + { > + /* fake reply to avoid waking drive */ > + qc->result_tf.nsect = 0; > + ata_qc_complete(qc); > + return; > + } > link->eh_info.action |= ATA_EH_RESET; > ata_ehi_push_desc(&link->eh_info, "waking up from sleep"); > ata_link_abort(link); > -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] libata: use sleep instead of standby command
-BEGIN PGP SIGNED MESSAGE- Hash: SHA512 On 11/04/2013 09:23 PM, Aaron Lu wrote: > I suppose this is mainly for runtime PM? Since for system > suspend/hibernation, the disk and its controller will be powered > off anyway. Yes, or the second patch also helps when one manually issues hdparm - -Y, which otherwise will cause the drive to be woken up to answer the CHECK POWER command, which udisks issues to decide if it should skip polling the SMART status of the drive. -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.14 (GNU/Linux) Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/ iQEcBAEBCgAGBQJSeFpOAAoJEJrBOlT6nu75JdwIALFvwncNyf1ENxnIho2Y3CXC N6BpHQ7CzD1tap/5ybSFPBvUvJfyC/UPAY69bjldOtbwnArDIWRFqEWW7JWm6G0z J1C5MMCIRHSuRo4RbN/rnsPQFsaFJy0Z5kJ0uXX+aOrZwEov5vo2MEuj/DEdCBDC SKpYkogDlSHTNhFsaEF5iJiJlLU9FF5pEM6S6P0/+Z+SrZ5PVCDOm9rK+oeqrn0U TBX5tKPxWRSjJ/2F094DYia5l8ODNgzl1gPDXA50a+A4M0CZi3d5Liz5Ctfnv88A ac5cG8Ae8MDss0kJmSMCuRwkKVIROM3qMxf1wDWjBwm3eCTAJV1gxRYSN0adOvc= =Rfny -END PGP SIGNATURE- -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] libata: use sleep instead of standby command
On 11/05/2013 10:39 AM, Phillip Susi wrote: > -BEGIN PGP SIGNED MESSAGE- > Hash: SHA512 > > On 11/04/2013 09:23 PM, Aaron Lu wrote: >> I suppose this is mainly for runtime PM? Since for system >> suspend/hibernation, the disk and its controller will be powered >> off anyway. > > Yes, or the second patch also helps when one manually issues hdparm > - -Y, which otherwise will cause the drive to be woken up to answer the > CHECK POWER command, which udisks issues to decide if it should skip > polling the SMART status of the drive. OK, I see. I wish udidks can do both of these(polling smart status and put the disk into sleep mode), so that it knows when the disk is in sleep mode and it will not send out the needless poll... -Aaron -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] libata: avoid waking disk to check power
-BEGIN PGP SIGNED MESSAGE- Hash: SHA512 On 11/04/2013 09:39 PM, Aaron Lu wrote: > If the disk entered sleep mode due to runtime PM, then udisks can > easily tell by checking the device's runtime status and not send > out the query. > > But if the disk entered sleep mode due to other reason, the patch > may be necessary. So what's your scenario? I am planning on patching udisks to do that as well, but if you have a current udsisk or manually run hdparm -C, or some other program is doing something similar, then this patch takes care of that. -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.14 (GNU/Linux) Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/ iQEcBAEBCgAGBQJSeFvFAAoJEJrBOlT6nu75hcYH/0YsYcs3u9Dmuqq9z5kFXL3k C4xYQtvOxw2z+B/vEFzdym5J3ZfVyT4+ZdcHMvRgbbPH+R8/rzNC+F3OjdGtDIRA 5EUo0BUT2IfY0Yp2SzP44+8aRC4iF5jLLcH9XJKQx7WHedJQ7puCgJXXHAgUhpwW 52gJ0UEHnjlBgjwxY9RKqppnJs57x2VwgwR2BeFns6Hcry0S0HN0U4gwROmb1GZR kjpOUUYHb3MH/gfzCaY97t0duAAp7jzZeOIuu+XHfkyV3APwsnbasLOOoRYtpbvQ M++xmYQdNAh2DcZLEmAo6H6HLB/Z5d9K0+SKUFnsKHFaUCH0GPHm8SZ7E/NN7ig= =7c7v -END PGP SIGNATURE- -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/5] scsi: improved eh timeout handler
When a command runs into a timeout we need to send an 'ABORT TASK' TMF. This is typically done by the 'eh_abort_handler' LLDD callback. Conceptually, however, this function is a normal SCSI command, so there is no need to enter the error handler. This patch implements a new scsi_abort_command() function which invokes an asynchronous function scsi_eh_abort_handler() to abort the commands via the usual 'eh_abort_handler'. If abort succeeds the command is either retried or terminated, depending on the number of allowed retries. However, 'eh_eflags' records the abort, so if the retry would fail again the command is pushed onto the error handler without trying to abort it (again); it'll be cleared up from SCSI EH. Signed-off-by: Hannes Reinecke Cc: Ren Mingxin Cc: Christoph Hellwig --- drivers/scsi/scsi.c | 3 + drivers/scsi/scsi_error.c | 151 ++ drivers/scsi/scsi_priv.h | 2 + include/scsi/scsi_cmnd.h | 1 + include/scsi/scsi_host.h | 5 ++ 5 files changed, 149 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index fe0bcb1..2b04a57 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -297,6 +297,7 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask) cmd->device = dev; INIT_LIST_HEAD(&cmd->list); + INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler); spin_lock_irqsave(&dev->list_lock, flags); list_add_tail(&cmd->list, &dev->cmd_list); spin_unlock_irqrestore(&dev->list_lock, flags); @@ -353,6 +354,8 @@ void scsi_put_command(struct scsi_cmnd *cmd) list_del_init(&cmd->list); spin_unlock_irqrestore(&cmd->device->list_lock, flags); + cancel_delayed_work(&cmd->abort_work); + __scsi_put_command(cmd->device->host, cmd, &sdev->sdev_gendev); } EXPORT_SYMBOL(scsi_put_command); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 67c0014..7eecbb5 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -53,6 +53,8 @@ static void scsi_eh_done(struct scsi_cmnd *scmd); #define HOST_RESET_SETTLE_TIME (10) static int scsi_eh_try_stu(struct scsi_cmnd *scmd); +static int scsi_try_to_abort_cmd(struct scsi_host_template *, +struct scsi_cmnd *); /* called with shost->host_lock held */ void scsi_eh_wakeup(struct Scsi_Host *shost) @@ -100,6 +102,116 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) } /** + * scmd_eh_abort_handler - Handle command aborts + * @work: command to be aborted. + */ +void +scmd_eh_abort_handler(struct work_struct *work) +{ + struct scsi_cmnd *scmd = + container_of(work, struct scsi_cmnd, abort_work.work); + struct scsi_device *sdev = scmd->device; + unsigned long flags; + int rtn; + + spin_lock_irqsave(sdev->host->host_lock, flags); + if (scsi_host_eh_past_deadline(sdev->host)) { + spin_unlock_irqrestore(sdev->host->host_lock, flags); + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "scmd %p eh timeout, not aborting\n", + scmd)); + } else { + spin_unlock_irqrestore(sdev->host->host_lock, flags); + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "aborting command %p\n", scmd)); + rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd); + if (rtn == SUCCESS) { + scmd->result |= DID_TIME_OUT << 16; + if (!scsi_noretry_cmd(scmd) && + (++scmd->retries <= scmd->allowed)) { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_WARNING, scmd, + "scmd %p retry " + "aborted command\n", scmd)); + scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); + } else { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_WARNING, scmd, + "scmd %p finish " + "aborted command\n", scmd)); + scsi_finish_command(scmd); + } + return; + } + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "scmd %p abort failed, rtn %d\n", + scmd, rtn)); + } + + if (!scsi_eh_scmd_add(scmd, 0)) { + SCSI_LOG_ERROR_RECO
[PATCH 5/5] scsi: Update documentation
The documentation has gone out-of-sync, so update it to the current status. Signed-off-by: Hannes Reinecke --- Documentation/scsi/scsi_eh.txt | 69 +++-- Documentation/scsi/scsi_mid_low_api.txt | 9 - drivers/scsi/scsi.c | 6 +-- 3 files changed, 48 insertions(+), 36 deletions(-) diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt index 6ff16b6..a0c8511 100644 --- a/Documentation/scsi/scsi_eh.txt +++ b/Documentation/scsi/scsi_eh.txt @@ -42,20 +42,14 @@ discussion. Once LLDD gets hold of a scmd, either the LLDD will complete the command by calling scsi_done callback passed from midlayer when -invoking hostt->queuecommand() or SCSI midlayer will time it out. +invoking hostt->queuecommand() or the block layer will time it out. [1-2-1] Completing a scmd w/ scsi_done For all non-EH commands, scsi_done() is the completion callback. It -does the following. - - 1. Delete timeout timer. If it fails, it means that timeout timer -has expired and is going to finish the command. Just return. - - 2. Link scmd to per-cpu scsi_done_q using scmd->en_entry - - 3. Raise SCSI_SOFTIRQ +just calls blk_complete_request() to delete the block layer timer and +raise SCSI_SOFTIRQ SCSI_SOFTIRQ handler scsi_softirq calls scsi_decide_disposition() to determine what to do with the command. scsi_decide_disposition() @@ -64,10 +58,12 @@ with the command. - SUCCESS scsi_finish_command() is invoked for the command. The - function does some maintenance choirs and notify completion by - calling scmd->done() callback, which, for fs requests, would - be HLD completion callback - sd:sd_rw_intr, sr:rw_intr, - st:st_intr. + function does some maintenance chores and then calls + scsi_io_completion() to finish the I/O. + scsi_io_completion() then notifies the block layer on + the completed request by calling blk_end_request and + friends or figures out what to do with the remainder + of the data in case of an error. - NEEDS_RETRY - ADD_TO_MLQUEUE @@ -86,33 +82,45 @@ function 1. invokes optional hostt->eh_timed_out() callback. Return value can be one of -- EH_HANDLED - This indicates that eh_timed_out() dealt with the timeout. The - scmd is passed to __scsi_done() and thus linked into per-cpu - scsi_done_q. Normal command completion described in [1-2-1] - follows. +- BLK_EH_HANDLED + This indicates that eh_timed_out() dealt with the timeout. + The command is passed back to the block layer and completed + via __blk_complete_requests(). + + *NOTE* After returning BLK_EH_HANDLED the SCSI layer is + assumed to be finished with the command, and no other + functions from the SCSI layer will be called. So this + should typically only be returned if the eh_timed_out() + handler raced with normal completion. -- EH_RESET_TIMER +- BLK_EH_RESET_TIMER This indicates that more time is required to finish the command. Timer is restarted. This action is counted as a retry and only allowed scmd->allowed + 1(!) times. Once the - limit is reached, action for EH_NOT_HANDLED is taken instead. + limit is reached, action for BLK_EH_NOT_HANDLED is taken instead. - *NOTE* This action is racy as the LLDD could finish the scmd - after the timeout has expired but before it's added back. In - such cases, scsi_done() would think that timeout has occurred - and return without doing anything. We lose completion and the - command will time out again. - -- EH_NOT_HANDLED - This is the same as when eh_timed_out() callback doesn't exist. +- BLK_EH_NOT_HANDLED +eh_timed_out() callback did not handle the command. Step #2 is taken. + 2. If the host supports asynchronous completion (as indicated by the +no_async_abort setting in the host template) scsi_abort_command() +is invoked to schedule an asynchrous abort. If that fails +Step #3 is taken. + 2. scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD) is invoked for the command. See [1-3] for more information. +[1-3] Asynchronous command aborts + + After a timeout occurs a command abort is scheduled from + scsi_abort_command(). If the abort is successful the command + will either be retried (if the number of retries is not exhausted) + or terminated with DID_TIME_OUT. + Otherwise scsi_eh_scmd_add() is invoked for the command. + See [1-4] for more information. -[1-3] How EH takes over +[1-4] How EH takes over scmds enter EH via scsi_eh_scmd_add(), which does the following. @@ -320,7 +328,8 @@ scmd->allowed. <> - This action is taken for each timed out command. + This action is taken for each timed out command when + no_async_abort is enabled in the host template. hostt->eh_abort_handler() i
[PATCHv10 0/5] New EH command timeout handler
Hi all, this patchset implements a new SCSI EH command timeout handler which will be sending command aborts inline without actually engaging SCSI EH. SCSI EH will only be invoked if command abort fails. In addition the commands will be returned directly if the command abort succeeded, cutting down recovery times dramatically. With the original SCSI EH I got: # time dd if=/dev/zero of=/dev/dm-2 bs=4k count=4k oflag=direct 4096+0 records in 4096+0 records out 16777216 bytes (17 MB) copied, 142.652 s, 118 kB/s real2m22.657s user0m0.013s sys 0m0.145s With this patchset I got: # time dd if=/dev/zero of=/dev/dm-2 bs=4k count=4k oflag=direct 4096+0 records in 4096+0 records out 16777216 bytes (17 MB) copied, 52.1579 s, 322 kB/s real0m52.163s user0m0.012s sys 0m0.145s Test was to disable RSCN on the target port, disable the target port, and then start the 'dd' command as indicated. Changes to the original version: - Use a private list in scsi_eh_abort_handler to avoid list starvation (pointed out by Joern Engel) - Terminate command aborts when the first abort fails - Do not attempt command aborts if the host is already in recovery or if the device is removed. - Flush abort workqueue if the device is removed. Changes to v2: - Removed eh_entry initialisation - Convert to per-command workqueue Changes to v3: - Use delayed_work - Enable new eh timeout handler for virtio, SAS, and FC - Modify logging messages to include scmd pointer Changes to v4: - Remove stubs when enabling new eh timeout handler for other drivers Changes to v5: - Enable new eh timeout handler per default - Update documentation Changes to v6: - Include changes from James Bottomley for erratic device offline patch - Rearrange patches - Update SCSI midlayer documentation Changes to v7: - Merge obsolete patch Changes to v8: - Include fixes from hch - Include patch from Ren Mingxin to set eh_deadline to '0' Changes to v9: - Unlock accesses to eh_deadline Hannes Reinecke (3): scsi: improved eh timeout handler scsi: Unlock accesses to eh_deadline scsi: Update documentation James Bottomley (1): scsi: Fix erratic device offline during EH Ren Mingxin (1): scsi: Set the minimum valid value of 'eh_deadline' as 0 Documentation/scsi/scsi_eh.txt | 69 +- Documentation/scsi/scsi_mid_low_api.txt | 9 +- drivers/scsi/hosts.c| 16 ++- drivers/scsi/scsi.c | 9 +- drivers/scsi/scsi_error.c | 229 drivers/scsi/scsi_priv.h| 2 + drivers/scsi/scsi_sysfs.c | 36 +++-- drivers/scsi/sd.c | 26 ++-- include/scsi/scsi_cmnd.h| 1 + include/scsi/scsi_driver.h | 2 +- include/scsi/scsi_host.h| 5 + 11 files changed, 281 insertions(+), 123 deletions(-) -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/5] scsi: Set the minimum valid value of 'eh_deadline' as 0
From: Ren Mingxin The former minimum valid value of 'eh_deadline' is 1s, which means the earliest occasion to shorten EH is 1 second later since a command is failed or timed out. But if we want to skip EH steps ASAP, we have to wait until the first EH step is finished. If the duration of the first EH step is long, this waiting time is excruciating. So, it is necessary to accept 0 as the minimum valid value for 'eh_deadline'. According to my test, with Hannes' patchset 'New EH command timeout handler' as well, the minimum IO time is improved from 73s (eh_deadline = 1) to 43s(eh_deadline = 0) when commands are timed out by disabling RSCN and target port. Signed-off-by: Ren Mingxin Signed-off-by: Hannes Reinecke --- drivers/scsi/hosts.c | 16 drivers/scsi/scsi_error.c | 34 +- drivers/scsi/scsi_sysfs.c | 36 +--- 3 files changed, 58 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index f334859..bc8c462 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -316,11 +316,11 @@ static void scsi_host_dev_release(struct device *dev) kfree(shost); } -static unsigned int shost_eh_deadline; +static int shost_eh_deadline = -1; -module_param_named(eh_deadline, shost_eh_deadline, uint, S_IRUGO|S_IWUSR); +module_param_named(eh_deadline, shost_eh_deadline, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(eh_deadline, -"SCSI EH timeout in seconds (should be between 1 and 2^32-1)"); +"SCSI EH timeout in seconds (should be between 0 and 2^31-1)"); static struct device_type scsi_host_type = { .name = "scsi_host", @@ -394,7 +394,15 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->unchecked_isa_dma = sht->unchecked_isa_dma; shost->use_clustering = sht->use_clustering; shost->ordered_tag = sht->ordered_tag; - shost->eh_deadline = shost_eh_deadline * HZ; + if (shost_eh_deadline == -1) + shost->eh_deadline = -1; + else if ((ulong) shost_eh_deadline * HZ > INT_MAX) { + shost_printk(KERN_WARNING, shost, +"eh_deadline %u too large, setting to %u\n", +shost_eh_deadline, INT_MAX / HZ); + shost->eh_deadline = INT_MAX; + } else + shost->eh_deadline = shost_eh_deadline * HZ; if (sht->supported_mode == MODE_UNKNOWN) /* means we didn't set it ... default to INITIATOR */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index a3f8988..f2fe580 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -91,18 +91,18 @@ EXPORT_SYMBOL_GPL(scsi_schedule_eh); static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) { - if (!shost->last_reset || !shost->eh_deadline) + if (!shost->last_reset || shost->eh_deadline == -1) return 0; /* * 32bit accesses are guaranteed to be atomic * (on all supported architectures), so instead * of using a spinlock we can as well double check -* if eh_deadline has been unset during the +* if eh_deadline has been set to 'off' during the * time_before call. */ if (time_before(jiffies, shost->last_reset + shost->eh_deadline) && - shost->eh_deadline != 0) + shost->eh_deadline > -1) return 0; return 1; @@ -132,26 +132,34 @@ scmd_eh_abort_handler(struct work_struct *work) rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd); if (rtn == SUCCESS) { scmd->result |= DID_TIME_OUT << 16; - if (!scsi_noretry_cmd(scmd) && + if (scsi_host_eh_past_deadline(sdev->host)) { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "scmd %p eh timeout, " + "not retrying aborted " + "command\n", scmd)); + } else if (!scsi_noretry_cmd(scmd) && (++scmd->retries <= scmd->allowed)) { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_WARNING, scmd, "scmd %p retry " "aborted command\n", scmd)); scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); + return; } else { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_WARNING, scmd,
[PATCH 3/5] scsi: Unlock accesses to eh_deadline
32bit accesses are guaranteed to be atomic, so we can remove the spinlock when checking for eh_deadline. We only need to make sure to catch any updates which might happened during the call to time_before(); if so we just recheck with the correct value. Signed-off-by: Hannes Reinecke --- drivers/scsi/scsi_error.c | 44 +--- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 7eecbb5..a3f8988 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -94,8 +94,15 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) if (!shost->last_reset || !shost->eh_deadline) return 0; - if (time_before(jiffies, - shost->last_reset + shost->eh_deadline)) + /* +* 32bit accesses are guaranteed to be atomic +* (on all supported architectures), so instead +* of using a spinlock we can as well double check +* if eh_deadline has been unset during the +* time_before call. +*/ + if (time_before(jiffies, shost->last_reset + shost->eh_deadline) && + shost->eh_deadline != 0) return 0; return 1; @@ -111,18 +118,14 @@ scmd_eh_abort_handler(struct work_struct *work) struct scsi_cmnd *scmd = container_of(work, struct scsi_cmnd, abort_work.work); struct scsi_device *sdev = scmd->device; - unsigned long flags; int rtn; - spin_lock_irqsave(sdev->host->host_lock, flags); if (scsi_host_eh_past_deadline(sdev->host)) { - spin_unlock_irqrestore(sdev->host->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "scmd %p eh timeout, not aborting\n", scmd)); } else { - spin_unlock_irqrestore(sdev->host->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "aborting command %p\n", scmd)); @@ -1132,7 +1135,6 @@ int scsi_eh_get_sense(struct list_head *work_q, struct scsi_cmnd *scmd, *next; struct Scsi_Host *shost; int rtn; - unsigned long flags; list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) || @@ -1140,16 +1142,13 @@ int scsi_eh_get_sense(struct list_head *work_q, continue; shost = scmd->device->host; - spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_eh_past_deadline(shost)) { - spin_unlock_irqrestore(shost->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "skip %s, past eh deadline\n", __func__)); break; } - spin_unlock_irqrestore(shost->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd, "%s: requesting sense\n", current->comm)); @@ -1235,26 +1234,21 @@ static int scsi_eh_test_devices(struct list_head *cmd_list, struct scsi_cmnd *scmd, *next; struct scsi_device *sdev; int finish_cmds; - unsigned long flags; while (!list_empty(cmd_list)) { scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry); sdev = scmd->device; if (!try_stu) { - spin_lock_irqsave(sdev->host->host_lock, flags); if (scsi_host_eh_past_deadline(sdev->host)) { /* Push items back onto work_q */ list_splice_init(cmd_list, work_q); - spin_unlock_irqrestore(sdev->host->host_lock, - flags); SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, sdev->host, "skip %s, past eh deadline", __func__)); break; } - spin_unlock_irqrestore(sdev->host->host_lock, flags); } finish_cmds = !scsi_device_online(scmd->device) || @@ -1295,15 +1289,12 @@ static int scsi_eh_abort_cmds(struct list_head *work_q, LIST_HEAD(check_list); int rtn; struct Scsi_Host *shost; - unsigned long flags; list_for_each_entry_safe(scmd, next,
[PATCH 1/5] scsi: Fix erratic device offline during EH
From: James Bottomley Commit 18a4d0a22ed6c54b67af7718c305cd010f09ddf8 (Handle disk devices which can not process medium access commands) was introduced to offline any device which cannot process medium access commands. However, commit 3eef6257de48ff84a5d98ca533685df8a3beaeb8 (Reduce error recovery time by reducing use of TURs) reduced the number of TURs by sending it only on the first failing command, which might or might not be a medium access command. So in combination this results in an erratic device offlining during EH; if the command where the TUR was sent upon happens to be a medium access command the device will be set offline, if not everything proceeds as normal. This patch moves the check to the final test, eliminating this problem. Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley --- drivers/scsi/scsi_error.c | 26 +- drivers/scsi/sd.c | 26 -- include/scsi/scsi_driver.h | 2 +- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index e8bee9f..67c0014 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -941,12 +941,6 @@ retry: scsi_eh_restore_cmnd(scmd, &ses); - if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { - struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); - if (sdrv->eh_action) - rtn = sdrv->eh_action(scmd, cmnd, cmnd_size, rtn); - } - return rtn; } @@ -964,6 +958,16 @@ static int scsi_request_sense(struct scsi_cmnd *scmd) return scsi_send_eh_cmnd(scmd, NULL, 0, scmd->device->eh_timeout, ~0); } +static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn) +{ + if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { + struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); + if (sdrv->eh_action) + rtn = sdrv->eh_action(scmd, rtn); + } + return rtn; +} + /** * scsi_eh_finish_cmd - Handle a cmd that eh is finished with. * @scmd: Original SCSI cmd that eh has finished. @@ -1142,7 +1146,9 @@ static int scsi_eh_test_devices(struct list_head *cmd_list, list_for_each_entry_safe(scmd, next, cmd_list, eh_entry) if (scmd->device == sdev) { - if (finish_cmds) + if (finish_cmds && + (try_stu || +scsi_eh_action(scmd, SUCCESS) == SUCCESS)) scsi_eh_finish_cmd(scmd, done_q); else list_move_tail(&scmd->eh_entry, work_q); @@ -1283,7 +1289,8 @@ static int scsi_eh_stu(struct Scsi_Host *shost, !scsi_eh_tur(stu_scmd)) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { - if (scmd->device == sdev) + if (scmd->device == sdev && + scsi_eh_action(scmd, SUCCESS) == SUCCESS) scsi_eh_finish_cmd(scmd, done_q); } } @@ -1350,7 +1357,8 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, !scsi_eh_tur(bdr_scmd)) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { - if (scmd->device == sdev) + if (scmd->device == sdev && + scsi_eh_action(scmd, rtn) != FAILED) scsi_eh_finish_cmd(scmd, done_q); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fd874b8..1929838 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -110,7 +110,7 @@ static int sd_suspend_runtime(struct device *); static int sd_resume(struct device *); static void sd_rescan(struct device *); static int sd_done(struct scsi_cmnd *); -static int sd_eh_action(struct scsi_cmnd *, unsigned char *, int, int); +static int sd_eh_action(struct scsi_cmnd *, int); static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer); static void scsi_disk_release(struct device *cdev); static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *); @@ -1551,23 +1551,23 @@ static const struct block_device_operations sd_fops = { /** * sd_eh_action - error handling callback * @scmd: sd-issued command that has failed - * @eh_cmnd: The command that was sent during