[PATCH] scsi: Do not call do_div() with a 64-bit divisor

2013-11-04 Thread Geert Uytterhoeven
do_div() is meant for divisions of 64-bit number by 32-bit numbers.
Passing 64-bit divisor types caused issues in the past on 32-bit platforms,
cfr. commit ea077b1b96e073eac5c3c5590529e964767fc5f7 ("m68k: Truncate base
in do_div()").

As scsi_device.sector_size is unsigned (int), factor should be unsigned
int, too.

Signed-off-by: Geert Uytterhoeven 
---
 drivers/scsi/sd.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 5693f6d7eddb..d6645c70cceb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1607,7 +1607,7 @@ static unsigned int sd_completed_bytes(struct scsi_cmnd 
*scmd)
end_lba <<= 1;
} else {
/* be careful ... don't want any overflows */
-   u64 factor = scmd->device->sector_size / 512;
+   unsigned int factor = scmd->device->sector_size / 512;
do_div(start_lba, factor);
do_div(end_lba, factor);
}
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 11/11] pm80xx : gpio feature support for motherboard controllers

2013-11-04 Thread Viswas G
Hi Jack,

We wanted to control the GPIO in the HBA only. Bsg interface gets created only 
for enclosure or expander. 

For our HBA, bsg interface will not be created since it does not have an 
enclosure target inside. That's why we wanted to use IOCTL. Please advise.

Best Regards,
Viswas G

-Original Message-
From: Jack Wang [mailto:xjtu...@gmail.com] 
Sent: Tuesday, October 29, 2013 3:49 PM
To: Viswas G
Cc: linux-scsi@vger.kernel.org; Sangeetha Gnanasekaran; Nikith Ganigarakoppal; 
Anand Kumar Santhanam
Subject: Re: [PATCH 11/11] pm80xx : gpio feature support for motherboard 
controllers

Hi Viswas,

As ioctl interface is not welcome for new feature, that's why we removed ioctl 
interface when pm8001 accepted into mainline.

I suggest you use bsg interface for this, see sas_host_smp.c for details.

Regards,
Jack

On 10/22/2013 02:20 PM, Viswas G wrote:
> 
> Signed-off-by: Viswas G 
> ---
>  drivers/scsi/pm8001/pm8001_ctl.c  |  248
> -
>  drivers/scsi/pm8001/pm8001_ctl.h  |   55 
>  drivers/scsi/pm8001/pm8001_init.c |   37 ++
>  drivers/scsi/pm8001/pm8001_sas.h  |   30 +
>  drivers/scsi/pm8001/pm80xx_hwi.c  |  106 
>  drivers/scsi/pm8001/pm80xx_hwi.h  |   49 +++
>  6 files changed, 524 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/scsi/pm8001/pm8001_ctl.c
> b/drivers/scsi/pm8001/pm8001_ctl.c
> index 247cb1c..3c9ca21 100644
> --- a/drivers/scsi/pm8001/pm8001_ctl.c
> +++ b/drivers/scsi/pm8001/pm8001_ctl.c
> @@ -40,7 +40,8 @@
>  #include 
>  #include 
>  #include "pm8001_sas.h"
> -#include "pm8001_ctl.h"
> +
> +int pm8001_major = -1;
> 
>  /* scsi host attributes */
> 
> @@ -759,3 +760,248 @@ struct device_attribute *pm8001_host_attrs[] = {
> NULL,
>  };
> 
> +/**
> + * pm8001_open - open the configuration file
> + * @inode: inode being opened
> + * @file: file handle attached
> + *
> + * Called when the configuration device is opened. Does the needed
> + * set up on the handle and then returns
> + *
> + */
> +static int pm8001_open(struct inode *inode, struct file *file) {
> +   struct pm8001_hba_info *pm8001_ha;
> +   unsigned minor_number = iminor(inode);
> +   int err = -ENODEV;
> +
> +   list_for_each_entry(pm8001_ha, &hba_list, list) {
> +   if (pm8001_ha->id == minor_number) {
> +   file->private_data = pm8001_ha;
> +   err = 0;
> +   break;
> +   }
> +   }
> +
> +   return err;
> +}
> +
> +/**
> + * pm8001_close - close the configuration file
> + * @inode: inode being opened
> + * @file: file handle attached
> + *
> + * Called when the configuration device is closed. Does the needed
> + * set up on the handle and then returns
> + *
> + */
> +static int pm8001_close(struct inode *inode, struct file *file) {
> +   return 0;
> +}
> +
> +static long pm8001_info_ioctl(struct pm8001_hba_info *pm8001_ha,
> + unsigned long arg) {
> +   u32 ret = 0;
> +   struct ioctl_info_buffer info_buf;
> +   union main_cfg_table main_tbl =  pm8001_ha->main_cfg_tbl;
> +
> +   strcpy(info_buf.information.sz_name, DRV_NAME);
> +
> +   info_buf.information.usmajor_revision = DRV_MAJOR;
> +   info_buf.information.usminor_revision = DRV_MINOR;
> +   info_buf.information.usbuild_revision = DRV_BUILD;
> +   if (pm8001_ha->chip_id == chip_8001) {
> +   info_buf.information.maxoutstandingIO =
> +   main_tbl.pm8001_tbl.max_out_io;
> +   info_buf.information.maxdevices =
> +   (main_tbl.pm8001_tbl.max_sgl >> 16) & 0x;
> +   } else {
> +   info_buf.information.maxoutstandingIO =
> +   main_tbl.pm80xx_tbl.max_out_io;
> +   info_buf.information.maxdevices =
> +   (main_tbl.pm80xx_tbl.max_sgl >> 16) & 0x;
> +   }
> +   info_buf.header.return_code = ADPT_IOCTL_CALL_SUCCESS;
> +
> +   if (copy_to_user((void *)arg, (void *)&info_buf,
> +sizeof(struct ioctl_info_buffer))) {
> +   ret = ADPT_IOCTL_CALL_FAILED;
> +   }
> +
> +   return ret;
> +}
> +
> +static long pm8001_gpio_ioctl(struct pm8001_hba_info *pm8001_ha,
> + unsigned long arg) {
> +   struct gpio_buffer buffer;
> +   struct pm8001_gpio *payload;
> +   struct gpio_ioctl_resp *gpio_resp;
> +   DECLARE_COMPLETION_ONSTACK(completion);
> +   unsigned long timeout;
> +   u32 ret = 0, operation;
> +
> +   mutex_lock(&pm8001_ha->ioctl_mutex);
> +
> +   if (copy_from_user(&buffer, (struct gpio_buffer *)arg,
> +   sizeof(struct gpio_buffer))) {
> +   ret = ADPT_IOCTL_CALL_FAILED;
> +   goto exit;
> +   }
> +
> +   pm8001_ha->ioctl_completion = &completion;
> +   payload = &buffer.gpio_payload;
> +   operation = payload->oper

Re: [PATCH 11/11] pm80xx : gpio feature support for motherboard controllers

2013-11-04 Thread Jack Wang
On 11/04/2013 11:13 AM, Viswas G wrote:
> Hi Jack,
> 
> We wanted to control the GPIO in the HBA only. Bsg interface gets created 
> only for enclosure or expander. 
> 
> For our HBA, bsg interface will not be created since it does not have an 
> enclosure target inside. That's why we wanted to use IOCTL. Please advise.
> 
> Best Regards,
> Viswas G
> 
Hi Viswas,

No, bsg interface also support HBA.
Here is two example output from LSI mpt2sas:

smp_rep_manufacturer /dev/bsg/sas_host0
Report manufacturer response:
  SAS-1.1 format: 0
  vendor identification: LSI
  product identification: Virtual SMP Port
  product revision level:
smp_read_gpio /dev/bsg/sas_host0
Read GPIO register response:
  GPIO_CFG[0]:
version: 0
GPIO enable: 1
cfg register count: 2
gp register count: 1
supported drive count: 16

Regards,
Jack

> -Original Message-
> From: Jack Wang [mailto:xjtu...@gmail.com] 
> Sent: Tuesday, October 29, 2013 3:49 PM
> To: Viswas G
> Cc: linux-scsi@vger.kernel.org; Sangeetha Gnanasekaran; Nikith 
> Ganigarakoppal; Anand Kumar Santhanam
> Subject: Re: [PATCH 11/11] pm80xx : gpio feature support for motherboard 
> controllers
> 
> Hi Viswas,
> 
> As ioctl interface is not welcome for new feature, that's why we removed 
> ioctl interface when pm8001 accepted into mainline.
> 
> I suggest you use bsg interface for this, see sas_host_smp.c for details.
> 
> Regards,
> Jack
> 
> On 10/22/2013 02:20 PM, Viswas G wrote:
>>
>> Signed-off-by: Viswas G 
>> ---
>>  drivers/scsi/pm8001/pm8001_ctl.c  |  248
>> -
>>  drivers/scsi/pm8001/pm8001_ctl.h  |   55 
>>  drivers/scsi/pm8001/pm8001_init.c |   37 ++
>>  drivers/scsi/pm8001/pm8001_sas.h  |   30 +
>>  drivers/scsi/pm8001/pm80xx_hwi.c  |  106 
>>  drivers/scsi/pm8001/pm80xx_hwi.h  |   49 +++
>>  6 files changed, 524 insertions(+), 1 deletions(-)
>>
>> diff --git a/drivers/scsi/pm8001/pm8001_ctl.c
>> b/drivers/scsi/pm8001/pm8001_ctl.c
>> index 247cb1c..3c9ca21 100644
>> --- a/drivers/scsi/pm8001/pm8001_ctl.c
>> +++ b/drivers/scsi/pm8001/pm8001_ctl.c
>> @@ -40,7 +40,8 @@
>>  #include 
>>  #include 
>>  #include "pm8001_sas.h"
>> -#include "pm8001_ctl.h"
>> +
>> +int pm8001_major = -1;
>>
>>  /* scsi host attributes */
>>
>> @@ -759,3 +760,248 @@ struct device_attribute *pm8001_host_attrs[] = {
>> NULL,
>>  };
>>
>> +/**
>> + * pm8001_open - open the configuration file
>> + * @inode: inode being opened
>> + * @file: file handle attached
>> + *
>> + * Called when the configuration device is opened. Does the needed
>> + * set up on the handle and then returns
>> + *
>> + */
>> +static int pm8001_open(struct inode *inode, struct file *file) {
>> +   struct pm8001_hba_info *pm8001_ha;
>> +   unsigned minor_number = iminor(inode);
>> +   int err = -ENODEV;
>> +
>> +   list_for_each_entry(pm8001_ha, &hba_list, list) {
>> +   if (pm8001_ha->id == minor_number) {
>> +   file->private_data = pm8001_ha;
>> +   err = 0;
>> +   break;
>> +   }
>> +   }
>> +
>> +   return err;
>> +}
>> +
>> +/**
>> + * pm8001_close - close the configuration file
>> + * @inode: inode being opened
>> + * @file: file handle attached
>> + *
>> + * Called when the configuration device is closed. Does the needed
>> + * set up on the handle and then returns
>> + *
>> + */
>> +static int pm8001_close(struct inode *inode, struct file *file) {
>> +   return 0;
>> +}
>> +
>> +static long pm8001_info_ioctl(struct pm8001_hba_info *pm8001_ha,
>> + unsigned long arg) {
>> +   u32 ret = 0;
>> +   struct ioctl_info_buffer info_buf;
>> +   union main_cfg_table main_tbl =  pm8001_ha->main_cfg_tbl;
>> +
>> +   strcpy(info_buf.information.sz_name, DRV_NAME);
>> +
>> +   info_buf.information.usmajor_revision = DRV_MAJOR;
>> +   info_buf.information.usminor_revision = DRV_MINOR;
>> +   info_buf.information.usbuild_revision = DRV_BUILD;
>> +   if (pm8001_ha->chip_id == chip_8001) {
>> +   info_buf.information.maxoutstandingIO =
>> +   main_tbl.pm8001_tbl.max_out_io;
>> +   info_buf.information.maxdevices =
>> +   (main_tbl.pm8001_tbl.max_sgl >> 16) & 0x;
>> +   } else {
>> +   info_buf.information.maxoutstandingIO =
>> +   main_tbl.pm80xx_tbl.max_out_io;
>> +   info_buf.information.maxdevices =
>> +   (main_tbl.pm80xx_tbl.max_sgl >> 16) & 0x;
>> +   }
>> +   info_buf.header.return_code = ADPT_IOCTL_CALL_SUCCESS;
>> +
>> +   if (copy_to_user((void *)arg, (void *)&info_buf,
>> +sizeof(struct ioctl_info_buffer))) {
>> +   ret = ADPT_IOCTL_CALL_FAILED;
>> +   }
>> +
>> +   return ret;
>> +}
>> +
>> +static long pm8001_gpio_ioctl(struct pm8001_hba_info *pm800

Re: [PATCH 2/3] scsi: improved eh timeout handler

2013-11-04 Thread Hannes Reinecke
On 10/31/2013 04:49 PM, Christoph Hellwig wrote:
> Looks reasonable to me, but a few minor nitpicks:
> 
>> +spin_lock_irqsave(sdev->host->host_lock, flags);
>> +if (scsi_host_eh_past_deadline(sdev->host)) {
> 
> I don't have the implementation of scsi_host_eh_past_deadline in my
> local tree, but do we really need the host lock for it?
> 
Yes. The eh_deadline variable might be set from an interrupt context
or from userland, so we need to protect access to it.

>> +int
>> +scsi_abort_command(struct scsi_cmnd *scmd)
> 
> Seems like this should be static and not exported in the current version
> of the code?
> 
Yep. Will be doing so.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries & Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] scsi: Set the minimum valid value of 'eh_deadline' as 0

2013-11-04 Thread Hannes Reinecke
From: Ren Mingxin 

The former minimum valid value of 'eh_deadline' is 1s, which means
the earliest occasion to shorten EH is 1 second later since a
command is failed or timed out. But if we want to skip EH steps
ASAP, we have to wait until the first EH step is finished. If the
duration of the first EH step is long, this waiting time is
excruciating. So, it is necessary to accept 0 as the minimum valid
value for 'eh_deadline'.

According to my test, with Hannes' patchset 'New EH command timeout
handler' as well, the minimum IO time is improved from 73s
(eh_deadline = 1) to 43s(eh_deadline = 0) when commands are timed
out by disabling RSCN and target port.

Another thing: scsi_finish_command() should be invoked if
scsi_eh_scmd_add() is returned on failure - let EH finish those
commands.

Signed-off-by: Ren Mingxin 
Signed-off-by: Hannes Reinecke 
---
 drivers/scsi/hosts.c  | 16 
 drivers/scsi/scsi_error.c | 40 +++-
 drivers/scsi/scsi_sysfs.c | 36 +---
 3 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index f334859..81529ba 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -316,11 +316,11 @@ static void scsi_host_dev_release(struct device *dev)
kfree(shost);
 }
 
-static unsigned int shost_eh_deadline;
+static int shost_eh_deadline = -1;
 
-module_param_named(eh_deadline, shost_eh_deadline, uint, S_IRUGO|S_IWUSR);
+module_param_named(eh_deadline, shost_eh_deadline, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(eh_deadline,
-"SCSI EH timeout in seconds (should be between 1 and 2^32-1)");
+"SCSI EH timeout in seconds (should be between 0 and 2^31-1)");
 
 static struct device_type scsi_host_type = {
.name = "scsi_host",
@@ -394,7 +394,15 @@ struct Scsi_Host *scsi_host_alloc(struct 
scsi_host_template *sht, int privsize)
shost->unchecked_isa_dma = sht->unchecked_isa_dma;
shost->use_clustering = sht->use_clustering;
shost->ordered_tag = sht->ordered_tag;
-   shost->eh_deadline = shost_eh_deadline * HZ;
+   if (shost_eh_deadline == -1)
+   shost->eh_deadline = -1;
+   else if ((ulong) shost_eh_deadline * HZ > INT_MAX) {
+   printk(KERN_WARNING "scsi%d: eh_deadline %u exceeds the "
+  "maximum, setting to %u\n",
+  shost->host_no, shost_eh_deadline, INT_MAX / HZ);
+   shost->eh_deadline = INT_MAX;
+   } else
+   shost->eh_deadline = shost_eh_deadline * HZ;
 
if (sht->supported_mode == MODE_UNKNOWN)
/* means we didn't set it ... default to INITIATOR */
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 6a137fa..2487293 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -91,7 +91,7 @@ EXPORT_SYMBOL_GPL(scsi_schedule_eh);
 
 static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
 {
-   if (!shost->last_reset || !shost->eh_deadline)
+   if (!shost->last_reset || shost->eh_deadline == -1)
return 0;
 
if (time_before(jiffies,
@@ -129,29 +129,43 @@ scmd_eh_abort_handler(struct work_struct *work)
rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd);
if (rtn == SUCCESS) {
scmd->result |= DID_TIME_OUT << 16;
-   if (!scsi_noretry_cmd(scmd) &&
+   spin_lock_irqsave(sdev->host->host_lock, flags);
+   if (scsi_host_eh_past_deadline(sdev->host)) {
+   spin_unlock_irqrestore(sdev->host->host_lock,
+  flags);
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_INFO, scmd,
+   "scmd %p eh timeout, "
+   "not retrying aborted "
+   "command\n", scmd));
+   } else if (!scsi_noretry_cmd(scmd) &&
(++scmd->retries <= scmd->allowed)) {
+   spin_unlock_irqrestore(sdev->host->host_lock,
+  flags);
SCSI_LOG_ERROR_RECOVERY(3,
scmd_printk(KERN_WARNING, scmd,
"scmd %p retry "
"aborted command\n", scmd));
scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
+   return;
} else {
+   spin_unlock_irqrestore(sdev->host->host_lock,
+  flags);
  

[PATCH 4/4] scsi: Update documentation

2013-11-04 Thread Hannes Reinecke
The documentation has gone out-of-sync, so update it to
the current status.

Signed-off-by: Hannes Reinecke 
---
 Documentation/scsi/scsi_eh.txt  | 69 +++--
 Documentation/scsi/scsi_mid_low_api.txt |  9 -
 drivers/scsi/scsi.c |  6 +--
 3 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt
index 6ff16b6..a0c8511 100644
--- a/Documentation/scsi/scsi_eh.txt
+++ b/Documentation/scsi/scsi_eh.txt
@@ -42,20 +42,14 @@ discussion.
 
  Once LLDD gets hold of a scmd, either the LLDD will complete the
 command by calling scsi_done callback passed from midlayer when
-invoking hostt->queuecommand() or SCSI midlayer will time it out.
+invoking hostt->queuecommand() or the block layer will time it out.
 
 
 [1-2-1] Completing a scmd w/ scsi_done
 
  For all non-EH commands, scsi_done() is the completion callback.  It
-does the following.
-
- 1. Delete timeout timer.  If it fails, it means that timeout timer
-has expired and is going to finish the command.  Just return.
-
- 2. Link scmd to per-cpu scsi_done_q using scmd->en_entry
-
- 3. Raise SCSI_SOFTIRQ
+just calls blk_complete_request() to delete the block layer timer and
+raise SCSI_SOFTIRQ
 
  SCSI_SOFTIRQ handler scsi_softirq calls scsi_decide_disposition() to
 determine what to do with the command.  scsi_decide_disposition()
@@ -64,10 +58,12 @@ with the command.
 
  - SUCCESS
scsi_finish_command() is invoked for the command.  The
-   function does some maintenance choirs and notify completion by
-   calling scmd->done() callback, which, for fs requests, would
-   be HLD completion callback - sd:sd_rw_intr, sr:rw_intr,
-   st:st_intr.
+   function does some maintenance chores and then calls
+   scsi_io_completion() to finish the I/O.
+   scsi_io_completion() then notifies the block layer on
+   the completed request by calling blk_end_request and
+   friends or figures out what to do with the remainder
+   of the data in case of an error.
 
  - NEEDS_RETRY
  - ADD_TO_MLQUEUE
@@ -86,33 +82,45 @@ function
  1. invokes optional hostt->eh_timed_out() callback.  Return value can
 be one of
 
-- EH_HANDLED
-   This indicates that eh_timed_out() dealt with the timeout.  The
-   scmd is passed to __scsi_done() and thus linked into per-cpu
-   scsi_done_q.  Normal command completion described in [1-2-1]
-   follows.
+- BLK_EH_HANDLED
+   This indicates that eh_timed_out() dealt with the timeout.
+   The command is passed back to the block layer and completed
+   via __blk_complete_requests().
+
+   *NOTE* After returning BLK_EH_HANDLED the SCSI layer is
+   assumed to be finished with the command, and no other
+   functions from the SCSI layer will be called. So this
+   should typically only be returned if the eh_timed_out()
+   handler raced with normal completion.
 
-- EH_RESET_TIMER
+- BLK_EH_RESET_TIMER
This indicates that more time is required to finish the
command.  Timer is restarted.  This action is counted as a
retry and only allowed scmd->allowed + 1(!) times.  Once the
-   limit is reached, action for EH_NOT_HANDLED is taken instead.
+   limit is reached, action for BLK_EH_NOT_HANDLED is taken instead.
 
-   *NOTE* This action is racy as the LLDD could finish the scmd
-   after the timeout has expired but before it's added back.  In
-   such cases, scsi_done() would think that timeout has occurred
-   and return without doing anything.  We lose completion and the
-   command will time out again.
-
-- EH_NOT_HANDLED
-   This is the same as when eh_timed_out() callback doesn't exist.
+- BLK_EH_NOT_HANDLED
+eh_timed_out() callback did not handle the command.
Step #2 is taken.
 
+ 2. If the host supports asynchronous completion (as indicated by the
+no_async_abort setting in the host template) scsi_abort_command()
+is invoked to schedule an asynchrous abort. If that fails
+Step #3 is taken.
+
  2. scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD) is invoked for the
 command.  See [1-3] for more information.
 
+[1-3] Asynchronous command aborts
+
+ After a timeout occurs a command abort is scheduled from
+ scsi_abort_command(). If the abort is successful the command
+ will either be retried (if the number of retries is not exhausted)
+ or terminated with DID_TIME_OUT.
+ Otherwise scsi_eh_scmd_add() is invoked for the command.
+ See [1-4] for more information.
 
-[1-3] How EH takes over
+[1-4] How EH takes over
 
  scmds enter EH via scsi_eh_scmd_add(), which does the following.
 
@@ -320,7 +328,8 @@ scmd->allowed.
 
 <>
 
-   This action is taken for each timed out command.
+   This action is taken for each timed out command when
+   no_async_abort is enabled in the host template.
hostt->eh_abort_handler() i

[PATCH 1/4] scsi: Fix erratic device offline during EH

2013-11-04 Thread Hannes Reinecke
From: James Bottomley 

Commit 18a4d0a22ed6c54b67af7718c305cd010f09ddf8
(Handle disk devices which can not process medium access commands)
was introduced to offline any device which cannot process medium
access commands.
However, commit 3eef6257de48ff84a5d98ca533685df8a3beaeb8
(Reduce error recovery time by reducing use of TURs) reduced
the number of TURs by sending it only on the first failing
command, which might or might not be a medium access command.
So in combination this results in an erratic device offlining
during EH; if the command where the TUR was sent upon happens
to be a medium access command the device will be set offline,
if not everything proceeds as normal.

This patch moves the check to the final test, eliminating
this problem.

Signed-off-by: Hannes Reinecke 
Signed-off-by: James Bottomley 
---
 drivers/scsi/scsi_error.c  | 26 +-
 drivers/scsi/sd.c  | 26 --
 include/scsi/scsi_driver.h |  2 +-
 3 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index e8bee9f..67c0014 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -941,12 +941,6 @@ retry:
 
scsi_eh_restore_cmnd(scmd, &ses);
 
-   if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) {
-   struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
-   if (sdrv->eh_action)
-   rtn = sdrv->eh_action(scmd, cmnd, cmnd_size, rtn);
-   }
-
return rtn;
 }
 
@@ -964,6 +958,16 @@ static int scsi_request_sense(struct scsi_cmnd *scmd)
return scsi_send_eh_cmnd(scmd, NULL, 0, scmd->device->eh_timeout, ~0);
 }
 
+static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
+{
+   if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) {
+   struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
+   if (sdrv->eh_action)
+   rtn = sdrv->eh_action(scmd, rtn);
+   }
+   return rtn;
+}
+
 /**
  * scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
  * @scmd:  Original SCSI cmd that eh has finished.
@@ -1142,7 +1146,9 @@ static int scsi_eh_test_devices(struct list_head 
*cmd_list,
 
list_for_each_entry_safe(scmd, next, cmd_list, eh_entry)
if (scmd->device == sdev) {
-   if (finish_cmds)
+   if (finish_cmds &&
+   (try_stu ||
+scsi_eh_action(scmd, SUCCESS) == SUCCESS))
scsi_eh_finish_cmd(scmd, done_q);
else
list_move_tail(&scmd->eh_entry, work_q);
@@ -1283,7 +1289,8 @@ static int scsi_eh_stu(struct Scsi_Host *shost,
!scsi_eh_tur(stu_scmd)) {
list_for_each_entry_safe(scmd, next,
  work_q, eh_entry) {
-   if (scmd->device == sdev)
+   if (scmd->device == sdev &&
+   scsi_eh_action(scmd, SUCCESS) == 
SUCCESS)
scsi_eh_finish_cmd(scmd, 
done_q);
}
}
@@ -1350,7 +1357,8 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host 
*shost,
!scsi_eh_tur(bdr_scmd)) {
list_for_each_entry_safe(scmd, next,
 work_q, eh_entry) {
-   if (scmd->device == sdev)
+   if (scmd->device == sdev &&
+   scsi_eh_action(scmd, rtn) != FAILED)
scsi_eh_finish_cmd(scmd,
   done_q);
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fd874b8..1929838 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -110,7 +110,7 @@ static int sd_suspend_runtime(struct device *);
 static int sd_resume(struct device *);
 static void sd_rescan(struct device *);
 static int sd_done(struct scsi_cmnd *);
-static int sd_eh_action(struct scsi_cmnd *, unsigned char *, int, int);
+static int sd_eh_action(struct scsi_cmnd *, int);
 static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
 static void scsi_disk_release(struct device *cdev);
 static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *);
@@ -1551,23 +1551,23 @@ static const struct block_device_operations sd_fops = {
 /**
  * sd_eh_action - error handling callback
  * @scmd:  sd-issued command that has failed
- * @eh_cmnd:   The command that was sent during

[PATCHv9 0/4] New EH command timeout handler

2013-11-04 Thread Hannes Reinecke
Hi all,

this patchset implements a new SCSI EH command timeout handler
which will be sending command aborts inline without actually
engaging SCSI EH.
SCSI EH will only be invoked if command abort fails.

In addition the commands will be returned directly
if the command abort succeeded, cutting down recovery
times dramatically.

With the original SCSI EH I got:
# time dd if=/dev/zero of=/dev/dm-2 bs=4k count=4k oflag=direct
4096+0 records in
4096+0 records out
16777216 bytes (17 MB) copied, 142.652 s, 118 kB/s

real2m22.657s
user0m0.013s
sys 0m0.145s

With this patchset I got:
# time dd if=/dev/zero of=/dev/dm-2 bs=4k count=4k oflag=direct
4096+0 records in
4096+0 records out
16777216 bytes (17 MB) copied, 52.1579 s, 322 kB/s

real0m52.163s
user0m0.012s
sys 0m0.145s

Test was to disable RSCN on the target port, disable the
target port, and then start the 'dd' command as indicated.

Changes to the original version:
- Use a private list in scsi_eh_abort_handler to avoid
  list starvation (pointed out by Joern Engel)
- Terminate command aborts when the first abort fails
- Do not attempt command aborts if the host is already in recovery
  or if the device is removed.
- Flush abort workqueue if the device is removed.

Changes to v2:
- Removed eh_entry initialisation
- Convert to per-command workqueue

Changes to v3:
- Use delayed_work
- Enable new eh timeout handler for virtio, SAS, and FC
- Modify logging messages to include scmd pointer

Changes to v4:
- Remove stubs when enabling new eh timeout handler
  for other drivers

Changes to v5:
- Enable new eh timeout handler per default
- Update documentation

Changes to v6:
- Include changes from James Bottomley for erratic device
  offline patch
- Rearrange patches
- Update SCSI midlayer documentation

Changes to v7:
- Merge obsolete patch

Changes to v8:
- Include fixes from hch
- Include patch from Ren Mingxin to set eh_deadline to '0'

Hannes Reinecke (2):
  scsi: improved eh timeout handler
  scsi: Update documentation

James Bottomley (1):
  scsi: Fix erratic device offline during EH

Ren Mingxin (1):
  scsi: Set the minimum valid value of 'eh_deadline' as 0

 Documentation/scsi/scsi_eh.txt  |  69 ++-
 Documentation/scsi/scsi_mid_low_api.txt |   9 +-
 drivers/scsi/hosts.c|  16 ++-
 drivers/scsi/scsi.c |   9 +-
 drivers/scsi/scsi_error.c   | 199 +++-
 drivers/scsi/scsi_priv.h|   2 +
 drivers/scsi/scsi_sysfs.c   |  36 --
 drivers/scsi/sd.c   |  26 ++---
 include/scsi/scsi_cmnd.h|   1 +
 include/scsi/scsi_driver.h  |   2 +-
 include/scsi/scsi_host.h|   5 +
 11 files changed, 282 insertions(+), 92 deletions(-)

-- 
1.7.12.4

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] scsi: improved eh timeout handler

2013-11-04 Thread Hannes Reinecke
When a command runs into a timeout we need to send an 'ABORT TASK'
TMF. This is typically done by the 'eh_abort_handler' LLDD callback.

Conceptually, however, this function is a normal SCSI command, so
there is no need to enter the error handler.

This patch implements a new scsi_abort_command() function which
invokes an asynchronous function scsi_eh_abort_handler() to
abort the commands via the usual 'eh_abort_handler'.

If abort succeeds the command is either retried or terminated,
depending on the number of allowed retries. However, 'eh_eflags'
records the abort, so if the retry would fail again the
command is pushed onto the error handler without trying to
abort it (again); it'll be cleared up from SCSI EH.

Signed-off-by: Hannes Reinecke 
Cc: Ren Mingxin 
Cc: Christoph Hellwig 
---
 drivers/scsi/scsi.c   |   3 +
 drivers/scsi/scsi_error.c | 151 ++
 drivers/scsi/scsi_priv.h  |   2 +
 include/scsi/scsi_cmnd.h  |   1 +
 include/scsi/scsi_host.h  |   5 ++
 5 files changed, 149 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index fe0bcb1..2b04a57 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -297,6 +297,7 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, 
gfp_t gfp_mask)
 
cmd->device = dev;
INIT_LIST_HEAD(&cmd->list);
+   INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
spin_lock_irqsave(&dev->list_lock, flags);
list_add_tail(&cmd->list, &dev->cmd_list);
spin_unlock_irqrestore(&dev->list_lock, flags);
@@ -353,6 +354,8 @@ void scsi_put_command(struct scsi_cmnd *cmd)
list_del_init(&cmd->list);
spin_unlock_irqrestore(&cmd->device->list_lock, flags);
 
+   cancel_delayed_work(&cmd->abort_work);
+
__scsi_put_command(cmd->device->host, cmd, &sdev->sdev_gendev);
 }
 EXPORT_SYMBOL(scsi_put_command);
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 67c0014..6a137fa 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -53,6 +53,8 @@ static void scsi_eh_done(struct scsi_cmnd *scmd);
 #define HOST_RESET_SETTLE_TIME  (10)
 
 static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
+static int scsi_try_to_abort_cmd(struct scsi_host_template *,
+struct scsi_cmnd *);
 
 /* called with shost->host_lock held */
 void scsi_eh_wakeup(struct Scsi_Host *shost)
@@ -100,6 +102,116 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host 
*shost)
 }
 
 /**
+ * scmd_eh_abort_handler - Handle command aborts
+ * @work:  command to be aborted.
+ */
+void
+scmd_eh_abort_handler(struct work_struct *work)
+{
+   struct scsi_cmnd *scmd =
+   container_of(work, struct scsi_cmnd, abort_work.work);
+   struct scsi_device *sdev = scmd->device;
+   unsigned long flags;
+   int rtn;
+
+   spin_lock_irqsave(sdev->host->host_lock, flags);
+   if (scsi_host_eh_past_deadline(sdev->host)) {
+   spin_unlock_irqrestore(sdev->host->host_lock, flags);
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_INFO, scmd,
+   "scmd %p eh timeout, not aborting\n",
+   scmd));
+   } else {
+   spin_unlock_irqrestore(sdev->host->host_lock, flags);
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_INFO, scmd,
+   "aborting command %p\n", scmd));
+   rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd);
+   if (rtn == SUCCESS) {
+   scmd->result |= DID_TIME_OUT << 16;
+   if (!scsi_noretry_cmd(scmd) &&
+   (++scmd->retries <= scmd->allowed)) {
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_WARNING, scmd,
+   "scmd %p retry "
+   "aborted command\n", scmd));
+   scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
+   } else {
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_WARNING, scmd,
+   "scmd %p finish "
+   "aborted command\n", scmd));
+   scsi_finish_command(scmd);
+   }
+   return;
+   }
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_INFO, scmd,
+   "scmd %p abort failed, rtn %d\n",
+   scmd, rtn));
+   }
+
+   if (scsi_eh_scmd_add(scmd, 0)) {
+   SCSI_LOG_ERROR_RECOV

Re: [PATCH 2/3] scsi: improved eh timeout handler

2013-11-04 Thread James Bottomley
On Mon, 2013-11-04 at 14:36 +0100, Hannes Reinecke wrote:
> On 10/31/2013 04:49 PM, Christoph Hellwig wrote:
> > Looks reasonable to me, but a few minor nitpicks:
> > 
> >> +  spin_lock_irqsave(sdev->host->host_lock, flags);
> >> +  if (scsi_host_eh_past_deadline(sdev->host)) {
> > 
> > I don't have the implementation of scsi_host_eh_past_deadline in my
> > local tree, but do we really need the host lock for it?
> > 
> Yes. The eh_deadline variable might be set from an interrupt context
> or from userland, so we need to protect access to it.

That's not really true.  on all our supported architectures 32 bit
reads/writes are atomic, which means that if one CPU writes a word at
the same time another reads one, the reader is guaranteed to see either
the old or the new data.  Given the expense of lock cache line bouncing
on the newer architectures, we really want to avoid a spinlock where
possible.

In this case, the problem with the implementation is that the writer
might set eh_deadline to zero, but this is fixable in
scsi_host_eh_past_deadline() by checking for zero before and after the
time_before (for the zero to non-zero and non-zero to zero cases).

James

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] scsi: improved eh timeout handler

2013-11-04 Thread Hannes Reinecke
On 11/04/2013 03:25 PM, James Bottomley wrote:
> On Mon, 2013-11-04 at 14:36 +0100, Hannes Reinecke wrote:
>> On 10/31/2013 04:49 PM, Christoph Hellwig wrote:
>>> Looks reasonable to me, but a few minor nitpicks:
>>>
 +  spin_lock_irqsave(sdev->host->host_lock, flags);
 +  if (scsi_host_eh_past_deadline(sdev->host)) {
>>>
>>> I don't have the implementation of scsi_host_eh_past_deadline in my
>>> local tree, but do we really need the host lock for it?
>>>
>> Yes. The eh_deadline variable might be set from an interrupt context
>> or from userland, so we need to protect access to it.
> 
> That's not really true.  on all our supported architectures 32 bit
> reads/writes are atomic, which means that if one CPU writes a word at
> the same time another reads one, the reader is guaranteed to see either
> the old or the new data.  Given the expense of lock cache line bouncing
> on the newer architectures, we really want to avoid a spinlock where
> possible.
> 
> In this case, the problem with the implementation is that the writer
> might set eh_deadline to zero, but this is fixable in
> scsi_host_eh_past_deadline() by checking for zero before and after the
> time_before (for the zero to non-zero and non-zero to zero cases).
> 
IE you mean something like that attached patch?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries & Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 6a137fa..8abf7ba 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -94,8 +94,10 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
 	if (!shost->last_reset || !shost->eh_deadline)
 		return 0;
 
+	/* Double check eh_deadline to catch atomic updates */
 	if (time_before(jiffies,
-			shost->last_reset + shost->eh_deadline))
+			shost->last_reset + shost->eh_deadline) &&
+	shost->eh_deadline)
 		return 0;
 
 	return 1;
@@ -114,15 +116,12 @@ scmd_eh_abort_handler(struct work_struct *work)
 	unsigned long flags;
 	int rtn;
 
-	spin_lock_irqsave(sdev->host->host_lock, flags);
 	if (scsi_host_eh_past_deadline(sdev->host)) {
-		spin_unlock_irqrestore(sdev->host->host_lock, flags);
 		SCSI_LOG_ERROR_RECOVERY(3,
 			scmd_printk(KERN_INFO, scmd,
 "scmd %p eh timeout, not aborting\n",
 scmd));
 	} else {
-		spin_unlock_irqrestore(sdev->host->host_lock, flags);
 		SCSI_LOG_ERROR_RECOVERY(3,
 			scmd_printk(KERN_INFO, scmd,
 "aborting command %p\n", scmd));
@@ -1140,16 +1139,13 @@ int scsi_eh_get_sense(struct list_head *work_q,
 			continue;
 
 		shost = scmd->device->host;
-		spin_lock_irqsave(shost->host_lock, flags);
 		if (scsi_host_eh_past_deadline(shost)) {
-			spin_unlock_irqrestore(shost->host_lock, flags);
 			SCSI_LOG_ERROR_RECOVERY(3,
 shost_printk(KERN_INFO, shost,
 	"skip %s, past eh deadline\n",
 	 __func__));
 			break;
 		}
-		spin_unlock_irqrestore(shost->host_lock, flags);
 		SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
 		  "%s: requesting sense\n",
 		  current->comm));
@@ -1242,19 +1238,15 @@ static int scsi_eh_test_devices(struct list_head *cmd_list,
 		sdev = scmd->device;
 
 		if (!try_stu) {
-			spin_lock_irqsave(sdev->host->host_lock, flags);
 			if (scsi_host_eh_past_deadline(sdev->host)) {
 /* Push items back onto work_q */
 list_splice_init(cmd_list, work_q);
-spin_unlock_irqrestore(sdev->host->host_lock,
-		   flags);
 SCSI_LOG_ERROR_RECOVERY(3,
 	shost_printk(KERN_INFO, sdev->host,
 		 "skip %s, past eh deadline",
 		 __func__));
 break;
 			}
-			spin_unlock_irqrestore(sdev->host->host_lock, flags);
 		}
 
 		finish_cmds = !scsi_device_online(scmd->device) ||
@@ -1301,9 +1293,7 @@ static int scsi_eh_abort_cmds(struct list_head *work_q,
 		if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))
 			continue;
 		shost = scmd->device->host;
-		spin_lock_irqsave(shost->host_lock, flags);
 		if (scsi_host_eh_past_deadline(shost)) {
-			spin_unlock_irqrestore(shost->host_lock, flags);
 			list_splice_init(&check_list, work_q);
 			SCSI_LOG_ERROR_RECOVERY(3,
 shost_printk(KERN_INFO, shost,
@@ -1311,7 +1301,6 @@ static int scsi_eh_abort_cmds(struct list_head *work_q,
 	 __func__));
 			return list_empty(work_q);
 		}
-		spin_unlock_irqrestore(shost->host_lock, flags);
 		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
 		  "0x%p\n", current->comm,
 		  scmd));
@@ -1378,16 +1367,13 @@ static int scsi_eh_stu(struct Scsi_Host *shost,
 	unsigned long flags;
 
 	shost_for_each_device(sdev, shost) {
-		spin_lock_irqsave(shost->host_lock, flags);
 		if (scsi_host_eh_past_deadline(shost)) {
-			spin_unlock_irqrestore(shost->host_lock, flags);
 			SCSI_LOG_ERROR_RECOVERY(3,
 shost_printk(KERN_INFO, shost,
 	"skip %s, past eh deadline\n",
 

Re: [PATCH 2/3] scsi: improved eh timeout handler

2013-11-04 Thread James Bottomley
On Mon, 2013-11-04 at 15:46 +0100, Hannes Reinecke wrote:
> On 11/04/2013 03:25 PM, James Bottomley wrote:
> > On Mon, 2013-11-04 at 14:36 +0100, Hannes Reinecke wrote:
> >> On 10/31/2013 04:49 PM, Christoph Hellwig wrote:
> >>> Looks reasonable to me, but a few minor nitpicks:
> >>>
>  +spin_lock_irqsave(sdev->host->host_lock, flags);
>  +if (scsi_host_eh_past_deadline(sdev->host)) {
> >>>
> >>> I don't have the implementation of scsi_host_eh_past_deadline in my
> >>> local tree, but do we really need the host lock for it?
> >>>
> >> Yes. The eh_deadline variable might be set from an interrupt context
> >> or from userland, so we need to protect access to it.
> > 
> > That's not really true.  on all our supported architectures 32 bit
> > reads/writes are atomic, which means that if one CPU writes a word at
> > the same time another reads one, the reader is guaranteed to see either
> > the old or the new data.  Given the expense of lock cache line bouncing
> > on the newer architectures, we really want to avoid a spinlock where
> > possible.
> > 
> > In this case, the problem with the implementation is that the writer
> > might set eh_deadline to zero, but this is fixable in
> > scsi_host_eh_past_deadline() by checking for zero before and after the
> > time_before (for the zero to non-zero and non-zero to zero cases).
> > 
> IE you mean something like that attached patch?

Yes (except that there should be a comment explaining why we do the read
twice), I think the cost of the extra read check is much less than the
spinlock on all of our platforms.

James

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v6 2/2] block,scsi: convert and handle ERR_PTR from blk_get_request

2013-11-04 Thread Boaz Harrosh
On 10/31/2013 03:50 PM, Joe Lawrence wrote:
> The blk_get_request function may fail in low-memory conditions or during
> device removal (even if __GFP_WAIT is set). To distinguish between these
> errors, modify the blk_get_request call stack to return the appropriate
> ERR_PTR. Verify that all callers check the return status and consider
> IS_ERR instead of a simple NULL pointer check.
> 
> Signed-off-by: Joe Lawrence 
> ---
<>
> diff --git a/drivers/scsi/osd/osd_initiator.c 
> b/drivers/scsi/osd/osd_initiator.c
> index aa66361ed44b..0250efda647b 100644
> --- a/drivers/scsi/osd/osd_initiator.c
> +++ b/drivers/scsi/osd/osd_initiator.c
> @@ -1567,8 +1567,8 @@ static struct request *_make_request(struct 
> request_queue *q, bool has_write,
>   struct request *req;
>  
>   req = blk_get_request(q, has_write ? WRITE : READ, flags);
> - if (unlikely(!req))
> - return ERR_PTR(-ENOMEM);
> + if (unlikely(IS_ERR(req)))

Just a nit IS_ERR already has an unlikely so it can be dropped here. (No bigy)

ACK-by: Boaz Harrosh 

> + return req;
>  
>   return req;
>   }
<>

Thanks
Boaz

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] scsi: improved eh timeout handler

2013-11-04 Thread Hannes Reinecke
On 11/04/2013 03:50 PM, James Bottomley wrote:
> On Mon, 2013-11-04 at 15:46 +0100, Hannes Reinecke wrote:
>> On 11/04/2013 03:25 PM, James Bottomley wrote:
>>> On Mon, 2013-11-04 at 14:36 +0100, Hannes Reinecke wrote:
 On 10/31/2013 04:49 PM, Christoph Hellwig wrote:
> Looks reasonable to me, but a few minor nitpicks:
>
>> +spin_lock_irqsave(sdev->host->host_lock, flags);
>> +if (scsi_host_eh_past_deadline(sdev->host)) {
>
> I don't have the implementation of scsi_host_eh_past_deadline in my
> local tree, but do we really need the host lock for it?
>
 Yes. The eh_deadline variable might be set from an interrupt context
 or from userland, so we need to protect access to it.
>>>
>>> That's not really true.  on all our supported architectures 32 bit
>>> reads/writes are atomic, which means that if one CPU writes a word at
>>> the same time another reads one, the reader is guaranteed to see either
>>> the old or the new data.  Given the expense of lock cache line bouncing
>>> on the newer architectures, we really want to avoid a spinlock where
>>> possible.
>>>
>>> In this case, the problem with the implementation is that the writer
>>> might set eh_deadline to zero, but this is fixable in
>>> scsi_host_eh_past_deadline() by checking for zero before and after the
>>> time_before (for the zero to non-zero and non-zero to zero cases).
>>>
>> IE you mean something like that attached patch?
> 
> Yes (except that there should be a comment explaining why we do the read
> twice), I think the cost of the extra read check is much less than the
> spinlock on all of our platforms.
> 
So, this is what I've ended up with; sadly I had to use 'volatile'
here which checkpatch doesn't like.
I _could_ move eh_deadline to be atomic, that would avoid the
'volatile' setting. Feels like an overkill, though.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries & Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
>From 283f1b50e833fad969323531ccd0ce889a5e4044 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke 
Date: Mon, 4 Nov 2013 16:23:36 +0100
Subject: [PATCH 3/5] scsi: Unlock accesses to eh_deadline

32bit accesses are guaranteed to be atomic, so we can remove
the spinlock when checking for eh_deadline. We only need to
make sure to catch any updates which might happened during
the call to time_before(); if so we just recheck with the
correct value.

Signed-off-by: Hannes Reinecke 
---
 drivers/scsi/scsi_error.c | 54 +++
 1 file changed, 17 insertions(+), 37 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 7eecbb5..d122e89 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -91,13 +91,26 @@ EXPORT_SYMBOL_GPL(scsi_schedule_eh);
 
 static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
 {
-	if (!shost->last_reset || !shost->eh_deadline)
-		return 0;
+	volatile int eh_deadline;
 
-	if (time_before(jiffies,
-			shost->last_reset + shost->eh_deadline))
+recheck:
+	eh_deadline = shost->eh_deadline;
+	if (!shost->last_reset || !eh_deadline)
 		return 0;
 
+	/*
+	 * 32bit accesses are guaranteed to be atomic
+	 * (on all supported architectures), so instead
+	 * of using a spinlock we can as well double check
+	 * if eh_deadline has been modified after the
+	 * time_before call; if so we need to recheck
+	 * with the correct values.
+	 */
+	if (time_before(jiffies, shost->last_reset + eh_deadline)) {
+		if (eh_deadline != shost->eh_deadline)
+			goto recheck;
+		return 0;
+	}
 	return 1;
 }
 
@@ -111,18 +124,14 @@ scmd_eh_abort_handler(struct work_struct *work)
 	struct scsi_cmnd *scmd =
 		container_of(work, struct scsi_cmnd, abort_work.work);
 	struct scsi_device *sdev = scmd->device;
-	unsigned long flags;
 	int rtn;
 
-	spin_lock_irqsave(sdev->host->host_lock, flags);
 	if (scsi_host_eh_past_deadline(sdev->host)) {
-		spin_unlock_irqrestore(sdev->host->host_lock, flags);
 		SCSI_LOG_ERROR_RECOVERY(3,
 			scmd_printk(KERN_INFO, scmd,
 "scmd %p eh timeout, not aborting\n",
 scmd));
 	} else {
-		spin_unlock_irqrestore(sdev->host->host_lock, flags);
 		SCSI_LOG_ERROR_RECOVERY(3,
 			scmd_printk(KERN_INFO, scmd,
 "aborting command %p\n", scmd));
@@ -1132,7 +1141,6 @@ int scsi_eh_get_sense(struct list_head *work_q,
 	struct scsi_cmnd *scmd, *next;
 	struct Scsi_Host *shost;
 	int rtn;
-	unsigned long flags;
 
 	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
 		if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) ||
@@ -1140,16 +1148,13 @@ int scsi_eh_get_sense(struct list_head *work_q,
 			continue;
 
 		shost = scmd->device->host;
-		spin_lock_irqsave(shost->host_lock, flags);
 		if (scsi_host_eh_past_deadline(shost)) {
-			spin_unlock_irqrestore(shost->host_lock, flags);
 			SCSI_LOG_ERROR_RECOVERY(3,
 		

Re: scsi-mq + open-iscsi support patches..?

2013-11-04 Thread Nicholas A. Bellinger
On Sat, 2013-11-02 at 16:10 +, Jayamohan Kallickal wrote:



> >> On a related note, some iscsi vendor has been hitting a crash with 
> >> your tree.
> 
> >I got an email from Jayamohan recently, but the OOPs did not appear to be 
> >scsi-mq related..
> 
> I do see the crash on my Ubuntu VM running on VirtualBox  but don't see the 
> crash on a Standalone system. 
> Attaching the screenshot
> 

Hi Jayamohan,

Care to enable the virtual serial port for this VM in order to grab the
full dmesg output..?

Also, an idea of what SATA emulation is being used here would be helpful
as well.  (eg: lspci -vvv for the ATA device on the running system)

--nab

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] libata: use sleep instead of standby command

2013-11-04 Thread Phillip Susi
The ATA SLEEP mode saves some more power than SUSPEND, and
has basically the same recovery time, so use it instead.

Signed-off-by: Phillip Susi 
---
 drivers/ata/libata-scsi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 97a0cef..79b75fd 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1362,8 +1362,8 @@ static unsigned int ata_scsi_start_stop_xlat(struct 
ata_queued_cmd *qc)
 system_entering_hibernation())
goto skip;
 
-   /* Issue ATA STANDBY IMMEDIATE command */
-   tf->command = ATA_CMD_STANDBYNOW1;
+   /* Issue ATA SLEEP command */
+   tf->command = ATA_CMD_SLEEP;
}
 
/*
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] libata: avoid waking disk to check power

2013-11-04 Thread Phillip Susi
When a disk is in SLEEP mode it can not respond to commands,
including the CHECK POWER command.  Instead of waking up the
sleeping disk, fake the reply to the CHECK POWER command to
indicate the disk is in standby mode.  This prevents udisks
from waking up sleeping disks when it polls to see if they
are awake or not before trying to read their smart status.

Signed-off-by: Phillip Susi 
---
 drivers/ata/libata-core.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 83b1a9f..573d151 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5084,6 +5084,13 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
 
/* if device is sleeping, schedule reset and abort the link */
if (unlikely(qc->dev->flags & ATA_DFLAG_SLEEPING)) {
+   if (unlikely(qc->tf.command == ATA_CMD_CHK_POWER))
+   {
+   /* fake reply to avoid waking drive */
+   qc->result_tf.nsect = 0;
+   ata_qc_complete(qc);
+   return;
+   }
link->eh_info.action |= ATA_EH_RESET;
ata_ehi_push_desc(&link->eh_info, "waking up from sleep");
ata_link_abort(link);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] scsi: improved eh timeout handler

2013-11-04 Thread James Bottomley
On Mon, 2013-11-04 at 16:43 +0100, Hannes Reinecke wrote:
> On 11/04/2013 03:50 PM, James Bottomley wrote:
> > On Mon, 2013-11-04 at 15:46 +0100, Hannes Reinecke wrote:
> >> On 11/04/2013 03:25 PM, James Bottomley wrote:
> >>> On Mon, 2013-11-04 at 14:36 +0100, Hannes Reinecke wrote:
>  On 10/31/2013 04:49 PM, Christoph Hellwig wrote:
> > Looks reasonable to me, but a few minor nitpicks:
> >
> >> +  spin_lock_irqsave(sdev->host->host_lock, flags);
> >> +  if (scsi_host_eh_past_deadline(sdev->host)) {
> >
> > I don't have the implementation of scsi_host_eh_past_deadline in my
> > local tree, but do we really need the host lock for it?
> >
>  Yes. The eh_deadline variable might be set from an interrupt context
>  or from userland, so we need to protect access to it.
> >>>
> >>> That's not really true.  on all our supported architectures 32 bit
> >>> reads/writes are atomic, which means that if one CPU writes a word at
> >>> the same time another reads one, the reader is guaranteed to see either
> >>> the old or the new data.  Given the expense of lock cache line bouncing
> >>> on the newer architectures, we really want to avoid a spinlock where
> >>> possible.
> >>>
> >>> In this case, the problem with the implementation is that the writer
> >>> might set eh_deadline to zero, but this is fixable in
> >>> scsi_host_eh_past_deadline() by checking for zero before and after the
> >>> time_before (for the zero to non-zero and non-zero to zero cases).
> >>>
> >> IE you mean something like that attached patch?
> > 
> > Yes (except that there should be a comment explaining why we do the read
> > twice), I think the cost of the extra read check is much less than the
> > spinlock on all of our platforms.
> > 
> So, this is what I've ended up with; sadly I had to use 'volatile'
> here which checkpatch doesn't like.

Why?  Volatile has no real meaning on a local variable.  You can just do
an ordinary eh_deadline = shost->eh_deadline; and it will see either the
before or after value.

> I _could_ move eh_deadline to be atomic, that would avoid the
> 'volatile' setting. Feels like an overkill, though.

Please dump the recheck loop and just check for zero again.  The race is
acceptable because we're not trying to mediate it in a meaningful way.
As long as the result is consistent with either the before or after
value, that's fine.

James

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] libata: use sleep instead of standby command

2013-11-04 Thread Aaron Lu
+linux-ide

On 11/05/2013 08:52 AM, Phillip Susi wrote:
> The ATA SLEEP mode saves some more power than SUSPEND, and
> has basically the same recovery time, so use it instead.

I suppose this is mainly for runtime PM? Since for system
suspend/hibernation, the disk and its controller will be powered off
anyway.

Best regards,
Aaron

> 
> Signed-off-by: Phillip Susi 
> ---
>  drivers/ata/libata-scsi.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index 97a0cef..79b75fd 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -1362,8 +1362,8 @@ static unsigned int ata_scsi_start_stop_xlat(struct 
> ata_queued_cmd *qc)
>system_entering_hibernation())
>   goto skip;
>  
> - /* Issue ATA STANDBY IMMEDIATE command */
> - tf->command = ATA_CMD_STANDBYNOW1;
> + /* Issue ATA SLEEP command */
> + tf->command = ATA_CMD_SLEEP;
>   }
>  
>   /*
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Disk wakeup on resume

2013-11-04 Thread Phillip Susi
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA512

I can not figure out what is waking up disks on resume from suspend.
I thought it was sd.c, and setting manage_start_stop = 0 should stop
that.  It does stop the message printed saying it is being started,
yet the disk is still started, and this makes the resume take nearly
10 seconds.  So it seems sd_resume()'s attempt to start the disk is
pointless and redundant, and something else is starting up the disk.
Oddly, if I unbind the sd driver ( echo 1 >
/sys/block/sdx/device/delete ), then after a resume the disk remains
off ( and spins up again on rescan ).

I don't see why the kernel needs to delay completing the resume until
after the disks have spun up when they will automatically spin up when
accessed, which may be never ( thus they should be left spun down ),
but I can not find the culprit causing this.

-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.14 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQEcBAEBCgAGBQJSeFnWAAoJEJrBOlT6nu758QwIAJAhlQ5o6qDG9fm98w0j7MY9
OYp/NTd/nO4LGPGaTob/JCyuMDPMtXCOdueLxStlqnBLnPgfANZWD+VSSbsm/uae
pzvjihe6uU+mTCbPW0MNPEiFpH2mfVY8NMhrAEI/n0j9fjgd7E33NI2fedymEDPk
LcgdOtdQP3VSMMs6pjPRpkpVBJG1g3Y6XPLV8IFj8RPz6lUj3k5qj45lsgf//Am8
5vD8SRoZZd7bZghqOCeUU2I/1kntKI5qz4cDUQq6wTVwqAuhLbpjFMOk5lX3gg/h
Q25FGU6Rc8nMyLw4eLbRA8Tvh2aLX5anPkdJqkb132ulbmqP7b8hjUZ4qtNQ/wA=
=JmIT
-END PGP SIGNATURE-
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] libata: avoid waking disk to check power

2013-11-04 Thread Aaron Lu
+linux-ide

On 11/05/2013 08:53 AM, Phillip Susi wrote:
> When a disk is in SLEEP mode it can not respond to commands,
> including the CHECK POWER command.  Instead of waking up the
> sleeping disk, fake the reply to the CHECK POWER command to
> indicate the disk is in standby mode.  This prevents udisks
> from waking up sleeping disks when it polls to see if they
> are awake or not before trying to read their smart status.

If the disk entered sleep mode due to runtime PM, then udisks can easily
tell by checking the device's runtime status and not send out the query.

But if the disk entered sleep mode due to other reason, the patch may be
necessary. So what's your scenario?

Best regards,
Aaron

> 
> Signed-off-by: Phillip Susi 
> ---
>  drivers/ata/libata-core.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index 83b1a9f..573d151 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -5084,6 +5084,13 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
>  
>   /* if device is sleeping, schedule reset and abort the link */
>   if (unlikely(qc->dev->flags & ATA_DFLAG_SLEEPING)) {
> + if (unlikely(qc->tf.command == ATA_CMD_CHK_POWER))
> + {
> + /* fake reply to avoid waking drive */
> + qc->result_tf.nsect = 0;
> + ata_qc_complete(qc);
> + return;
> + }
>   link->eh_info.action |= ATA_EH_RESET;
>   ata_ehi_push_desc(&link->eh_info, "waking up from sleep");
>   ata_link_abort(link);
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] libata: use sleep instead of standby command

2013-11-04 Thread Phillip Susi
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA512

On 11/04/2013 09:23 PM, Aaron Lu wrote:
> I suppose this is mainly for runtime PM? Since for system 
> suspend/hibernation, the disk and its controller will be powered
> off anyway.

Yes, or the second patch also helps when one manually issues hdparm
- -Y, which otherwise will cause the drive to be woken up to answer the
CHECK POWER command, which udisks issues to decide if it should skip
polling the SMART status of the drive.


-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.14 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQEcBAEBCgAGBQJSeFpOAAoJEJrBOlT6nu75JdwIALFvwncNyf1ENxnIho2Y3CXC
N6BpHQ7CzD1tap/5ybSFPBvUvJfyC/UPAY69bjldOtbwnArDIWRFqEWW7JWm6G0z
J1C5MMCIRHSuRo4RbN/rnsPQFsaFJy0Z5kJ0uXX+aOrZwEov5vo2MEuj/DEdCBDC
SKpYkogDlSHTNhFsaEF5iJiJlLU9FF5pEM6S6P0/+Z+SrZ5PVCDOm9rK+oeqrn0U
TBX5tKPxWRSjJ/2F094DYia5l8ODNgzl1gPDXA50a+A4M0CZi3d5Liz5Ctfnv88A
ac5cG8Ae8MDss0kJmSMCuRwkKVIROM3qMxf1wDWjBwm3eCTAJV1gxRYSN0adOvc=
=Rfny
-END PGP SIGNATURE-
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] libata: use sleep instead of standby command

2013-11-04 Thread Aaron Lu
On 11/05/2013 10:39 AM, Phillip Susi wrote:
> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA512
> 
> On 11/04/2013 09:23 PM, Aaron Lu wrote:
>> I suppose this is mainly for runtime PM? Since for system 
>> suspend/hibernation, the disk and its controller will be powered
>> off anyway.
> 
> Yes, or the second patch also helps when one manually issues hdparm
> - -Y, which otherwise will cause the drive to be woken up to answer the
> CHECK POWER command, which udisks issues to decide if it should skip
> polling the SMART status of the drive.

OK, I see.
I wish udidks can do both of these(polling smart status and put the disk
into sleep mode), so that it knows when the disk is in sleep mode and it
will not send out the needless poll...

-Aaron
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] libata: avoid waking disk to check power

2013-11-04 Thread Phillip Susi
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA512

On 11/04/2013 09:39 PM, Aaron Lu wrote:
> If the disk entered sleep mode due to runtime PM, then udisks can
> easily tell by checking the device's runtime status and not send
> out the query.
> 
> But if the disk entered sleep mode due to other reason, the patch
> may be necessary. So what's your scenario?

I am planning on patching udisks to do that as well, but if you have a
current udsisk or manually run hdparm -C, or some other program is
doing something similar, then this patch takes care of that.



-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.14 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQEcBAEBCgAGBQJSeFvFAAoJEJrBOlT6nu75hcYH/0YsYcs3u9Dmuqq9z5kFXL3k
C4xYQtvOxw2z+B/vEFzdym5J3ZfVyT4+ZdcHMvRgbbPH+R8/rzNC+F3OjdGtDIRA
5EUo0BUT2IfY0Yp2SzP44+8aRC4iF5jLLcH9XJKQx7WHedJQ7puCgJXXHAgUhpwW
52gJ0UEHnjlBgjwxY9RKqppnJs57x2VwgwR2BeFns6Hcry0S0HN0U4gwROmb1GZR
kjpOUUYHb3MH/gfzCaY97t0duAAp7jzZeOIuu+XHfkyV3APwsnbasLOOoRYtpbvQ
M++xmYQdNAh2DcZLEmAo6H6HLB/Z5d9K0+SKUFnsKHFaUCH0GPHm8SZ7E/NN7ig=
=7c7v
-END PGP SIGNATURE-
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/5] scsi: improved eh timeout handler

2013-11-04 Thread Hannes Reinecke
When a command runs into a timeout we need to send an 'ABORT TASK'
TMF. This is typically done by the 'eh_abort_handler' LLDD callback.

Conceptually, however, this function is a normal SCSI command, so
there is no need to enter the error handler.

This patch implements a new scsi_abort_command() function which
invokes an asynchronous function scsi_eh_abort_handler() to
abort the commands via the usual 'eh_abort_handler'.

If abort succeeds the command is either retried or terminated,
depending on the number of allowed retries. However, 'eh_eflags'
records the abort, so if the retry would fail again the
command is pushed onto the error handler without trying to
abort it (again); it'll be cleared up from SCSI EH.

Signed-off-by: Hannes Reinecke 
Cc: Ren Mingxin 
Cc: Christoph Hellwig 
---
 drivers/scsi/scsi.c   |   3 +
 drivers/scsi/scsi_error.c | 151 ++
 drivers/scsi/scsi_priv.h  |   2 +
 include/scsi/scsi_cmnd.h  |   1 +
 include/scsi/scsi_host.h  |   5 ++
 5 files changed, 149 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index fe0bcb1..2b04a57 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -297,6 +297,7 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, 
gfp_t gfp_mask)
 
cmd->device = dev;
INIT_LIST_HEAD(&cmd->list);
+   INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
spin_lock_irqsave(&dev->list_lock, flags);
list_add_tail(&cmd->list, &dev->cmd_list);
spin_unlock_irqrestore(&dev->list_lock, flags);
@@ -353,6 +354,8 @@ void scsi_put_command(struct scsi_cmnd *cmd)
list_del_init(&cmd->list);
spin_unlock_irqrestore(&cmd->device->list_lock, flags);
 
+   cancel_delayed_work(&cmd->abort_work);
+
__scsi_put_command(cmd->device->host, cmd, &sdev->sdev_gendev);
 }
 EXPORT_SYMBOL(scsi_put_command);
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 67c0014..7eecbb5 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -53,6 +53,8 @@ static void scsi_eh_done(struct scsi_cmnd *scmd);
 #define HOST_RESET_SETTLE_TIME  (10)
 
 static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
+static int scsi_try_to_abort_cmd(struct scsi_host_template *,
+struct scsi_cmnd *);
 
 /* called with shost->host_lock held */
 void scsi_eh_wakeup(struct Scsi_Host *shost)
@@ -100,6 +102,116 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host 
*shost)
 }
 
 /**
+ * scmd_eh_abort_handler - Handle command aborts
+ * @work:  command to be aborted.
+ */
+void
+scmd_eh_abort_handler(struct work_struct *work)
+{
+   struct scsi_cmnd *scmd =
+   container_of(work, struct scsi_cmnd, abort_work.work);
+   struct scsi_device *sdev = scmd->device;
+   unsigned long flags;
+   int rtn;
+
+   spin_lock_irqsave(sdev->host->host_lock, flags);
+   if (scsi_host_eh_past_deadline(sdev->host)) {
+   spin_unlock_irqrestore(sdev->host->host_lock, flags);
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_INFO, scmd,
+   "scmd %p eh timeout, not aborting\n",
+   scmd));
+   } else {
+   spin_unlock_irqrestore(sdev->host->host_lock, flags);
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_INFO, scmd,
+   "aborting command %p\n", scmd));
+   rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd);
+   if (rtn == SUCCESS) {
+   scmd->result |= DID_TIME_OUT << 16;
+   if (!scsi_noretry_cmd(scmd) &&
+   (++scmd->retries <= scmd->allowed)) {
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_WARNING, scmd,
+   "scmd %p retry "
+   "aborted command\n", scmd));
+   scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
+   } else {
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_WARNING, scmd,
+   "scmd %p finish "
+   "aborted command\n", scmd));
+   scsi_finish_command(scmd);
+   }
+   return;
+   }
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_INFO, scmd,
+   "scmd %p abort failed, rtn %d\n",
+   scmd, rtn));
+   }
+
+   if (!scsi_eh_scmd_add(scmd, 0)) {
+   SCSI_LOG_ERROR_RECO

[PATCH 5/5] scsi: Update documentation

2013-11-04 Thread Hannes Reinecke
The documentation has gone out-of-sync, so update it to
the current status.

Signed-off-by: Hannes Reinecke 
---
 Documentation/scsi/scsi_eh.txt  | 69 +++--
 Documentation/scsi/scsi_mid_low_api.txt |  9 -
 drivers/scsi/scsi.c |  6 +--
 3 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt
index 6ff16b6..a0c8511 100644
--- a/Documentation/scsi/scsi_eh.txt
+++ b/Documentation/scsi/scsi_eh.txt
@@ -42,20 +42,14 @@ discussion.
 
  Once LLDD gets hold of a scmd, either the LLDD will complete the
 command by calling scsi_done callback passed from midlayer when
-invoking hostt->queuecommand() or SCSI midlayer will time it out.
+invoking hostt->queuecommand() or the block layer will time it out.
 
 
 [1-2-1] Completing a scmd w/ scsi_done
 
  For all non-EH commands, scsi_done() is the completion callback.  It
-does the following.
-
- 1. Delete timeout timer.  If it fails, it means that timeout timer
-has expired and is going to finish the command.  Just return.
-
- 2. Link scmd to per-cpu scsi_done_q using scmd->en_entry
-
- 3. Raise SCSI_SOFTIRQ
+just calls blk_complete_request() to delete the block layer timer and
+raise SCSI_SOFTIRQ
 
  SCSI_SOFTIRQ handler scsi_softirq calls scsi_decide_disposition() to
 determine what to do with the command.  scsi_decide_disposition()
@@ -64,10 +58,12 @@ with the command.
 
  - SUCCESS
scsi_finish_command() is invoked for the command.  The
-   function does some maintenance choirs and notify completion by
-   calling scmd->done() callback, which, for fs requests, would
-   be HLD completion callback - sd:sd_rw_intr, sr:rw_intr,
-   st:st_intr.
+   function does some maintenance chores and then calls
+   scsi_io_completion() to finish the I/O.
+   scsi_io_completion() then notifies the block layer on
+   the completed request by calling blk_end_request and
+   friends or figures out what to do with the remainder
+   of the data in case of an error.
 
  - NEEDS_RETRY
  - ADD_TO_MLQUEUE
@@ -86,33 +82,45 @@ function
  1. invokes optional hostt->eh_timed_out() callback.  Return value can
 be one of
 
-- EH_HANDLED
-   This indicates that eh_timed_out() dealt with the timeout.  The
-   scmd is passed to __scsi_done() and thus linked into per-cpu
-   scsi_done_q.  Normal command completion described in [1-2-1]
-   follows.
+- BLK_EH_HANDLED
+   This indicates that eh_timed_out() dealt with the timeout.
+   The command is passed back to the block layer and completed
+   via __blk_complete_requests().
+
+   *NOTE* After returning BLK_EH_HANDLED the SCSI layer is
+   assumed to be finished with the command, and no other
+   functions from the SCSI layer will be called. So this
+   should typically only be returned if the eh_timed_out()
+   handler raced with normal completion.
 
-- EH_RESET_TIMER
+- BLK_EH_RESET_TIMER
This indicates that more time is required to finish the
command.  Timer is restarted.  This action is counted as a
retry and only allowed scmd->allowed + 1(!) times.  Once the
-   limit is reached, action for EH_NOT_HANDLED is taken instead.
+   limit is reached, action for BLK_EH_NOT_HANDLED is taken instead.
 
-   *NOTE* This action is racy as the LLDD could finish the scmd
-   after the timeout has expired but before it's added back.  In
-   such cases, scsi_done() would think that timeout has occurred
-   and return without doing anything.  We lose completion and the
-   command will time out again.
-
-- EH_NOT_HANDLED
-   This is the same as when eh_timed_out() callback doesn't exist.
+- BLK_EH_NOT_HANDLED
+eh_timed_out() callback did not handle the command.
Step #2 is taken.
 
+ 2. If the host supports asynchronous completion (as indicated by the
+no_async_abort setting in the host template) scsi_abort_command()
+is invoked to schedule an asynchrous abort. If that fails
+Step #3 is taken.
+
  2. scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD) is invoked for the
 command.  See [1-3] for more information.
 
+[1-3] Asynchronous command aborts
+
+ After a timeout occurs a command abort is scheduled from
+ scsi_abort_command(). If the abort is successful the command
+ will either be retried (if the number of retries is not exhausted)
+ or terminated with DID_TIME_OUT.
+ Otherwise scsi_eh_scmd_add() is invoked for the command.
+ See [1-4] for more information.
 
-[1-3] How EH takes over
+[1-4] How EH takes over
 
  scmds enter EH via scsi_eh_scmd_add(), which does the following.
 
@@ -320,7 +328,8 @@ scmd->allowed.
 
 <>
 
-   This action is taken for each timed out command.
+   This action is taken for each timed out command when
+   no_async_abort is enabled in the host template.
hostt->eh_abort_handler() i

[PATCHv10 0/5] New EH command timeout handler

2013-11-04 Thread Hannes Reinecke
Hi all,

this patchset implements a new SCSI EH command timeout handler
which will be sending command aborts inline without actually
engaging SCSI EH.
SCSI EH will only be invoked if command abort fails.

In addition the commands will be returned directly
if the command abort succeeded, cutting down recovery
times dramatically.

With the original SCSI EH I got:
# time dd if=/dev/zero of=/dev/dm-2 bs=4k count=4k oflag=direct
4096+0 records in
4096+0 records out
16777216 bytes (17 MB) copied, 142.652 s, 118 kB/s

real2m22.657s
user0m0.013s
sys 0m0.145s

With this patchset I got:
# time dd if=/dev/zero of=/dev/dm-2 bs=4k count=4k oflag=direct
4096+0 records in
4096+0 records out
16777216 bytes (17 MB) copied, 52.1579 s, 322 kB/s

real0m52.163s
user0m0.012s
sys 0m0.145s

Test was to disable RSCN on the target port, disable the
target port, and then start the 'dd' command as indicated.

Changes to the original version:
- Use a private list in scsi_eh_abort_handler to avoid
  list starvation (pointed out by Joern Engel)
- Terminate command aborts when the first abort fails
- Do not attempt command aborts if the host is already in recovery
  or if the device is removed.
- Flush abort workqueue if the device is removed.

Changes to v2:
- Removed eh_entry initialisation
- Convert to per-command workqueue

Changes to v3:
- Use delayed_work
- Enable new eh timeout handler for virtio, SAS, and FC
- Modify logging messages to include scmd pointer

Changes to v4:
- Remove stubs when enabling new eh timeout handler
  for other drivers

Changes to v5:
- Enable new eh timeout handler per default
- Update documentation

Changes to v6:
- Include changes from James Bottomley for erratic device
  offline patch
- Rearrange patches
- Update SCSI midlayer documentation

Changes to v7:
- Merge obsolete patch

Changes to v8:
- Include fixes from hch
- Include patch from Ren Mingxin to set eh_deadline to '0'

Changes to v9:
- Unlock accesses to eh_deadline

Hannes Reinecke (3):
  scsi: improved eh timeout handler
  scsi: Unlock accesses to eh_deadline
  scsi: Update documentation

James Bottomley (1):
  scsi: Fix erratic device offline during EH

Ren Mingxin (1):
  scsi: Set the minimum valid value of 'eh_deadline' as 0

 Documentation/scsi/scsi_eh.txt  |  69 +-
 Documentation/scsi/scsi_mid_low_api.txt |   9 +-
 drivers/scsi/hosts.c|  16 ++-
 drivers/scsi/scsi.c |   9 +-
 drivers/scsi/scsi_error.c   | 229 
 drivers/scsi/scsi_priv.h|   2 +
 drivers/scsi/scsi_sysfs.c   |  36 +++--
 drivers/scsi/sd.c   |  26 ++--
 include/scsi/scsi_cmnd.h|   1 +
 include/scsi/scsi_driver.h  |   2 +-
 include/scsi/scsi_host.h|   5 +
 11 files changed, 281 insertions(+), 123 deletions(-)

-- 
1.7.12.4

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/5] scsi: Set the minimum valid value of 'eh_deadline' as 0

2013-11-04 Thread Hannes Reinecke
From: Ren Mingxin 

The former minimum valid value of 'eh_deadline' is 1s, which means
the earliest occasion to shorten EH is 1 second later since a
command is failed or timed out. But if we want to skip EH steps
ASAP, we have to wait until the first EH step is finished. If the
duration of the first EH step is long, this waiting time is
excruciating. So, it is necessary to accept 0 as the minimum valid
value for 'eh_deadline'.

According to my test, with Hannes' patchset 'New EH command timeout
handler' as well, the minimum IO time is improved from 73s
(eh_deadline = 1) to 43s(eh_deadline = 0) when commands are timed
out by disabling RSCN and target port.

Signed-off-by: Ren Mingxin 
Signed-off-by: Hannes Reinecke 
---
 drivers/scsi/hosts.c  | 16 
 drivers/scsi/scsi_error.c | 34 +-
 drivers/scsi/scsi_sysfs.c | 36 +---
 3 files changed, 58 insertions(+), 28 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index f334859..bc8c462 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -316,11 +316,11 @@ static void scsi_host_dev_release(struct device *dev)
kfree(shost);
 }
 
-static unsigned int shost_eh_deadline;
+static int shost_eh_deadline = -1;
 
-module_param_named(eh_deadline, shost_eh_deadline, uint, S_IRUGO|S_IWUSR);
+module_param_named(eh_deadline, shost_eh_deadline, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(eh_deadline,
-"SCSI EH timeout in seconds (should be between 1 and 2^32-1)");
+"SCSI EH timeout in seconds (should be between 0 and 2^31-1)");
 
 static struct device_type scsi_host_type = {
.name = "scsi_host",
@@ -394,7 +394,15 @@ struct Scsi_Host *scsi_host_alloc(struct 
scsi_host_template *sht, int privsize)
shost->unchecked_isa_dma = sht->unchecked_isa_dma;
shost->use_clustering = sht->use_clustering;
shost->ordered_tag = sht->ordered_tag;
-   shost->eh_deadline = shost_eh_deadline * HZ;
+   if (shost_eh_deadline == -1)
+   shost->eh_deadline = -1;
+   else if ((ulong) shost_eh_deadline * HZ > INT_MAX) {
+   shost_printk(KERN_WARNING, shost,
+"eh_deadline %u too large, setting to %u\n",
+shost_eh_deadline, INT_MAX / HZ);
+   shost->eh_deadline = INT_MAX;
+   } else
+   shost->eh_deadline = shost_eh_deadline * HZ;
 
if (sht->supported_mode == MODE_UNKNOWN)
/* means we didn't set it ... default to INITIATOR */
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index a3f8988..f2fe580 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -91,18 +91,18 @@ EXPORT_SYMBOL_GPL(scsi_schedule_eh);
 
 static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
 {
-   if (!shost->last_reset || !shost->eh_deadline)
+   if (!shost->last_reset || shost->eh_deadline == -1)
return 0;
 
/*
 * 32bit accesses are guaranteed to be atomic
 * (on all supported architectures), so instead
 * of using a spinlock we can as well double check
-* if eh_deadline has been unset during the
+* if eh_deadline has been set to 'off' during the
 * time_before call.
 */
if (time_before(jiffies, shost->last_reset + shost->eh_deadline) &&
-   shost->eh_deadline != 0)
+   shost->eh_deadline > -1)
return 0;
 
return 1;
@@ -132,26 +132,34 @@ scmd_eh_abort_handler(struct work_struct *work)
rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd);
if (rtn == SUCCESS) {
scmd->result |= DID_TIME_OUT << 16;
-   if (!scsi_noretry_cmd(scmd) &&
+   if (scsi_host_eh_past_deadline(sdev->host)) {
+   SCSI_LOG_ERROR_RECOVERY(3,
+   scmd_printk(KERN_INFO, scmd,
+   "scmd %p eh timeout, "
+   "not retrying aborted "
+   "command\n", scmd));
+   } else if (!scsi_noretry_cmd(scmd) &&
(++scmd->retries <= scmd->allowed)) {
SCSI_LOG_ERROR_RECOVERY(3,
scmd_printk(KERN_WARNING, scmd,
"scmd %p retry "
"aborted command\n", scmd));
scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
+   return;
} else {
SCSI_LOG_ERROR_RECOVERY(3,
scmd_printk(KERN_WARNING, scmd,

[PATCH 3/5] scsi: Unlock accesses to eh_deadline

2013-11-04 Thread Hannes Reinecke
32bit accesses are guaranteed to be atomic, so we can remove
the spinlock when checking for eh_deadline. We only need to
make sure to catch any updates which might happened during
the call to time_before(); if so we just recheck with the
correct value.

Signed-off-by: Hannes Reinecke 
---
 drivers/scsi/scsi_error.c | 44 +---
 1 file changed, 9 insertions(+), 35 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 7eecbb5..a3f8988 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -94,8 +94,15 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host 
*shost)
if (!shost->last_reset || !shost->eh_deadline)
return 0;
 
-   if (time_before(jiffies,
-   shost->last_reset + shost->eh_deadline))
+   /*
+* 32bit accesses are guaranteed to be atomic
+* (on all supported architectures), so instead
+* of using a spinlock we can as well double check
+* if eh_deadline has been unset during the
+* time_before call.
+*/
+   if (time_before(jiffies, shost->last_reset + shost->eh_deadline) &&
+   shost->eh_deadline != 0)
return 0;
 
return 1;
@@ -111,18 +118,14 @@ scmd_eh_abort_handler(struct work_struct *work)
struct scsi_cmnd *scmd =
container_of(work, struct scsi_cmnd, abort_work.work);
struct scsi_device *sdev = scmd->device;
-   unsigned long flags;
int rtn;
 
-   spin_lock_irqsave(sdev->host->host_lock, flags);
if (scsi_host_eh_past_deadline(sdev->host)) {
-   spin_unlock_irqrestore(sdev->host->host_lock, flags);
SCSI_LOG_ERROR_RECOVERY(3,
scmd_printk(KERN_INFO, scmd,
"scmd %p eh timeout, not aborting\n",
scmd));
} else {
-   spin_unlock_irqrestore(sdev->host->host_lock, flags);
SCSI_LOG_ERROR_RECOVERY(3,
scmd_printk(KERN_INFO, scmd,
"aborting command %p\n", scmd));
@@ -1132,7 +1135,6 @@ int scsi_eh_get_sense(struct list_head *work_q,
struct scsi_cmnd *scmd, *next;
struct Scsi_Host *shost;
int rtn;
-   unsigned long flags;
 
list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) ||
@@ -1140,16 +1142,13 @@ int scsi_eh_get_sense(struct list_head *work_q,
continue;
 
shost = scmd->device->host;
-   spin_lock_irqsave(shost->host_lock, flags);
if (scsi_host_eh_past_deadline(shost)) {
-   spin_unlock_irqrestore(shost->host_lock, flags);
SCSI_LOG_ERROR_RECOVERY(3,
shost_printk(KERN_INFO, shost,
"skip %s, past eh deadline\n",
 __func__));
break;
}
-   spin_unlock_irqrestore(shost->host_lock, flags);
SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
  "%s: requesting sense\n",
  current->comm));
@@ -1235,26 +1234,21 @@ static int scsi_eh_test_devices(struct list_head 
*cmd_list,
struct scsi_cmnd *scmd, *next;
struct scsi_device *sdev;
int finish_cmds;
-   unsigned long flags;
 
while (!list_empty(cmd_list)) {
scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry);
sdev = scmd->device;
 
if (!try_stu) {
-   spin_lock_irqsave(sdev->host->host_lock, flags);
if (scsi_host_eh_past_deadline(sdev->host)) {
/* Push items back onto work_q */
list_splice_init(cmd_list, work_q);
-   spin_unlock_irqrestore(sdev->host->host_lock,
-  flags);
SCSI_LOG_ERROR_RECOVERY(3,
shost_printk(KERN_INFO, sdev->host,
 "skip %s, past eh 
deadline",
 __func__));
break;
}
-   spin_unlock_irqrestore(sdev->host->host_lock, flags);
}
 
finish_cmds = !scsi_device_online(scmd->device) ||
@@ -1295,15 +1289,12 @@ static int scsi_eh_abort_cmds(struct list_head *work_q,
LIST_HEAD(check_list);
int rtn;
struct Scsi_Host *shost;
-   unsigned long flags;
 
list_for_each_entry_safe(scmd, next, 

[PATCH 1/5] scsi: Fix erratic device offline during EH

2013-11-04 Thread Hannes Reinecke
From: James Bottomley 

Commit 18a4d0a22ed6c54b67af7718c305cd010f09ddf8
(Handle disk devices which can not process medium access commands)
was introduced to offline any device which cannot process medium
access commands.
However, commit 3eef6257de48ff84a5d98ca533685df8a3beaeb8
(Reduce error recovery time by reducing use of TURs) reduced
the number of TURs by sending it only on the first failing
command, which might or might not be a medium access command.
So in combination this results in an erratic device offlining
during EH; if the command where the TUR was sent upon happens
to be a medium access command the device will be set offline,
if not everything proceeds as normal.

This patch moves the check to the final test, eliminating
this problem.

Signed-off-by: Hannes Reinecke 
Signed-off-by: James Bottomley 
---
 drivers/scsi/scsi_error.c  | 26 +-
 drivers/scsi/sd.c  | 26 --
 include/scsi/scsi_driver.h |  2 +-
 3 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index e8bee9f..67c0014 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -941,12 +941,6 @@ retry:
 
scsi_eh_restore_cmnd(scmd, &ses);
 
-   if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) {
-   struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
-   if (sdrv->eh_action)
-   rtn = sdrv->eh_action(scmd, cmnd, cmnd_size, rtn);
-   }
-
return rtn;
 }
 
@@ -964,6 +958,16 @@ static int scsi_request_sense(struct scsi_cmnd *scmd)
return scsi_send_eh_cmnd(scmd, NULL, 0, scmd->device->eh_timeout, ~0);
 }
 
+static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
+{
+   if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) {
+   struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
+   if (sdrv->eh_action)
+   rtn = sdrv->eh_action(scmd, rtn);
+   }
+   return rtn;
+}
+
 /**
  * scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
  * @scmd:  Original SCSI cmd that eh has finished.
@@ -1142,7 +1146,9 @@ static int scsi_eh_test_devices(struct list_head 
*cmd_list,
 
list_for_each_entry_safe(scmd, next, cmd_list, eh_entry)
if (scmd->device == sdev) {
-   if (finish_cmds)
+   if (finish_cmds &&
+   (try_stu ||
+scsi_eh_action(scmd, SUCCESS) == SUCCESS))
scsi_eh_finish_cmd(scmd, done_q);
else
list_move_tail(&scmd->eh_entry, work_q);
@@ -1283,7 +1289,8 @@ static int scsi_eh_stu(struct Scsi_Host *shost,
!scsi_eh_tur(stu_scmd)) {
list_for_each_entry_safe(scmd, next,
  work_q, eh_entry) {
-   if (scmd->device == sdev)
+   if (scmd->device == sdev &&
+   scsi_eh_action(scmd, SUCCESS) == 
SUCCESS)
scsi_eh_finish_cmd(scmd, 
done_q);
}
}
@@ -1350,7 +1357,8 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host 
*shost,
!scsi_eh_tur(bdr_scmd)) {
list_for_each_entry_safe(scmd, next,
 work_q, eh_entry) {
-   if (scmd->device == sdev)
+   if (scmd->device == sdev &&
+   scsi_eh_action(scmd, rtn) != FAILED)
scsi_eh_finish_cmd(scmd,
   done_q);
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fd874b8..1929838 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -110,7 +110,7 @@ static int sd_suspend_runtime(struct device *);
 static int sd_resume(struct device *);
 static void sd_rescan(struct device *);
 static int sd_done(struct scsi_cmnd *);
-static int sd_eh_action(struct scsi_cmnd *, unsigned char *, int, int);
+static int sd_eh_action(struct scsi_cmnd *, int);
 static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
 static void scsi_disk_release(struct device *cdev);
 static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *);
@@ -1551,23 +1551,23 @@ static const struct block_device_operations sd_fops = {
 /**
  * sd_eh_action - error handling callback
  * @scmd:  sd-issued command that has failed
- * @eh_cmnd:   The command that was sent during