Likely we should do the same for host_eh_scheduled++ as we do for 
host_failed++ in scsi_eh_inc_host_failed. Just put it in call_rcu. These 
way rcu_read_lock would be enough: if we don't see RECOVERY state in 
scsi_dec_host_busy, that means that host_eh_scheduled++ and 
corresponding scsi_eh_wakeup not yet happened, and they will handle 
wakeup for us.

Bart did these rcu-way to make common path faster, so better we do it 
without slow mem-barrier.

On 3/25/19 12:05 PM, zhengbin wrote:
> When I use fio test kernel in the following steps:
> 1.The sas controller mixes SAS/SATA disks
> 2.Use fio test all disks
> 3.Simultaneous enable/disable/link_reset/hard_reset PHY
> 
> it will hung in ata_port_wait_eh
> Call trace:
>   __switch_to+0xb4/0x1b8
>   __schedule+0x1e8/0x718
>   schedule+0x38/0x90
>   ata_port_wait_eh+0x70/0xf8
>   sas_ata_wait_eh+0x24/0x30 [libsas]
>   transport_sas_phy_reset.isra.3+0x128/0x160 [libsas]
>   phy_reset_work+0x20/0x30 [libsas]
>   process_one_work+0x1e4/0x460
>   worker_thread+0x40/0x450
>   kthread+0x12c/0x130
>   ret_from_fork+0x10/0x18
> 
> The key code process is like this:
> scsi_dec_host_busy
>       atomic_dec(&shost->host_busy);
>       if (unlikely(scsi_host_in_recovery(shost))) {
>               spin_lock_irqsave(shost->host_lock, flags);
>               ...
>               scsi_eh_wakeup(shost)
>               ...
>       }
> 
> scsi_schedule_eh
>       spin_lock_irqsave(shost->host_lock, flags);
>       if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 ||
>           scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) {
>               ...
>               scsi_eh_wakeup(shost);
>       }
> 
> scsi_eh_wakeup
>       if (scsi_host_busy(shost) == shost->host_failed)
>               wake_up_process(shost->ehandler);
> 
> In scsi_dec_host_busy, host_busy & shost_state not in spinlock. Neither
> function wakes up the SCSI error handler in the following timing:
> 
> CPU 0(call scsi_dec_host_busy)    CPU 1(call scsi_schedule_eh)
> LOAD shost_state(!=recovery)
>                                    scsi_host_set_state(SHOST_RECOVERY)
>                                    scsi_eh_wakeup(host_busy != host_failed)
> atomic_dec(&shost->host_busy);
> if (scsi_host_in_recovery(shost))
> 
> Add a smp_mb between host_busy and shost_state.
> 
> Signed-off-by: zhengbin <zhengbi...@huawei.com>
> ---
>   drivers/scsi/scsi_error.c | 7 +++++++
>   drivers/scsi/scsi_lib.c   | 5 +++++
>   2 files changed, 12 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 1b8378f..c605a71 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -88,6 +88,13 @@ void scsi_schedule_eh(struct Scsi_Host *shost)
> 
>       if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 ||
>           scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) {
> +             /*
> +              * We have to order shost_state store above and test of
> +              * the host_busy(scsi_eh_wakeup will test it), because
> +              * scsi_dec_host_busy accesses these variables without
> +              * host_lock.
> +              */
> +             smp_mb();
>               shost->host_eh_scheduled++;
>               scsi_eh_wakeup(shost);
>       }
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 601b9f1..9094d20 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -337,6 +337,11 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost)
> 
>       rcu_read_lock();
>       atomic_dec(&shost->host_busy);
> +     /*
> +      * We have to order host_busy dec above and test of the shost_state
> +      * below outside the host_lock.
> +      */
> +     smp_mb();
>       if (unlikely(scsi_host_in_recovery(shost))) {
>               spin_lock_irqsave(shost->host_lock, flags);
>               if (shost->host_failed || shost->host_eh_scheduled)
> --
> 2.7.4
> 

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.

Reply via email to