On Tue, Dec 08, 2015 at 08:37:33AM +0100, Hannes Reinecke wrote:
> When we receive a unit attention code of 'ALUA state changed'
> we should recheck the state, as it might be due to an implicit
> ALUA state transition. This allows us to return NEEDS_RETRY
> instead of ADD_TO_MLQUEUE, allowing to terminate the retries
> after a certain time.
> At the same time a workqueue item might already be queued, which
> should be started immediately to avoid any delays.
> 
> Signed-off-by: Hannes Reinecke <h...@suse.de>
> ---
>  drivers/scsi/device_handler/scsi_dh_alua.c | 58 
> ++++++++++++++++++++++++------
>  1 file changed, 47 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c 
> b/drivers/scsi/device_handler/scsi_dh_alua.c
> index 525449f..04a3a543 100644
> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
> @@ -121,7 +121,8 @@ struct alua_queue_data {
>  static void alua_rtpg_work(struct work_struct *work);
>  static void alua_rtpg_queue(struct alua_port_group *pg,
>                           struct scsi_device *sdev,
> -                         struct alua_queue_data *qdata);
> +                         struct alua_queue_data *qdata, bool force);
> +static void alua_check(struct scsi_device *sdev, bool force);
>  
>  static void release_port_group(struct kref *kref)
>  {
> @@ -386,7 +387,7 @@ static int alua_check_vpd(struct scsi_device *sdev, 
> struct alua_dh_data *h,
>               rcu_assign_pointer(h->pg, pg);
>               pg_found = true;
>       }
> -     alua_rtpg_queue(h->pg, sdev, NULL);
> +     alua_rtpg_queue(h->pg, sdev, NULL, true);
>       spin_unlock(&h->pg_lock);
>  
>       if (pg_found)
> @@ -427,18 +428,24 @@ static int alua_check_sense(struct scsi_device *sdev,
>  {
>       switch (sense_hdr->sense_key) {
>       case NOT_READY:
> -             if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a)
> +             if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
>                       /*
>                        * LUN Not Accessible - ALUA state transition
>                        */
> -                     return ADD_TO_MLQUEUE;
> +                     alua_check(sdev, false);
> +                     return NEEDS_RETRY;
> +             }
>               break;
>       case UNIT_ATTENTION:
> -             if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00)
> +             if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
>                       /*
> -                      * Power On, Reset, or Bus Device Reset, just retry.
> +                      * Power On, Reset, or Bus Device Reset.
> +                      * Might have obscured a state transition,
> +                      * so schedule a recheck.
>                        */
> +                     alua_check(sdev, true);
>                       return ADD_TO_MLQUEUE;
> +             }
>               if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
>                       /*
>                        * Device internal reset
> @@ -449,16 +456,20 @@ static int alua_check_sense(struct scsi_device *sdev,
>                        * Mode Parameters Changed
>                        */
>                       return ADD_TO_MLQUEUE;
> -             if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06)
> +             if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
>                       /*
>                        * ALUA state changed
>                        */
> +                     alua_check(sdev, true);
>                       return ADD_TO_MLQUEUE;
> -             if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07)
> +             }
> +             if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
>                       /*
>                        * Implicit ALUA state transition failed
>                        */
> +                     alua_check(sdev, true);
>                       return ADD_TO_MLQUEUE;
> +             }
>               if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
>                       /*
>                        * Inquiry data has changed
> @@ -777,7 +788,7 @@ static void alua_rtpg_work(struct work_struct *work)
>  
>  static void alua_rtpg_queue(struct alua_port_group *pg,
>                           struct scsi_device *sdev,
> -                         struct alua_queue_data *qdata)
> +                         struct alua_queue_data *qdata, bool force)
>  {
>       int start_queue = 0;
>       unsigned long flags;
> @@ -797,7 +808,9 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
>               pg->rtpg_sdev = sdev;
>               scsi_device_get(sdev);
>               start_queue = 1;
> -     }
> +     } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force)
> +             start_queue = 1;
> +
>       spin_unlock_irqrestore(&pg->lock, flags);
>  
>       if (start_queue &&
> @@ -912,7 +925,7 @@ static int alua_activate(struct scsi_device *sdev,
>       kref_get(&pg->kref);
>       rcu_read_unlock();
>  
> -     alua_rtpg_queue(pg, sdev, qdata);
> +     alua_rtpg_queue(pg, sdev, qdata, true);
>       kref_put(&pg->kref, release_port_group);
>  out:
>       if (fn)
> @@ -921,6 +934,29 @@ out:
>  }
>  
>  /*
> + * alua_check - check path status
> + * @sdev: device on the path to be checked
> + *
> + * Check the device status
> + */
> +static void alua_check(struct scsi_device *sdev, bool force)
> +{
> +     struct alua_dh_data *h = sdev->handler_data;
> +     struct alua_port_group *pg;
> +
> +     rcu_read_lock();
> +     pg = rcu_dereference(h->pg);
> +     if (!pg) {
> +             rcu_read_unlock();
> +             return;
> +     }
> +     kref_get(&pg->kref);

What protects us from pg->kref beeing released?  I think the whole
refcounting scheme needs an audit to see where kref_get is called
without synchronization and use kref_get_unless_zero where needed.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to