Author: mav
Date: Sat Jun  9 13:07:44 2012
New Revision: 236814
URL: http://svn.freebsd.org/changeset/base/236814

Log:
  One more major cam_periph_error() rewrite to improve error handling and
  reporting. It includes:
   - removing of error messages controlled by bootverbose, replacing them
  with more universal and informative debugging on CAM_DEBUG_INFO level,
  that is now built into the kernel by default;
   - more close following to the arguments submitted by caller, such as
  SF_PRINT_ALWAYS, SF_QUIET_IR and SF_NO_PRINT; consumer knows better which
  errors are usual/expected at this point and which are really informative;
   - adding two new flags SF_NO_RECOVERY and SF_NO_RETRY to allow caller
  specify how much assistance it needs at this point; previously consumers
  controlled that by not calling cam_periph_error() at all, but that made
  behavior inconsistent and debugging complicated;
   - tuning debug messages and taken actions order to make debugging output
  more readable and cause-effect relationships visible;
   - making camperiphdone() (common device recovery completion handler) to
  also use cam_periph_error() in most cases, instead of own dumb code;
   - removing manual sense fetching code from cam_periph_error(); I was told
  by number of people that it is SIM obligation to fetch sense data, so this
  code is useless and only significantly complicates recovery logic;
   - making ada, da and pass driver to use cam_periph_error() with new limited
  recovery options to handle error recovery and debugging in common way;
  as one of results, CAM_REQUEUE_REQ and other retrying statuses are now
  working fine with pass driver, that caused many problems before.
   - reverting r186891 by raj@ to avoid burning few seconds in tight DELAY()
  loops on device probe, while device simply loads media; I think that problem
  may already be fixed in other way, and even if it is not, solution must be
  different.
  
  Sponsored by: iXsystems, Inc.
  MFC after:    2 weeks

Modified:
  head/sys/cam/ata/ata_da.c
  head/sys/cam/ata/ata_xpt.c
  head/sys/cam/cam.h
  head/sys/cam/cam_periph.c
  head/sys/cam/cam_periph.h
  head/sys/cam/scsi/scsi_all.c
  head/sys/cam/scsi/scsi_all.h
  head/sys/cam/scsi/scsi_da.c
  head/sys/cam/scsi/scsi_pass.c
  head/sys/cam/scsi/scsi_xpt.c

Modified: head/sys/cam/ata/ata_da.c
==============================================================================
--- head/sys/cam/ata/ata_da.c   Sat Jun  9 12:58:00 2012        (r236813)
+++ head/sys/cam/ata/ata_da.c   Sat Jun  9 13:07:44 2012        (r236814)
@@ -584,6 +584,7 @@ adadump(void *arg, void *virtual, vm_off
        struct      disk *dp;
        uint64_t    lba;
        uint16_t    count;
+       int         error = 0;
 
        dp = arg;
        periph = dp->d_drv1;
@@ -622,13 +623,16 @@ adadump(void *arg, void *virtual, vm_off
                }
                xpt_polled_action(&ccb);
 
-               if ((ccb.ataio.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
+               error = cam_periph_error(&ccb,
+                   0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
+               if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
+                       cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
+                           /*reduction*/0, /*timeout*/0, /*getcount_only*/0);
+               if (error != 0)
                        printf("Aborting dump due to I/O error.\n");
-                       cam_periph_unlock(periph);
-                       return(EIO);
-               }
+
                cam_periph_unlock(periph);
-               return(0);
+               return (error);
        }
 
        if (softc->flags & ADA_FLAG_CAN_FLUSHCACHE) {
@@ -636,7 +640,7 @@ adadump(void *arg, void *virtual, vm_off
 
                ccb.ccb_h.ccb_state = ADA_CCB_DUMP;
                cam_fill_ataio(&ccb.ataio,
-                                   1,
+                                   0,
                                    adadone,
                                    CAM_DIR_NONE,
                                    0,
@@ -650,18 +654,16 @@ adadump(void *arg, void *virtual, vm_off
                        ata_28bit_cmd(&ccb.ataio, ATA_FLUSHCACHE, 0, 0, 0);
                xpt_polled_action(&ccb);
 
-               if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
-                       xpt_print(periph->path, "Synchronize cache failed\n");
-
+               error = cam_periph_error(&ccb,
+                   0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
                if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
-                       cam_release_devq(ccb.ccb_h.path,
-                                        /*relsim_flags*/0,
-                                        /*reduction*/0,
-                                        /*timeout*/0,
-                                        /*getcount_only*/0);
+                       cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
+                           /*reduction*/0, /*timeout*/0, /*getcount_only*/0);
+               if (error != 0)
+                       xpt_print(periph->path, "Synchronize cache failed\n");
        }
        cam_periph_unlock(periph);
-       return (0);
+       return (error);
 }
 
 static void
@@ -1716,6 +1718,7 @@ adaflush(void)
 {
        struct cam_periph *periph;
        struct ada_softc *softc;
+       int error;
 
        TAILQ_FOREACH(periph, &adadriver.units, unit_links) {
                union ccb ccb;
@@ -1739,7 +1742,7 @@ adaflush(void)
 
                ccb.ccb_h.ccb_state = ADA_CCB_DUMP;
                cam_fill_ataio(&ccb.ataio,
-                                   1,
+                                   0,
                                    adadone,
                                    CAM_DIR_NONE,
                                    0,
@@ -1753,15 +1756,13 @@ adaflush(void)
                        ata_28bit_cmd(&ccb.ataio, ATA_FLUSHCACHE, 0, 0, 0);
                xpt_polled_action(&ccb);
 
-               if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
-                       xpt_print(periph->path, "Synchronize cache failed\n");
-
+               error = cam_periph_error(&ccb,
+                   0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
                if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
-                       cam_release_devq(ccb.ccb_h.path,
-                                        /*relsim_flags*/0,
-                                        /*reduction*/0,
-                                        /*timeout*/0,
-                                        /*getcount_only*/0);
+                       cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
+                           /*reduction*/0, /*timeout*/0, /*getcount_only*/0);
+               if (error != 0)
+                       xpt_print(periph->path, "Synchronize cache failed\n");
                cam_periph_unlock(periph);
        }
 }
@@ -1771,6 +1772,7 @@ adaspindown(uint8_t cmd, int flags)
 {
        struct cam_periph *periph;
        struct ada_softc *softc;
+       int error;
 
        TAILQ_FOREACH(periph, &adadriver.units, unit_links) {
                union ccb ccb;
@@ -1795,7 +1797,7 @@ adaspindown(uint8_t cmd, int flags)
 
                ccb.ccb_h.ccb_state = ADA_CCB_DUMP;
                cam_fill_ataio(&ccb.ataio,
-                                   1,
+                                   0,
                                    adadone,
                                    CAM_DIR_NONE | flags,
                                    0,
@@ -1806,15 +1808,13 @@ adaspindown(uint8_t cmd, int flags)
                ata_28bit_cmd(&ccb.ataio, cmd, 0, 0, 0);
                xpt_polled_action(&ccb);
 
-               if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
-                       xpt_print(periph->path, "Spin-down disk failed\n");
-
+               error = cam_periph_error(&ccb,
+                   0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
                if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
-                       cam_release_devq(ccb.ccb_h.path,
-                                        /*relsim_flags*/0,
-                                        /*reduction*/0,
-                                        /*timeout*/0,
-                                        /*getcount_only*/0);
+                       cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
+                           /*reduction*/0, /*timeout*/0, /*getcount_only*/0);
+               if (error != 0)
+                       xpt_print(periph->path, "Spin-down disk failed\n");
                cam_periph_unlock(periph);
        }
 }

Modified: head/sys/cam/ata/ata_xpt.c
==============================================================================
--- head/sys/cam/ata/ata_xpt.c  Sat Jun  9 12:58:00 2012        (r236813)
+++ head/sys/cam/ata/ata_xpt.c  Sat Jun  9 13:07:44 2012        (r236814)
@@ -705,12 +705,9 @@ probedone(struct cam_periph *periph, uni
        inq_buf = &path->device->inq_data;
 
        if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
-               if (softc->restart) {
-                       if (bootverbose) {
-                               cam_error_print(done_ccb,
-                                   CAM_ESF_ALL, CAM_EPF_ALL);
-                       }
-               } else if (cam_periph_error(done_ccb, 0, 0, NULL) == ERESTART)
+               if (cam_periph_error(done_ccb,
+                   0, softc->restart ? (SF_NO_RECOVERY | SF_NO_RETRY) : 0,
+                   NULL) == ERESTART)
                        return;
                if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) {
                        /* Don't wedge the queue */

Modified: head/sys/cam/cam.h
==============================================================================
--- head/sys/cam/cam.h  Sat Jun  9 12:58:00 2012        (r236813)
+++ head/sys/cam/cam.h  Sat Jun  9 13:07:44 2012        (r236814)
@@ -108,6 +108,15 @@ typedef enum {
        CAM_RETRY_SELTO         = 0x02 /* Retry Selection Timeouts */
 } cam_flags;
 
+enum {
+       SF_RETRY_UA             = 0x01, /* Retry UNIT ATTENTION conditions. */
+       SF_NO_PRINT             = 0x02, /* Never print error status. */
+       SF_QUIET_IR             = 0x04, /* Be quiet about Illegal Request 
reponses */
+       SF_PRINT_ALWAYS         = 0x08, /* Always print error status. */
+       SF_NO_RECOVERY          = 0x10, /* Don't do active error recovery. */
+       SF_NO_RETRY             = 0x20  /* Don't do any retries. */
+};
+
 /* CAM  Status field values */
 typedef enum {
        CAM_REQ_INPROG,         /* CCB request is in progress */

Modified: head/sys/cam/cam_periph.c
==============================================================================
--- head/sys/cam/cam_periph.c   Sat Jun  9 12:58:00 2012        (r236813)
+++ head/sys/cam/cam_periph.c   Sat Jun  9 13:07:44 2012        (r236814)
@@ -69,18 +69,22 @@ static      void            camperiphdone(struct cam_pe
                                        union ccb *done_ccb);
 static  void           camperiphfree(struct cam_periph *periph);
 static int             camperiphscsistatuserror(union ccb *ccb,
+                                               union ccb **orig_ccb,
                                                 cam_flags camflags,
                                                 u_int32_t sense_flags,
                                                 int *openings,
                                                 u_int32_t *relsim_flags,
                                                 u_int32_t *timeout,
+                                                int *print,
                                                 const char **action_string);
 static int             camperiphscsisenseerror(union ccb *ccb,
+                                               union ccb **orig_ccb,
                                                cam_flags camflags,
                                                u_int32_t sense_flags,
                                                int *openings,
                                                u_int32_t *relsim_flags,
                                                u_int32_t *timeout,
+                                               int *print,
                                                const char **action_string);
 
 static int nperiph_drivers;
@@ -1108,255 +1112,84 @@ cam_release_devq(struct cam_path *path, 
 }
 
 #define saved_ccb_ptr ppriv_ptr0
-#define recovery_depth ppriv_field1
-static void
-camperiphsensedone(struct cam_periph *periph, union ccb *done_ccb)
-{
-       union ccb      *saved_ccb = (union ccb *)done_ccb->ccb_h.saved_ccb_ptr;
-       cam_status      status;
-       int             frozen = 0;
-       int             depth = done_ccb->ccb_h.recovery_depth;
-
-       status = done_ccb->ccb_h.status;
-       if (status & CAM_DEV_QFRZN) {
-               frozen = 1;
-               /*
-                * Clear freeze flag now for case of retry,
-                * freeze will be dropped later.
-                */
-               done_ccb->ccb_h.status &= ~CAM_DEV_QFRZN;
-       }
-       status &= CAM_STATUS_MASK;
-       switch (status) {
-       case CAM_REQ_CMP:
-       {
-               int error_code, sense_key, asc, ascq;
-
-               scsi_extract_sense_len(&saved_ccb->csio.sense_data,
-                                      saved_ccb->csio.sense_len -
-                                      saved_ccb->csio.sense_resid,
-                                      &error_code, &sense_key, &asc, &ascq,
-                                      /*show_errors*/ 1);
-               /*
-                * If we manually retrieved sense into a CCB and got
-                * something other than "NO SENSE" send the updated CCB
-                * back to the client via xpt_done() to be processed via
-                * the error recovery code again.
-                */
-               if ((sense_key != -1)
-                && (sense_key != SSD_KEY_NO_SENSE)) {
-                       saved_ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
-               } else {
-                       saved_ccb->ccb_h.status &= ~CAM_STATUS_MASK;
-                       saved_ccb->ccb_h.status |= CAM_AUTOSENSE_FAIL;
-               }
-               saved_ccb->csio.sense_resid = done_ccb->csio.resid;
-               bcopy(saved_ccb, done_ccb, sizeof(union ccb));
-               xpt_free_ccb(saved_ccb);
-               break;
-       }
-       default:
-               bcopy(saved_ccb, done_ccb, sizeof(union ccb));
-               xpt_free_ccb(saved_ccb);
-               done_ccb->ccb_h.status &= ~CAM_STATUS_MASK;
-               done_ccb->ccb_h.status |= CAM_AUTOSENSE_FAIL;
-               break;
-       }
-       periph->flags &= ~CAM_PERIPH_SENSE_INPROG;
-       /*
-        * If it is the end of recovery, drop freeze, taken due to
-        * CAM_DEV_QFREEZE flag, set on recovery request.
-        */
-       if (depth == 0) {
-               cam_release_devq(done_ccb->ccb_h.path,
-                        /*relsim_flags*/0,
-                        /*openings*/0,
-                        /*timeout*/0,
-                        /*getcount_only*/0);
-       }
-       /*
-        * Copy frozen flag from recovery request if it is set there
-        * for some reason.
-        */
-       if (frozen != 0)
-               done_ccb->ccb_h.status |= CAM_DEV_QFRZN;
-       (*done_ccb->ccb_h.cbfcnp)(periph, done_ccb);
-}
-
 static void
 camperiphdone(struct cam_periph *periph, union ccb *done_ccb)
 {
-       union ccb      *saved_ccb, *save_ccb;
+       union ccb      *saved_ccb;
        cam_status      status;
-       int             frozen = 0;
        struct scsi_start_stop_unit *scsi_cmd;
-       u_int32_t       relsim_flags, timeout;
 
+       scsi_cmd = (struct scsi_start_stop_unit *)
+           &done_ccb->csio.cdb_io.cdb_bytes;
        status = done_ccb->ccb_h.status;
-       if (status & CAM_DEV_QFRZN) {
-               frozen = 1;
-               /*
-                * Clear freeze flag now for case of retry,
-                * freeze will be dropped later.
-                */
-               done_ccb->ccb_h.status &= ~CAM_DEV_QFRZN;
-       }
 
-       timeout = 0;
-       relsim_flags = 0;
-       saved_ccb = (union ccb *)done_ccb->ccb_h.saved_ccb_ptr;
-
-       switch (status & CAM_STATUS_MASK) {
-       case CAM_REQ_CMP:
-       {
-               /*
-                * If we have successfully taken a device from the not
-                * ready to ready state, re-scan the device and re-get
-                * the inquiry information.  Many devices (mostly disks)
-                * don't properly report their inquiry information unless
-                * they are spun up.
-                */
-               scsi_cmd = (struct scsi_start_stop_unit *)
-                               &done_ccb->csio.cdb_io.cdb_bytes;
-
-               if (scsi_cmd->opcode == START_STOP_UNIT)
-                       xpt_async(AC_INQ_CHANGED,
-                                 done_ccb->ccb_h.path, NULL);
-               goto final;
-       }
-       case CAM_SCSI_STATUS_ERROR:
-               scsi_cmd = (struct scsi_start_stop_unit *)
-                               &done_ccb->csio.cdb_io.cdb_bytes;
-               if (status & CAM_AUTOSNS_VALID) {
-                       struct ccb_getdev cgd;
+       if ((status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
+               if ((status & CAM_STATUS_MASK) == CAM_SCSI_STATUS_ERROR &&
+                   (status & CAM_AUTOSNS_VALID)) {
                        struct scsi_sense_data *sense;
                        int    error_code, sense_key, asc, ascq, sense_len;
-                       scsi_sense_action err_action;
 
                        sense = &done_ccb->csio.sense_data;
                        sense_len = done_ccb->csio.sense_len -
                                    done_ccb->csio.sense_resid;
-                       scsi_extract_sense_len(sense, sense_len, &error_code, 
-                                              &sense_key, &asc, &ascq,
-                                              /*show_errors*/ 1);
-                       /*
-                        * Grab the inquiry data for this device.
-                        */
-                       xpt_setup_ccb(&cgd.ccb_h, done_ccb->ccb_h.path,
-                           CAM_PRIORITY_NORMAL);
-                       cgd.ccb_h.func_code = XPT_GDEV_TYPE;
-                       xpt_action((union ccb *)&cgd);
-                       err_action = scsi_error_action(&done_ccb->csio,
-                                                      &cgd.inq_data, 0);
+                       scsi_extract_sense_len(sense, sense_len, &error_code,
+                           &sense_key, &asc, &ascq, /*show_errors*/ 1);
                        /*
-                        * If the error is "invalid field in CDB", 
-                        * and the load/eject flag is set, turn the 
-                        * flag off and try again.  This is just in 
-                        * case the drive in question barfs on the 
-                        * load eject flag.  The CAM code should set 
-                        * the load/eject flag by default for 
+                        * If the error is "invalid field in CDB",
+                        * and the load/eject flag is set, turn the
+                        * flag off and try again.  This is just in
+                        * case the drive in question barfs on the
+                        * load eject flag.  The CAM code should set
+                        * the load/eject flag by default for
                         * removable media.
                         */
-                       /* XXX KDM 
-                        * Should we check to see what the specific
-                        * scsi status is??  Or does it not matter
-                        * since we already know that there was an
-                        * error, and we know what the specific
-                        * error code was, and we know what the
-                        * opcode is..
-                        */
                        if ((scsi_cmd->opcode == START_STOP_UNIT) &&
                            ((scsi_cmd->how & SSS_LOEJ) != 0) &&
-                            (asc == 0x24) && (ascq == 0x00) &&
-                            (done_ccb->ccb_h.retry_count > 0)) {
-
+                            (asc == 0x24) && (ascq == 0x00)) {
                                scsi_cmd->how &= ~SSS_LOEJ;
+                               if (status & CAM_DEV_QFRZN) {
+                                       cam_release_devq(done_ccb->ccb_h.path,
+                                           0, 0, 0, 0);
+                                       done_ccb->ccb_h.status &=
+                                           ~CAM_DEV_QFRZN;
+                               }
                                xpt_action(done_ccb);
-                       } else if ((done_ccb->ccb_h.retry_count > 1)
-                               && ((err_action & SS_MASK) != SS_FAIL)) {
-
-                               /*
-                                * In this case, the error recovery
-                                * command failed, but we've got 
-                                * some retries left on it.  Give
-                                * it another try unless this is an
-                                * unretryable error.
-                                */
-                               /* set the timeout to .5 sec */
-                               relsim_flags =
-                                       RELSIM_RELEASE_AFTER_TIMEOUT;
-                               timeout = 500;
-                               xpt_action(done_ccb);
-                               break;
-                       } else {
-                               /* 
-                                * Perform the final retry with the original
-                                * CCB so that final error processing is
-                                * performed by the owner of the CCB.
-                                */
-                               goto final;
+                               goto out;
                        }
-               } else {
-                       save_ccb = xpt_alloc_ccb_nowait();
-                       if (save_ccb == NULL)
-                               goto final;
-                       bcopy(done_ccb, save_ccb, sizeof(*save_ccb));
-                       periph->flags |= CAM_PERIPH_SENSE_INPROG;
-                       /*
-                        * Send a Request Sense to the device.  We
-                        * assume that we are in a contingent allegiance
-                        * condition so we do not tag this request.
-                        */
-                       scsi_request_sense(&done_ccb->csio, /*retries*/1,
-                                          camperiphsensedone,
-                                          &save_ccb->csio.sense_data,
-                                          save_ccb->csio.sense_len,
-                                          CAM_TAG_ACTION_NONE,
-                                          /*sense_len*/SSD_FULL_SIZE,
-                                          /*timeout*/5000);
-                       done_ccb->ccb_h.pinfo.priority--;
-                       done_ccb->ccb_h.flags |= CAM_DEV_QFREEZE;
-                       done_ccb->ccb_h.saved_ccb_ptr = save_ccb;
-                       done_ccb->ccb_h.recovery_depth++;
-                       xpt_action(done_ccb);
                }
-               break;
-       default:
-final:
-               bcopy(saved_ccb, done_ccb, sizeof(*done_ccb));
-               xpt_free_ccb(saved_ccb);
-               periph->flags &= ~CAM_PERIPH_RECOVERY_INPROG;
-               xpt_action(done_ccb);
-               break;
+               if (cam_periph_error(done_ccb,
+                   0, SF_RETRY_UA | SF_NO_PRINT, NULL) == ERESTART)
+                       goto out;
+               if (done_ccb->ccb_h.status & CAM_DEV_QFRZN) {
+                       cam_release_devq(done_ccb->ccb_h.path, 0, 0, 0, 0);
+                       done_ccb->ccb_h.status &= ~CAM_DEV_QFRZN;
+               }
+       } else {
+               /*
+                * If we have successfully taken a device from the not
+                * ready to ready state, re-scan the device and re-get
+                * the inquiry information.  Many devices (mostly disks)
+                * don't properly report their inquiry information unless
+                * they are spun up.
+                */
+               if (scsi_cmd->opcode == START_STOP_UNIT)
+                       xpt_async(AC_INQ_CHANGED, done_ccb->ccb_h.path, NULL);
        }
 
-       /* decrement the retry count */
-       /*
-        * XXX This isn't appropriate in all cases.  Restructure,
-        *     so that the retry count is only decremented on an
-        *     actual retry.  Remeber that the orignal ccb had its
-        *     retry count dropped before entering recovery, so
-        *     doing it again is a bug.
-        */
-       if (done_ccb->ccb_h.retry_count > 0)
-               done_ccb->ccb_h.retry_count--;
        /*
-        * Drop freeze taken due to CAM_DEV_QFREEZE flag set on recovery
-        * request.
+        * Perform the final retry with the original CCB so that final
+        * error processing is performed by the owner of the CCB.
         */
-       cam_release_devq(done_ccb->ccb_h.path,
-                        /*relsim_flags*/relsim_flags,
-                        /*openings*/0,
-                        /*timeout*/timeout,
-                        /*getcount_only*/0);
-       /* Drop freeze taken, if this recovery request got error. */
-       if (frozen != 0) {
-               cam_release_devq(done_ccb->ccb_h.path,
-                        /*relsim_flags*/0,
-                        /*openings*/0,
-                        /*timeout*/0,
-                        /*getcount_only*/0);
-       }
+       saved_ccb = (union ccb *)done_ccb->ccb_h.saved_ccb_ptr;
+       bcopy(saved_ccb, done_ccb, sizeof(*done_ccb));
+       xpt_free_ccb(saved_ccb);
+       if (done_ccb->ccb_h.cbfcnp != camperiphdone)
+               periph->flags &= ~CAM_PERIPH_RECOVERY_INPROG;
+       xpt_action(done_ccb);
+
+out:
+       /* Drop freeze taken due to CAM_DEV_QFREEZE flag set. */
+       cam_release_devq(done_ccb->ccb_h.path, 0, 0, 0, 0);
 }
 
 /*
@@ -1415,10 +1248,10 @@ cam_periph_freeze_after_event(struct cam
 }
 
 static int
-camperiphscsistatuserror(union ccb *ccb, cam_flags camflags,
-                        u_int32_t sense_flags,
-                        int *openings, u_int32_t *relsim_flags,
-                        u_int32_t *timeout, const char **action_string)
+camperiphscsistatuserror(union ccb *ccb, union ccb **orig_ccb,
+    cam_flags camflags, u_int32_t sense_flags,
+    int *openings, u_int32_t *relsim_flags,
+    u_int32_t *timeout, int *print, const char **action_string)
 {
        int error;
 
@@ -1431,14 +1264,13 @@ camperiphscsistatuserror(union ccb *ccb,
                break;
        case SCSI_STATUS_CMD_TERMINATED:
        case SCSI_STATUS_CHECK_COND:
-               if (bootverbose)
-                       xpt_print(ccb->ccb_h.path, "SCSI status error\n");
-               error = camperiphscsisenseerror(ccb,
+               error = camperiphscsisenseerror(ccb, orig_ccb,
                                                camflags,
                                                sense_flags,
                                                openings,
                                                relsim_flags,
                                                timeout,
+                                               print,
                                                action_string);
                break;
        case SCSI_STATUS_QUEUE_FULL:
@@ -1493,9 +1325,6 @@ camperiphscsistatuserror(union ccb *ccb,
                        }
                        *timeout = 0;
                        error = ERESTART;
-                       if (bootverbose) {
-                               xpt_print(ccb->ccb_h.path, "Queue full\n");
-                       }
                        break;
                }
                /* FALLTHROUGH */
@@ -1505,9 +1334,6 @@ camperiphscsistatuserror(union ccb *ccb,
                 * Restart the queue after either another
                 * command completes or a 1 second timeout.
                 */
-               if (bootverbose) {
-                       xpt_print(ccb->ccb_h.path, "Device busy\n");
-               }
                if (ccb->ccb_h.retry_count > 0) {
                        ccb->ccb_h.retry_count--;
                        error = ERESTART;
@@ -1519,12 +1345,7 @@ camperiphscsistatuserror(union ccb *ccb,
                }
                break;
        case SCSI_STATUS_RESERV_CONFLICT:
-               xpt_print(ccb->ccb_h.path, "Reservation conflict\n");
-               error = EIO;
-               break;
        default:
-               xpt_print(ccb->ccb_h.path, "SCSI status 0x%x\n",
-                   ccb->csio.scsi_status);
                error = EIO;
                break;
        }
@@ -1532,18 +1353,18 @@ camperiphscsistatuserror(union ccb *ccb,
 }
 
 static int
-camperiphscsisenseerror(union ccb *ccb, cam_flags camflags,
-                       u_int32_t sense_flags,
-                      int *openings, u_int32_t *relsim_flags,
-                      u_int32_t *timeout, const char **action_string)
+camperiphscsisenseerror(union ccb *ccb, union ccb **orig,
+    cam_flags camflags, u_int32_t sense_flags,
+    int *openings, u_int32_t *relsim_flags,
+    u_int32_t *timeout, int *print, const char **action_string)
 {
        struct cam_periph *periph;
        union ccb *orig_ccb = ccb;
-       int error;
+       int error, recoveryccb;
 
        periph = xpt_path_periph(ccb->ccb_h.path);
-       if (periph->flags &
-           (CAM_PERIPH_RECOVERY_INPROG | CAM_PERIPH_SENSE_INPROG)) {
+       recoveryccb = (ccb->ccb_h.cbfcnp == camperiphdone);
+       if ((periph->flags & CAM_PERIPH_RECOVERY_INPROG) && !recoveryccb) {
                /*
                 * If error recovery is already in progress, don't attempt
                 * to process this error, but requeue it unconditionally
@@ -1558,6 +1379,7 @@ camperiphscsisenseerror(union ccb *ccb, 
                 * imperitive that we don't violate this assumption.
                 */
                error = ERESTART;
+               *print = 0;
        } else {
                scsi_sense_action err_action;
                struct ccb_getdev cgd;
@@ -1573,14 +1395,36 @@ camperiphscsisenseerror(union ccb *ccb, 
                        err_action = scsi_error_action(&ccb->csio,
                                                       &cgd.inq_data,
                                                       sense_flags);
-               else if ((ccb->ccb_h.flags & CAM_DIS_AUTOSENSE) == 0)
-                       err_action = SS_REQSENSE;
                else
                        err_action = SS_RETRY|SSQ_DECREMENT_COUNT|EIO;
-
                error = err_action & SS_ERRMASK;
 
                /*
+                * Do not autostart sequential access devices
+                * to avoid unexpected tape loading.
+                */
+               if ((err_action & SS_MASK) == SS_START &&
+                   SID_TYPE(&cgd.inq_data) == T_SEQUENTIAL) {
+                       *action_string = "Will not autostart a "
+                           "sequential access device";
+                       goto sense_error_done;
+               }
+
+               /*
+                * Avoid recovery recursion if recovery action is the same.
+                */
+               if ((err_action & SS_MASK) >= SS_START && recoveryccb) {
+                       if (((err_action & SS_MASK) == SS_START &&
+                            ccb->csio.cdb_io.cdb_bytes[0] == START_STOP_UNIT) 
||
+                           ((err_action & SS_MASK) == SS_TUR &&
+                            (ccb->csio.cdb_io.cdb_bytes[0] == 
TEST_UNIT_READY))) {
+                               err_action = SS_RETRY|SSQ_DECREMENT_COUNT|EIO;
+                               *relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT;
+                               *timeout = 500;
+                       }
+               }
+
+               /*
                 * If the recovery action will consume a retry,
                 * make sure we actually have retries available.
                 */
@@ -1627,15 +1471,6 @@ camperiphscsisenseerror(union ccb *ccb, 
                case SS_START:
                {
                        int le;
-                       if (SID_TYPE(&cgd.inq_data) == T_SEQUENTIAL) {
-                               xpt_free_ccb(orig_ccb);
-                               ccb->ccb_h.status |= CAM_DEV_QFRZN;
-                               *action_string = "Will not autostart a "
-                                   "sequential access device";
-                               err_action = SS_FAIL;
-                               error = EIO;
-                               break;
-                       }
 
                        /*
                         * Send a start unit command to the device, and
@@ -1699,24 +1534,6 @@ camperiphscsisenseerror(union ccb *ccb, 
                        *timeout = 500;
                        break;
                }
-               case SS_REQSENSE:
-               {
-                       *action_string = "Requesting SCSI sense data";
-                       periph->flags |= CAM_PERIPH_SENSE_INPROG;
-                       /*
-                        * Send a Request Sense to the device.  We
-                        * assume that we are in a contingent allegiance
-                        * condition so we do not tag this request.
-                        */
-                       scsi_request_sense(&ccb->csio, /*retries*/1,
-                                          camperiphsensedone,
-                                          &orig_ccb->csio.sense_data,
-                                          orig_ccb->csio.sense_len,
-                                          CAM_TAG_ACTION_NONE,
-                                          /*sense_len*/SSD_FULL_SIZE,
-                                          /*timeout*/5000);
-                       break;
-               }
                default:
                        panic("Unhandled error action %x", err_action);
                }
@@ -1733,14 +1550,12 @@ camperiphscsisenseerror(union ccb *ccb, 
                        ccb->ccb_h.pinfo.priority--;
                        ccb->ccb_h.flags |= CAM_DEV_QFREEZE;
                        ccb->ccb_h.saved_ccb_ptr = orig_ccb;
-                       ccb->ccb_h.recovery_depth = 0;
                        error = ERESTART;
+                       *orig = orig_ccb;
                }
 
 sense_error_done:
-               if ((err_action & SSQ_PRINT_SENSE) != 0
-                && (ccb->ccb_h.status & CAM_AUTOSNS_VALID) != 0)
-                       cam_error_print(orig_ccb, CAM_ESF_ALL, CAM_EPF_ALL);
+               *print = ((err_action & SSQ_PRINT_SENSE) != 0);
        }
        return (error);
 }
@@ -1754,87 +1569,35 @@ int
 cam_periph_error(union ccb *ccb, cam_flags camflags,
                 u_int32_t sense_flags, union ccb *save_ccb)
 {
+       union ccb  *orig_ccb;
        struct cam_periph *periph;
        const char *action_string;
        cam_status  status;
-       int         frozen;
-       int         error, printed = 0;
-       int         openings;
-       u_int32_t   relsim_flags;
-       u_int32_t   timeout = 0;
+       int         frozen, error, openings, print, lost_device;
+       u_int32_t   relsim_flags, timeout;
 
+       print = 1;
        periph = xpt_path_periph(ccb->ccb_h.path);
        action_string = NULL;
        status = ccb->ccb_h.status;
        frozen = (status & CAM_DEV_QFRZN) != 0;
        status &= CAM_STATUS_MASK;
-       openings = relsim_flags = 0;
+       openings = relsim_flags = timeout = lost_device = 0;
+       orig_ccb = ccb;
 
        switch (status) {
        case CAM_REQ_CMP:
                error = 0;
+               print = 0;
                break;
        case CAM_SCSI_STATUS_ERROR:
-               error = camperiphscsistatuserror(ccb,
-                                                camflags,
-                                                sense_flags,
-                                                &openings,
-                                                &relsim_flags,
-                                                &timeout,
-                                                &action_string);
+               error = camperiphscsistatuserror(ccb, &orig_ccb,
+                   camflags, sense_flags, &openings, &relsim_flags,
+                   &timeout, &print, &action_string);
                break;
        case CAM_AUTOSENSE_FAIL:
-               xpt_print(ccb->ccb_h.path, "AutoSense failed\n");
                error = EIO;    /* we have to kill the command */
                break;
-       case CAM_ATA_STATUS_ERROR:
-               if (bootverbose && printed == 0) {
-                       xpt_print(ccb->ccb_h.path, "ATA status error\n");
-                       cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
-                       printed++;
-               }
-               /* FALLTHROUGH */
-       case CAM_REQ_CMP_ERR:
-               if (bootverbose && printed == 0) {
-                       xpt_print(ccb->ccb_h.path,
-                           "Request completed with CAM_REQ_CMP_ERR\n");
-                       printed++;
-               }
-               /* FALLTHROUGH */
-       case CAM_CMD_TIMEOUT:
-               if (bootverbose && printed == 0) {
-                       xpt_print(ccb->ccb_h.path, "Command timed out\n");
-                       printed++;
-               }
-               /* FALLTHROUGH */
-       case CAM_UNEXP_BUSFREE:
-               if (bootverbose && printed == 0) {
-                       xpt_print(ccb->ccb_h.path, "Unexpected Bus Free\n");
-                       printed++;
-               }
-               /* FALLTHROUGH */
-       case CAM_UNCOR_PARITY:
-               if (bootverbose && printed == 0) {
-                       xpt_print(ccb->ccb_h.path,
-                           "Uncorrected parity error\n");
-                       printed++;
-               }
-               /* FALLTHROUGH */
-       case CAM_DATA_RUN_ERR:
-               if (bootverbose && printed == 0) {
-                       xpt_print(ccb->ccb_h.path, "Data overrun\n");
-                       printed++;
-               }
-               /* decrement the number of retries */
-               if (ccb->ccb_h.retry_count > 0 &&
-                   (periph->flags & CAM_PERIPH_INVALID) == 0) {
-                       ccb->ccb_h.retry_count--;
-                       error = ERESTART;
-               } else {
-                       action_string = "Retries exhausted";
-                       error = EIO;
-               }
-               break;
        case CAM_UA_ABORT:
        case CAM_UA_TERMIO:
        case CAM_MSG_REJECT_REC:
@@ -1845,14 +1608,8 @@ cam_periph_error(union ccb *ccb, cam_fla
                if ((camflags & CAM_RETRY_SELTO) != 0) {
                        if (ccb->ccb_h.retry_count > 0 &&
                            (periph->flags & CAM_PERIPH_INVALID) == 0) {
-
                                ccb->ccb_h.retry_count--;
                                error = ERESTART;
-                               if (bootverbose && printed == 0) {
-                                       xpt_print(ccb->ccb_h.path,
-                                           "Selection timeout\n");
-                                       printed++;
-                               }
 
                                /*
                                 * Wait a bit to give the device
@@ -1866,38 +1623,10 @@ cam_periph_error(union ccb *ccb, cam_fla
                }
                /* FALLTHROUGH */
        case CAM_DEV_NOT_THERE:
-       {
-               struct cam_path *newpath;
-               lun_id_t lun_id;
-
                error = ENXIO;
-
-               /*
-                * For a selection timeout, we consider all of the LUNs on
-                * the target to be gone.  If the status is CAM_DEV_NOT_THERE,
-                * then we only get rid of the device(s) specified by the
-                * path in the original CCB.
-                */
-               if (status == CAM_DEV_NOT_THERE)
-                       lun_id = xpt_path_lun_id(ccb->ccb_h.path);
-               else
-                       lun_id = CAM_LUN_WILDCARD;
-
-               /* Should we do more if we can't create the path?? */
-               if (xpt_create_path(&newpath, periph,
-                                   xpt_path_path_id(ccb->ccb_h.path),
-                                   xpt_path_target_id(ccb->ccb_h.path),
-                                   lun_id) != CAM_REQ_CMP) 
-                       break;
-
-               /*
-                * Let peripheral drivers know that this device has gone
-                * away.
-                */
-               xpt_async(AC_LOST_DEVICE, newpath, NULL);
-               xpt_free_path(newpath);
+               print = 0;
+               lost_device = 1;
                break;
-       }
        case CAM_REQ_INVALID:
        case CAM_PATH_INVALID:
        case CAM_NO_HBA:
@@ -1917,27 +1646,16 @@ cam_periph_error(union ccb *ccb, cam_fla
                 * these events and should be unconditionally
                 * retried.
                 */
-               if (bootverbose && printed == 0) {
-                       xpt_print_path(ccb->ccb_h.path);
-                       if (status == CAM_BDR_SENT)
-                               printf("Bus Device Reset sent\n");
-                       else
-                               printf("Bus Reset issued\n");
-                       printed++;
-               }
-               /* FALLTHROUGH */
        case CAM_REQUEUE_REQ:
-               /* Unconditional requeue */
-               if (bootverbose && printed == 0) {
-                       xpt_print(ccb->ccb_h.path, "Request requeued\n");
-                       printed++;
-               }
-               if ((periph->flags & CAM_PERIPH_INVALID) == 0)
-                       error = ERESTART;
-               else {
-                       action_string = "Retries exhausted";
+               /* Unconditional requeue if device is still there */
+               if (periph->flags & CAM_PERIPH_INVALID) {
+                       action_string = "Periph was invalidated";
                        error = EIO;
-               }
+               } else if (sense_flags & SF_NO_RETRY) {
+                       error = EIO;
+                       action_string = "Retry was blocked";
+               } else
+                       error = ERESTART;
                break;
        case CAM_RESRC_UNAVAIL:
                /* Wait a bit for the resource shortage to abate. */
@@ -1950,30 +1668,37 @@ cam_periph_error(union ccb *ccb, cam_fla
                }
                relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT;
                /* FALLTHROUGH */
+       case CAM_ATA_STATUS_ERROR:
+       case CAM_REQ_CMP_ERR:
+       case CAM_CMD_TIMEOUT:
+       case CAM_UNEXP_BUSFREE:
+       case CAM_UNCOR_PARITY:
+       case CAM_DATA_RUN_ERR:
        default:
-               /* decrement the number of retries */
-               if (ccb->ccb_h.retry_count > 0 &&
-                   (periph->flags & CAM_PERIPH_INVALID) == 0) {
-                       ccb->ccb_h.retry_count--;
-                       error = ERESTART;
-                       if (bootverbose && printed == 0) {
-                               xpt_print(ccb->ccb_h.path, "CAM status 0x%x\n",
-                                   status);
-                               printed++;
-                       }
-               } else {
+               if (periph->flags & CAM_PERIPH_INVALID) {
+                       error = EIO;
+                       action_string = "Periph was invalidated";
+               } else if (ccb->ccb_h.retry_count == 0) {
                        error = EIO;
                        action_string = "Retries exhausted";
+               } else if (sense_flags & SF_NO_RETRY) {
+                       error = EIO;
+                       action_string = "Retry was blocked";
+               } else {
+                       ccb->ccb_h.retry_count--;
+                       error = ERESTART;
                }
                break;
        }
 
-       /*
-        * If we have and error and are booting verbosely, whine
-        * *unless* this was a non-retryable selection timeout.
-        */
-       if (error != 0 && bootverbose &&
-           !(status == CAM_SEL_TIMEOUT && (camflags & CAM_RETRY_SELTO) == 0)) {
+       if ((sense_flags & SF_PRINT_ALWAYS) ||
+           CAM_DEBUGGED(ccb->ccb_h.path, CAM_DEBUG_INFO))
+               print = 1;
+       else if (sense_flags & SF_NO_PRINT)
+               print = 0;
+       if (print)
+               cam_error_print(orig_ccb, CAM_ESF_ALL, CAM_EPF_ALL);
+       if (error != 0 && print) {
                if (error != ERESTART) {
                        if (action_string == NULL)
                                action_string = "Unretryable error";
@@ -1985,6 +1710,36 @@ cam_periph_error(union ccb *ccb, cam_fla
                        xpt_print(ccb->ccb_h.path, "Retrying command\n");
        }
 
+       if (lost_device) {
+               struct cam_path *newpath;
+               lun_id_t lun_id;
+
+               /*
+                * For a selection timeout, we consider all of the LUNs on
+                * the target to be gone.  If the status is CAM_DEV_NOT_THERE,
+                * then we only get rid of the device(s) specified by the
+                * path in the original CCB.
+                */
+               if (status == CAM_DEV_NOT_THERE)
+                       lun_id = xpt_path_lun_id(ccb->ccb_h.path);
+               else
+                       lun_id = CAM_LUN_WILDCARD;
+
+               /* Should we do more if we can't create the path?? */
+               if (xpt_create_path(&newpath, periph,
+                                   xpt_path_path_id(ccb->ccb_h.path),
+                                   xpt_path_target_id(ccb->ccb_h.path),
+                                   lun_id) == CAM_REQ_CMP) {
+
+                       /*
+                        * Let peripheral drivers know that this
+                        * device has gone away.
+                        */
+                       xpt_async(AC_LOST_DEVICE, newpath, NULL);
+                       xpt_free_path(newpath);
+               }
+       }
+
        /* Attempt a retry */
        if (error == ERESTART || error == 0) {
                if (frozen != 0)

Modified: head/sys/cam/cam_periph.h
==============================================================================
--- head/sys/cam/cam_periph.h   Sat Jun  9 12:58:00 2012        (r236813)
+++ head/sys/cam/cam_periph.h   Sat Jun  9 13:07:44 2012        (r236814)
@@ -118,7 +118,6 @@ struct cam_periph {
 #define CAM_PERIPH_INVALID             0x08
 #define CAM_PERIPH_NEW_DEV_FOUND       0x10
 #define CAM_PERIPH_RECOVERY_INPROG     0x20
-#define CAM_PERIPH_SENSE_INPROG                0x40
 #define CAM_PERIPH_FREE                        0x80

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to