zfcp: introduce eh_timed_out handler

This handler is required to prevent SCSI devices in a multipath setup
from being set offline when SCSI commands time out during cable pulls
lasting longer than 30 seconds.

Signed-off-by: Andreas Herrmann <[EMAIL PROTECTED]>

diff -Nup linux-2.6.13/drivers/s390/scsi-orig/zfcp_scsi.c linux-2.6.13/drivers/s390/scsi/zfcp_scsi.c
--- linux-2.6.13/drivers/s390/scsi-orig/zfcp_scsi.c     2005-09-03 12:17:16.000000000 +0200
+++ linux-2.6.13/drivers/s390/scsi/zfcp_scsi.c  2005-09-03 12:17:53.000000000 +0200
@@ -44,6 +44,7 @@ static int zfcp_scsi_eh_abort_handler(st
 static int zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *);
 static int zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *);
 static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *);
+static enum scsi_eh_timer_return zfcp_scsi_eh_timed_out(struct scsi_cmnd *);
 static int zfcp_task_management_function(struct zfcp_unit *, u8);
 
 static struct zfcp_unit *zfcp_unit_lookup(struct zfcp_adapter *, int, scsi_id_t,
@@ -69,6 +70,7 @@ struct zfcp_data zfcp_data = {
              eh_device_reset_handler: zfcp_scsi_eh_device_reset_handler,
              eh_bus_reset_handler:    zfcp_scsi_eh_bus_reset_handler,
              eh_host_reset_handler:   zfcp_scsi_eh_host_reset_handler,
+             eh_timed_out:            zfcp_scsi_eh_timed_out,
                                       /* FIXME(openfcp): Tune */
              can_queue:               4096,
              this_id:                 0,
@@ -242,7 +244,6 @@ static void
 zfcp_scsi_command_fail(struct scsi_cmnd *scpnt, int result)
 {
        set_host_byte(&scpnt->result, result);
-       zfcp_cmd_dbf_event_scsi("failing", scpnt);
        /* return directly */
        scpnt->scsi_done(scpnt);
 }
@@ -414,59 +415,18 @@ zfcp_port_lookup(struct zfcp_adapter *ad
        return (struct zfcp_port *) NULL;
 }
 
-/*
- * function:   zfcp_scsi_eh_abort_handler
- *
- * purpose:    tries to abort the specified (timed out) SCSI command
- *
- * note:       We do not need to care for a SCSI command which completes
- *             normally but late during this abort routine runs.
- *             We are allowed to return late commands to the SCSI stack.
- *             It tracks the state of commands and will handle late commands.
- *             (Usually, the normal completion of late commands is ignored with
- *             respect to the running abort operation. Grep for 'done_late'
- *             in the SCSI stacks sources.)
- *
- * returns:    SUCCESS - command has been aborted and cleaned up in internal
- *                       bookkeeping,
- *                       SCSI stack won't be called for aborted command
- *             FAILED  - otherwise
- */
 int
-__zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
+zfcp_scsi_abort_async(struct scsi_cmnd *scpnt,
+                     struct zfcp_fsf_req **fsf_req_ptr)
 {
-       int retval = SUCCESS;
-       struct zfcp_fsf_req *new_fsf_req, *old_fsf_req;
-       struct zfcp_adapter *adapter = (struct zfcp_adapter *) scpnt->device->host->hostdata[0];
+       struct Scsi_Host *host = scpnt->device->host;
+       struct zfcp_adapter *adapter = (struct zfcp_adapter *) host->hostdata[0];
        struct zfcp_unit *unit = (struct zfcp_unit *) scpnt->device->hostdata;
-       struct zfcp_port *port = unit->port;
-       struct Scsi_Host *scsi_host = scpnt->device->host;
        union zfcp_req_data *req_data = NULL;
+       struct zfcp_fsf_req *new_fsf_req;
+       struct zfcp_fsf_req *old_fsf_req;
+       int req_flags;
        unsigned long flags;
-       u32 status = 0;
-
-       /* the components of a abort_dbf record (fixed size record) */
-       u64 dbf_scsi_cmnd = (unsigned long) scpnt;
-       char dbf_opcode[ZFCP_ABORT_DBF_LENGTH];
-       wwn_t dbf_wwn = port->wwpn;
-       fcp_lun_t dbf_fcp_lun = unit->fcp_lun;
-       u64 dbf_retries = scpnt->retries;
-       u64 dbf_allowed = scpnt->allowed;
-       u64 dbf_timeout = 0;
-       u64 dbf_fsf_req = 0;
-       u64 dbf_fsf_status = 0;
-       u64 dbf_fsf_qual[2] = { 0, 0 };
-       char dbf_result[ZFCP_ABORT_DBF_LENGTH] = "##undef";
-
-       memset(dbf_opcode, 0, ZFCP_ABORT_DBF_LENGTH);
-       memcpy(dbf_opcode,
-              scpnt->cmnd,
-              min(scpnt->cmd_len, (unsigned char) ZFCP_ABORT_DBF_LENGTH));
-
-       ZFCP_LOG_INFO("aborting scsi_cmnd=%p on adapter %s\n",
-                     scpnt, zfcp_get_busid_by_adapter(adapter));
-
-       spin_unlock_irq(scsi_host->host_lock);
 
        /*
         * Race condition between normal (late) completion and abort has
@@ -494,31 +454,18 @@ __zfcp_scsi_eh_abort_handler(struct scsi
                 * Do not initiate abort but return SUCCESS.
                 */
                write_unlock_irqrestore(&adapter->abort_lock, flags);
-               retval = SUCCESS;
-               strncpy(dbf_result, "##late1", ZFCP_ABORT_DBF_LENGTH);
-               goto out;
+               return SUCCESS;
        }
 
        /* Figure out which fsf_req needs to be aborted. */
        old_fsf_req = req_data->send_fcp_command_task.fsf_req;
 
-       dbf_fsf_req = (unsigned long) old_fsf_req;
-       dbf_timeout =
-           (jiffies - req_data->send_fcp_command_task.start_jiffies) / HZ;
-
        ZFCP_LOG_DEBUG("old_fsf_req=%p\n", old_fsf_req);
        if (!old_fsf_req) {
                write_unlock_irqrestore(&adapter->abort_lock, flags);
-               ZFCP_LOG_NORMAL("bug: no old fsf request found\n");
-               ZFCP_LOG_NORMAL("req_data:\n");
-               ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL,
-                             (char *) req_data, sizeof (union zfcp_req_data));
-               ZFCP_LOG_NORMAL("scsi_cmnd:\n");
-               ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL,
-                             (char *) scpnt, sizeof (struct scsi_cmnd));
-               retval = FAILED;
-               strncpy(dbf_result, "##bug:r", ZFCP_ABORT_DBF_LENGTH);
-               goto out;
+               if (fsf_req_ptr)
+                       *fsf_req_ptr = NULL;
+               return SUCCESS;
        }
        old_fsf_req->data.send_fcp_command_task.scsi_cmnd = NULL;
        /* mark old request as being aborted */
@@ -543,83 +490,101 @@ __zfcp_scsi_eh_abort_handler(struct scsi
         * all critical accesses to scsi_req are done.
         */
        write_unlock_irqrestore(&adapter->abort_lock, flags);
+
+       req_flags = (!fsf_req_ptr) ? ZFCP_REQ_AUTO_CLEANUP : 0;
+       new_fsf_req = zfcp_fsf_abort_fcp_command(
+                       (unsigned long) old_fsf_req, adapter, unit, req_flags);
+
        /* call FSF routine which does the abort */
-       new_fsf_req = zfcp_fsf_abort_fcp_command((unsigned long) old_fsf_req,
-                                                adapter, unit, 0);
-       ZFCP_LOG_DEBUG("new_fsf_req=%p\n", new_fsf_req);
        if (!new_fsf_req) {
-               retval = FAILED;
-               ZFCP_LOG_NORMAL("error: initiation of Abort FCP Cmnd "
-                               "failed\n");
-               strncpy(dbf_result, "##nores", ZFCP_ABORT_DBF_LENGTH);
-               goto out;
+               ZFCP_LOG_INFO("error: initiation of Abort FCP Command failed\n");
+               if (fsf_req_ptr)
+                       *fsf_req_ptr = NULL;
+               return FAILED;
        }
 
-       /* wait for completion of abort */
-       ZFCP_LOG_DEBUG("waiting for cleanup...\n");
-#if 1
-       /*
-        * FIXME:
-        * copying zfcp_fsf_req_wait_and_cleanup code is not really nice
-        */
-       __wait_event(new_fsf_req->completion_wq,
-                    new_fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
-       status = new_fsf_req->status;
-       dbf_fsf_status = new_fsf_req->qtcb->header.fsf_status;
-       /*
-        * Ralphs special debug load provides timestamps in the FSF
-        * status qualifier. This might be specified later if being
-        * useful for debugging aborts.
-        */
-       dbf_fsf_qual[0] =
-           *(u64 *) & new_fsf_req->qtcb->header.fsf_status_qual.word[0];
-       dbf_fsf_qual[1] =
-           *(u64 *) & new_fsf_req->qtcb->header.fsf_status_qual.word[2];
-       zfcp_fsf_req_free(new_fsf_req);
-#else
-       retval = zfcp_fsf_req_wait_and_cleanup(new_fsf_req,
-                                              ZFCP_UNINTERRUPTIBLE, &status);
-#endif
-       ZFCP_LOG_DEBUG("Waiting for cleanup complete, status=0x%x\n", status);
-       /* status should be valid since signals were not permitted */
-       if (status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) {
-               retval = SUCCESS;
-               strncpy(dbf_result, "##succ", ZFCP_ABORT_DBF_LENGTH);
-       } else if (status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) {
-               retval = SUCCESS;
-               strncpy(dbf_result, "##late2", ZFCP_ABORT_DBF_LENGTH);
-       } else {
-               retval = FAILED;
-               strncpy(dbf_result, "##fail", ZFCP_ABORT_DBF_LENGTH);
-       }
+       if (fsf_req_ptr)
+               *fsf_req_ptr = new_fsf_req;
+       return SUCCESS;
+}
 
- out:
-       debug_event(adapter->abort_dbf, 1, &dbf_scsi_cmnd, sizeof (u64));
-       debug_event(adapter->abort_dbf, 1, &dbf_opcode, ZFCP_ABORT_DBF_LENGTH);
-       debug_event(adapter->abort_dbf, 1, &dbf_wwn, sizeof (wwn_t));
-       debug_event(adapter->abort_dbf, 1, &dbf_fcp_lun, sizeof (fcp_lun_t));
-       debug_event(adapter->abort_dbf, 1, &dbf_retries, sizeof (u64));
-       debug_event(adapter->abort_dbf, 1, &dbf_allowed, sizeof (u64));
-       debug_event(adapter->abort_dbf, 1, &dbf_timeout, sizeof (u64));
-       debug_event(adapter->abort_dbf, 1, &dbf_fsf_req, sizeof (u64));
-       debug_event(adapter->abort_dbf, 1, &dbf_fsf_status, sizeof (u64));
-       debug_event(adapter->abort_dbf, 1, &dbf_fsf_qual[0], sizeof (u64));
-       debug_event(adapter->abort_dbf, 1, &dbf_fsf_qual[1], sizeof (u64));
-       debug_text_event(adapter->abort_dbf, 1, dbf_result);
 
-       spin_lock_irq(scsi_host->host_lock);
+/* Abort @scpnt and wait for the abort request to complete. */
+int
+zfcp_scsi_abort_sync(struct scsi_cmnd *scpnt)
+{
+       /* NULL-init: zfcp_scsi_abort_async() may return without storing it */
+       struct zfcp_fsf_req *fsf_req = NULL;
+       int retval;
+
+       retval = zfcp_scsi_abort_async(scpnt, &fsf_req);
+       if (!fsf_req)
+               return retval;
+
+       /* wait for completion of abort */
+       __wait_event(
+               fsf_req->completion_wq,
+               fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
+
+       /* status should be valid since signals were not permitted */
+       if (fsf_req->status & (ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED |
+                              ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED))
+               retval = SUCCESS;
+       else
+               retval = FAILED;
+
+       zfcp_fsf_req_free(fsf_req);
+
        return retval;
 }
 
+/**
+ * zfcp_scsi_eh_abort_handler - abort the specified SCSI command
+ * @scpnt: pointer to scsi_cmnd to be aborted 
+ * Return: SUCCESS - command has been aborted and cleaned up in internal
+ *          bookkeeping, SCSI stack won't be called for aborted command
+ *         FAILED - otherwise
+ *
+ * We do not need to care for a SCSI command which completes normally
+ * but late during this abort routine runs.  We are allowed to return
+ * late commands to the SCSI stack.  It tracks the state of commands and
+ * will handle late commands.  (Usually, the normal completion of late
+ * commands is ignored with respect to the running abort operation.)
+ */
 int
 zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
 {
-       int rc;
-       struct Scsi_Host *scsi_host = scpnt->device->host;
-       spin_lock_irq(scsi_host->host_lock);
-       rc = __zfcp_scsi_eh_abort_handler(scpnt);
-       spin_unlock_irq(scsi_host->host_lock);
-       return rc;
+       struct Scsi_Host *host = scpnt->device->host;
+       struct zfcp_adapter *adapter = (struct zfcp_adapter *) host->hostdata[0];
+       int retval;
+
+       ZFCP_LOG_INFO("aborting scsi_cmnd %p on adapter %s\n",
+                     scpnt, zfcp_get_busid_by_adapter(adapter));
+
+       retval = zfcp_scsi_abort_sync(scpnt);
+  
+       return retval;
+}
+  
+/**
+ * zfcp_scsi_eh_timed_out - handle timed out SCSI command
+ * @scpnt: pointer to scsi command which timed out
+ * Return: EH_HANDLED - to notify SCSI layer that we would never call
+ *          scsi_done() for that command
+ */
+enum scsi_eh_timer_return
+zfcp_scsi_eh_timed_out(struct scsi_cmnd *scpnt)
+{
+       struct Scsi_Host *host = scpnt->device->host;
+       struct zfcp_adapter *adapter = (struct zfcp_adapter *) host->hostdata[0];
+
+       ZFCP_LOG_INFO("scsi_cmnd %p on adapter %s timed out\n",
+                     scpnt, zfcp_get_busid_by_adapter(adapter));
+       /* set DID_NO_CONNECT, then start the abort without waiting for it */
+       set_host_byte(&scpnt->result, DID_NO_CONNECT);
+       zfcp_scsi_abort_async(scpnt, NULL);
+
+       return EH_HANDLED;
 }
 
 /*
-
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to