On Tue, 2007-01-09 at 12:55 -0800, Petr Vandrovec wrote:
> Adam Zimman wrote:
> > Adding VMware engineering... 
> > 
> > -----Original Message-----
> > From: Manon Goo [mailto:[EMAIL PROTECTED] 
> > Sent: Tuesday, January 09, 2007 9:49 AM
> > To: Michael Reed; Moore, Eric; David Berghoff
> > Cc: James Bottomley; Adam Zimman; linux-scsi@vger.kernel.org; Shirron, 
> > Stephen
> > Subject: Re: [PATCH 2/5] fusion: vmware bug fix prevent inifinite retries
> > 
> > Hmm .... why don't w make the whole thing configurable (david implemented 
> > this for us)
> 
> In that case I would prefer going on with automatic detection of our 
> implementation - see first part of attached mpt-patch.diff I offered 
> when this all started.  As passing options to modules loaded by initrd 
> is not exactly trivial and varies across distributions, I would prefer 
> this runtime detection...
>                               Thanks,
>                                       Petr Vandrovec
> 

The attached (untested) patch shows a VMware and scsi transport agnostic
approach which introduces a new host status (DID_QUALIFIED_REQUEUE) to
be used by mptscsih.c (and other LLDs) instead of DID_BUS_BUSY.  A host
status of DID_QUALIFIED_REQUEUE will return ADD_TO_MLQUEUE from
scsi_decide_disposition IFF the REQ_FAILFAST bit is not set in the
cmd_flags field of the SCSI command's request structure.

The approach depends on both VMware Linux guests not setting
REQ_FAILFAST and non-VMware Linux hosts with an IBM RDAC/MPP multi-
pathing driver doing so.  This requirement is not a problem for VMware
since its guest operating systems have no need to configure block device
multi-pathing.  This requirement shouldn't be a problem for the IBM
RDAC/MPP driver either since it should already be setting the
REQ_FAILFAST attribute of I/Os for which it is providing multi-pathing,
similar to what the Linux dm-multipath driver already does.

Ed Goggin

> > 
> > +/*
> > + *  cmd line parameters
> > + */
> > +static int mpt_mpi_busy;
> > +module_param(mpt_mpi_busy, int, 0);
> > +MODULE_PARM_DESC(mpt_mpi_busy, " MPT MPI busy workaround for VMWare ESX 
> > (default=0)");
> > +
> >  
> > /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
> > 
> >  typedef struct _BIG_SENSE_BUF {
> > @@ -704,10 +711,13 @@
> >                         sc->resid=0;
> >                 case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:        /* 0x0040 */
> >                 case MPI_IOCSTATUS_SUCCESS:                     /* 0x0000 */
> > -                       if (scsi_status == MPI_SCSI_STATUS_BUSY)
> > +                       if ((scsi_status == MPI_SCSI_STATUS_BUSY) && 
> > !mpt_mpi_busy)
> >                                 sc->result = (DID_BUS_BUSY << 16) | 
> > scsi_status;
> > -                       else
> > +                       else {
> > +                                if (mpt_mpi_busy)
> > +                                        printk(KERN_INFO "MPT MPI ESX busy 
> > hack enabled ... waiting\n");
> >                                 sc->result = (DID_OK << 16) | scsi_status;
> > +                        }
> >                         if (scsi_state == 0) {
> >                                 ;
> >                         } else if (scsi_state & 
> > MPI_SCSI_STATE_AUTOSENSE_VALID) {
> plain text document attachment (mpt-patch.diff)
> Patch to fix timeouts during multi-path failover with LSILogic adapter on 
> RedHat 4.0 Update 3 & 4.
> Patch is needed for other distributions as well - for example current Linus's 
> 2.6.19 kernel needs it.
> 
> 
> diff -urN linux-2.6.9.orig/drivers/message/fusion/mptbase.c 
> linux-2.6.9/drivers/message/fusion/mptbase.c
> --- linux-2.6.9.orig/drivers/message/fusion/mptbase.c 2006-12-15 
> 11:13:47.000000000 -0800
> +++ linux-2.6.9/drivers/message/fusion/mptbase.c      2006-12-15 
> 11:43:01.000000000 -0800
> @@ -1435,12 +1435,21 @@
>               ioc->bus_type = SPI;
>               /* 1030 Chip Fix. Disable Split transactions
>                * for PCIX. Set MOST bits to zero if Rev < C0( = 8).
> +              *
> +              * Also detect VMware's LSILogic emulation - it does
> +              * not have PCI-X capability at offset 0x68 (and does
> +              * not need disabling split transactions although it
> +              * reports itself as revision 1).
>                */
>               pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
>               if (revision < C0_1030) {
> -                     pci_read_config_byte(pdev, 0x6a, &pcixcmd);
> -                     pcixcmd &= 0x8F;
> -                     pci_write_config_byte(pdev, 0x6a, pcixcmd);
> +                     if (pci_find_capability(pdev, PCI_CAP_ID_PCIX) == 0x68) 
> {
> +                             pci_read_config_byte(pdev, 0x6a, &pcixcmd);
> +                             pcixcmd &= 0x8F;
> +                             pci_write_config_byte(pdev, 0x6a, pcixcmd);
> +                     } else {
> +                             ioc->isVMware = 1;
> +                     }
>               }
>       }
>       else if (pdev->device == MPI_MANUFACTPAGE_DEVID_1030_53C1035) {
> diff -urN linux-2.6.9.orig/drivers/message/fusion/mptbase.h 
> linux-2.6.9/drivers/message/fusion/mptbase.h
> --- linux-2.6.9.orig/drivers/message/fusion/mptbase.h 2006-12-15 
> 11:13:47.000000000 -0800
> +++ linux-2.6.9/drivers/message/fusion/mptbase.h      2006-12-15 
> 11:40:47.000000000 -0800
> @@ -673,7 +673,8 @@
>       u8                       upload_fw;     /* If set, do a fw upload */
>       u8                       reload_fw;     /* Force a FW Reload on next 
> reset */
>       u8                       NBShiftFactor;  /* NB Shift Factor based on 
> Block Size (Facts)  */
> -     u8                       pad1[4];
> +     u8                       isVMware;
> +     u8                       pad1[3];
>       int                      DoneCtx;
>       int                      TaskCtx;
>       int                      InternalCtx;
> diff -urN linux-2.6.9.orig/drivers/message/fusion/mptscsi.c 
> linux-2.6.9/drivers/message/fusion/mptscsi.c
> --- linux-2.6.9.orig/drivers/message/fusion/mptscsi.c 2006-12-15 
> 11:13:47.000000000 -0800
> +++ linux-2.6.9/drivers/message/fusion/mptscsi.c      2006-12-15 
> 11:49:18.000000000 -0800
> @@ -773,7 +773,12 @@
>                       sc->resid=0;
>               case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:        /* 0x0040 */
>               case MPI_IOCSTATUS_SUCCESS:                     /* 0x0000 */
> -                     if (scsi_status == MPI_SCSI_STATUS_BUSY)
> +                     /*
> +                      * In the case of emulated adapter busy status may be 
> reported 
> +                      * for minutes when storage path switch occurs in the 
> firmware.
> +                      * We definitely do not want to give up after standard 
> timeout.
> +                      */
> +                     if (scsi_status == MPI_SCSI_STATUS_BUSY && 
> !ioc->isVMware)
>                               sc->result = (DID_BUS_BUSY << 16) | scsi_status;
>                       else
>                               sc->result = (DID_OK << 16) | scsi_status;
> @@ -810,6 +815,7 @@
>                                * Not real sure here either so do nothing...  
> */
>                       }
>  
> +                     /* Perhaps this wanted to test scsi_status and not 
> sc->result? */
>                       if (sc->result == MPI_SCSI_STATUS_TASK_SET_FULL)
>                               mptscsih_report_queue_full(sc, pScsiReply, 
> pScsiReq);
>  
diff -ru linux-2.6.20-rc3/drivers/message/fusion/mptscsih.c linux-2.6.20-rc3.vmware_patch/drivers/message/fusion/mptscsih.c
--- linux-2.6.20-rc3/drivers/message/fusion/mptscsih.c	2007-01-09 15:10:28.000019000 -0500
+++ linux-2.6.20-rc3.vmware_patch/drivers/message/fusion/mptscsih.c	2007-01-09 15:46:26.000105000 -0500
@@ -769,7 +769,7 @@
 		case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:	/* 0x0040 */
 		case MPI_IOCSTATUS_SUCCESS:			/* 0x0000 */
 			if (scsi_status == MPI_SCSI_STATUS_BUSY)
-				sc->result = (DID_BUS_BUSY << 16) | scsi_status;
+				sc->result = (DID_QUALIFIED_REQUEUE << 16) | scsi_status;
 			else
 				sc->result = (DID_OK << 16) | scsi_status;
 			if (scsi_state == 0) {
diff -ru linux-2.6.20-rc3/drivers/scsi/scsi_error.c linux-2.6.20-rc3.vmware_patch/drivers/scsi/scsi_error.c
--- linux-2.6.20-rc3/drivers/scsi/scsi_error.c	2007-01-09 14:40:16.000022000 -0500
+++ linux-2.6.20-rc3.vmware_patch/drivers/scsi/scsi_error.c	2007-01-09 15:55:48.000051000 -0500
@@ -1216,6 +1216,14 @@
 	case DID_IMM_RETRY:
 		return NEEDS_RETRY;
 
+	case DID_QUALIFIED_REQUEUE:
+		/*
+		 * Return immediately w/o requeue if the request 
+		 * indicates no retry.
+		 */
+		if (blk_noretry_request(scmd->request)) {
+			return SUCCESS;
+		}
 	case DID_REQUEUE:
 		return ADD_TO_MLQUEUE;
 
diff -ru linux-2.6.20-rc3/include/scsi/scsi.h linux-2.6.20-rc3.vmware_patch/include/scsi/scsi.h
--- linux-2.6.20-rc3/include/scsi/scsi.h	2007-01-09 15:07:30.000009000 -0500
+++ linux-2.6.20-rc3.vmware_patch/include/scsi/scsi.h	2007-01-09 15:45:41.000075000 -0500
@@ -309,6 +309,8 @@
 #define DID_IMM_RETRY   0x0c	/* Retry without decrementing retry count  */
 #define DID_REQUEUE	0x0d	/* Requeue command (no immediate retry) also
 				 * without decrementing the retry count	   */
+#define DID_QUALIFIED_REQUEUE 0x0e /* Requeue cmd w/o decr of retry count IFF
+				 * not blk_noretry_request		   */
 #define DRIVER_OK       0x00	/* Driver status                           */
 
 /*

Reply via email to