Hi,

I discovered something when I was looking for some hangs (timeouts):

1.
There is problem in used toggling system on bulk transfers - in case of
some errors (underrun, overrun etc.) it missed synchronization with
device, because it expects that all TDs will be processed - and in error
case it is usually not true.
I made some simple workaround (only for OHCI yet) which looks working -
see patch (patch contains also some another things, mainly unrecoverable
error detection and related reset of OHCI and some change in error
handling in usbms_transfer).

2.
I discovered some curious bug on one of my devices. It looks to be
little bit similar to this (which is unfortunately not solved, at least
in this mail list...):
http://www.mail-archive.com/linux-usb-us...@lists.sourceforge.net/msg16836.html
Symptoms:
Device is not able to send more then 1kb in one transfer. If grub
requests usual 4kb data block, device hangs together with OHCI when
first 1kb data is sent - there is no NAK, no STALL nor another error,
OHCI or device (or both together) simply hangs the bulk IN endpoint -
OHCI is waiting for the end of the IN transfer which never happened...
There is no reaction to CLEAR STALL sent to IN endpoint.
Device and OHCI leave this state when Bulk-only reset device procedure
is used.
It looks like there is no way how to make OHCI&device do longer
transfer.

My first idea for workaround - to do in scsi read/write functions
splitting of large data block into parts of device sector size or simply
of grub sector size.
Any other better idea ?

Best regards
Ales
diff -urB ./grub/bus/usb/ohci.c ./grub_toggle/bus/usb/ohci.c
--- ./grub/bus/usb/ohci.c	2010-05-25 20:06:40.000000000 +0200
+++ ./grub_toggle/bus/usb/ohci.c	2010-05-25 08:02:44.000000000 +0200
@@ -395,9 +400,11 @@
   grub_usb_err_t err;
   grub_uint8_t errcode = 0;
   grub_ohci_td_t tderr = NULL;
-  int i, j;
+  int i;
   grub_uint64_t maxtime;
   int err_timeout = 0;
+  int err_unrec = 0;
+  grub_uint32_t intstatus;
 
   /* Allocate an Endpoint Descriptor.  */
   ed = grub_memalign (16, sizeof (*ed));
@@ -530,27 +537,27 @@
     }
 
   grub_dprintf ("ohci", "wait for completion\n");
-  grub_dprintf ("ohci", "control=0x%02x status=0x%02x\n",
+  grub_dprintf ("ohci", "begin: control=0x%02x status=0x%02x\n\t\t intstatus=0x%02x\n",
   grub_ohci_readreg32 (o, GRUB_OHCI_REG_CONTROL),
-  grub_ohci_readreg32 (o, GRUB_OHCI_REG_CMDSTATUS));
+  grub_ohci_readreg32 (o, GRUB_OHCI_REG_CMDSTATUS),
+  grub_ohci_readreg32 (o, GRUB_OHCI_REG_INTSTATUS));
 
   /* Starting time for timeout - feel free to change the value,
    * I have no idea about correct value ...
    * It is workaround only because it looks like my OHCI does not
-   * inicate properly STALL or NAK (or both...) - or there is
+   * indicate properly STALL or NAK (or both...) - or there is
    * some mistake somewhere... */
   maxtime = grub_get_time_ms () + 1000;
 	
   /* Wait until the transfer is completed or STALLs.  */
   do
     {
-      grub_cpu_idle ();
-
       /* Detected a HALT.  */
       if (grub_le_to_cpu32 (ed->td_head) & 1)
         break;
   
-      if ((grub_ohci_readreg32 (o, GRUB_OHCI_REG_INTSTATUS) & 0x2) != 0)
+      intstatus = grub_ohci_readreg32 (o, GRUB_OHCI_REG_INTSTATUS);
+      if ((intstatus & 0x2) != 0)
         {
           if ((grub_le_to_cpu32 (o->hcca->donehead) & ~0xf)
                == (grub_uint32_t) &td_list[transfer->transcnt - 1])
@@ -568,6 +575,13 @@
           grub_ohci_writereg32 (o, GRUB_OHCI_REG_INTSTATUS, (1 << 1));
           continue;
         }
+
+      if ((intstatus & 0x10) != 0)
+        { /* Unrecoverable error - only reset can help...! */
+          err_unrec = 1;
+          break;
+        }
+
       /* Timeout ? */
       if (grub_get_time_ms () > maxtime)
       	{
@@ -577,9 +591,54 @@
       	  err_timeout = 1;
       	  break;
       	}
+
+      grub_cpu_idle ();
     }
   while (1);
+  
+  grub_dprintf ("ohci", "end: control=0x%02x status=0x%02x\n\t\t intstatus=0x%02x\n",
+  grub_ohci_readreg32 (o, GRUB_OHCI_REG_CONTROL),
+  grub_ohci_readreg32 (o, GRUB_OHCI_REG_CMDSTATUS),
+  grub_ohci_readreg32 (o, GRUB_OHCI_REG_INTSTATUS));
+
+  tderr = (grub_ohci_td_t)
+            (grub_le_to_cpu32 (o->hcca->donehead) & ~0xf);
 
+  if (tderr == 0)
+    {
+      /* If hcca->donehead==0 it means that something wrong happened,
+       * it could be:
+       * - timeout
+       * - some error
+       * - something unexpected... */
+      /* Try look into DONEHEAD reg. */
+      grub_dprintf("ohci", "HCCA DoneHead is zero, something is bad!\n");
+      tderr = (grub_ohci_td_t)
+                (grub_ohci_readreg32 (o, GRUB_OHCI_REG_DONEHEAD) & ~0xf);
+    }
+  /* Zero can be in case when timeout or unrecoverable error occured */
+  if ((tderr == 0) && (err_timeout || err_unrec))
+    tderr = &td_list[0]; /* No TD was probably executed, take first */
+  
+  /* Remember last processed transaction (TD) - it is necessary for
+   * proper setting of toggle bit in next transaction. */
+  transfer->last_trans = ((grub_uint32_t)tderr - (grub_uint32_t)td_list) / sizeof (*td_list);
+  /* The expression above maybe will not work on Yeeloong because
+   * of DMA memory mapping. (?) I.e., maybe we will need to have some
+   * additional informations in TDs. */
+  /* Check correct value in last_trans */
+  if (transfer->last_trans >= transfer->transcnt)
+    {
+      grub_dprintf("ohci", "Bad index of last processed transaction!\n");
+      grub_dprintf("ohci", "Something very very bad happened!\n");
+      grub_dprintf("ohci", "tderr=%p, td_list=%p,\n\t\t last_trans=%d, transcnt=%d\n",
+                   tderr, td_list, transfer->last_trans, transfer->transcnt);
+      /* We should set something valid... */
+      transfer->last_trans = 0; /* Can we do something better ? */
+      tderr = &td_list[0]; 
+    }
+
+  /* In case of timeout do not detect error from TD */    
   if (err_timeout)
     {
       err = GRUB_ERR_TIMEOUT;
@@ -590,15 +649,19 @@
       grub_le_to_cpu32(ed->next_ed));
     }
   		
+  /* In case of unrecoverable error do not detect error from TD */    
+  else if (err_unrec)
+    {
+      err = GRUB_USB_ERR_UNRECOVERABLE;
+      grub_dprintf("ohci", "Unrecoverable error, target=%08x, head=%08x\n\t\ttail=%08x, next=%08x\n",
+      grub_le_to_cpu32(ed->target),
+      grub_le_to_cpu32(ed->td_head),
+      grub_le_to_cpu32(ed->td_tail),
+      grub_le_to_cpu32(ed->next_ed));
+    }
+  		
   else if (grub_le_to_cpu32 (ed->td_head) & 1)
     {
-      tderr = (grub_ohci_td_t)
-                (grub_ohci_readreg32 (o, GRUB_OHCI_REG_DONEHEAD) & ~0xf);
-      if (tderr == 0)
-        /* If DONEHEAD==0 it means that correct address is in HCCA.
-         * It should be always now! */
-        tderr = (grub_ohci_td_t) (grub_le_to_cpu32 (o->hcca->donehead) & ~0xf);
-
       errcode = grub_le_to_cpu32 (tderr->token) >> 28;
       
       switch (errcode)
@@ -707,7 +770,8 @@
   /* SF bit reset. (SF bit indicates Start Of Frame (SOF) packet) */
   grub_ohci_writereg32 (o, GRUB_OHCI_REG_INTSTATUS, (1<<2));
   /* Wait for new SOF */
-  while ((grub_ohci_readreg32 (o, GRUB_OHCI_REG_INTSTATUS) & 0x4) == 0);
+  while (((grub_ohci_readreg32 (o, GRUB_OHCI_REG_INTSTATUS) & 0x4) == 0)
+         && !err_unrec);
   /* Now it should be safe to change CONTROL and BULK lists. */
   
   /* Important cleaning. */
@@ -718,6 +782,28 @@
   grub_ohci_writereg32 (o, GRUB_OHCI_REG_BULKHEAD, 0);
   grub_ohci_writereg32 (o, GRUB_OHCI_REG_BULKCURR, 0);
 
+  if (err_unrec)
+    {
+      /* Do OHCI reset in case of unrecoverable error - maybe we will need
+       * do more - re-enumerate bus etc. (?) */
+
+      /* Suspend the OHCI by issuing a reset.  */
+      grub_ohci_writereg32 (o, GRUB_OHCI_REG_CMDSTATUS, 1); /* XXX: Magic.  */
+      grub_millisleep (1);
+      grub_dprintf ("ohci", "Unrecoverable error - OHCI reset\n");
+
+      /* Misc. resets. */
+      o->hcca->donehead = 0;
+      grub_ohci_writereg32 (o, GRUB_OHCI_REG_INTSTATUS, 0x7f); /* Clears everything */
+      grub_ohci_writereg32 (o, GRUB_OHCI_REG_CONTROLHEAD, 0);
+      grub_ohci_writereg32 (o, GRUB_OHCI_REG_CONTROLCURR, 0);
+      grub_ohci_writereg32 (o, GRUB_OHCI_REG_BULKHEAD, 0);
+      grub_ohci_writereg32 (o, GRUB_OHCI_REG_BULKCURR, 0);
+
+      /* Enable the OHCI.  */
+      grub_ohci_writereg32 (o, GRUB_OHCI_REG_CONTROL, (2 << 6));
+    }
+  
   grub_dprintf ("ohci", "OHCI finished, freeing, err=0x%02x (OHCI errcode=0x%02x)\n",
                 err, errcode);
   
diff -urB ./grub/bus/usb/usbtrans.c ./grub_toggle/bus/usb/usbtrans.c
--- ./grub/bus/usb/usbtrans.c	2010-05-25 20:06:40.000000000 +0200
+++ ./grub_toggle/bus/usb/usbtrans.c	2010-05-24 22:26:54.000000000 +0200
@@ -167,6 +167,7 @@
   transfer->type = GRUB_USB_TRANSACTION_TYPE_BULK;
   transfer->max = max;
   transfer->dev = dev;
+  transfer->last_trans = 0; /* Reset index of last processed transaction (TD) */
 
   /* Allocate an array of transfer data structures.  */
   transfer->transactions = grub_malloc (transfer->transcnt
@@ -193,6 +194,10 @@
     }
 
   err = dev->controller.dev->transfer (&dev->controller, transfer);
+  /* We must remember proper toggle value even if some transactions
+   * were not processed - correct value should be inversion of last
+   * processed transaction (TD). */
+  toggle = transfer->transactions[transfer->last_trans].toggle ? 0 : 1;
   grub_dprintf ("usb", "toggle=%d\n", toggle);
   dev->toggle[endpoint] = toggle;
 
diff -urB ./grub/disk/usbms.c ./grub_toggle/disk/usbms.c
--- ./grub/disk/usbms.c	2010-05-25 20:06:40.000000000 +0200
+++ ./grub_toggle/disk/usbms.c	2010-05-24 20:38:23.000000000 +0200
@@ -238,6 +239,7 @@
   struct grub_usbms_csw status;
   static grub_uint32_t tag = 0;
   grub_usb_err_t err = GRUB_USB_ERR_NONE;
+  grub_usb_err_t errCSW = GRUB_USB_ERR_NONE;
   int retrycnt = 3 + 1;
   grub_size_t i;
 
@@ -275,9 +277,8 @@
     {
       if (err == GRUB_USB_ERR_STALL)
 	{
-	  grub_usb_clear_halt (dev->dev, dev->in->endp_addr);
 	  grub_usb_clear_halt (dev->dev, dev->out->endp_addr);
-	  goto retry;
+	  goto CheckCSW;
 	}
       return grub_error (GRUB_ERR_IO, "USB Mass Storage request failed");
     }
@@ -287,7 +288,12 @@
     {
       err = grub_usb_bulk_read (dev->dev, dev->in->endp_addr, size, buf);
       grub_dprintf ("usb", "read: %d %d\n", err, GRUB_USB_ERR_STALL); 
-      if (err) goto CheckCSW;
+      if (err)
+        {
+          if (err == GRUB_USB_ERR_STALL)
+	    grub_usb_clear_halt (dev->dev, dev->in->endp_addr);
+          goto CheckCSW;
+        }
       /* Debug print of received data. */
       grub_dprintf ("usb", "buf:\n");
       if (size <= 64)
@@ -301,6 +307,12 @@
       err = grub_usb_bulk_write (dev->dev, dev->out->endp_addr, size, buf);
       grub_dprintf ("usb", "write: %d %d\n", err, GRUB_USB_ERR_STALL);
       grub_dprintf ("usb", "buf:\n");
+      if (err)
+        {
+          if (err == GRUB_USB_ERR_STALL)
+	    grub_usb_clear_halt (dev->dev, dev->out->endp_addr);
+          goto CheckCSW;
+        }
       /* Debug print of sent data. */
       if (size <= 256)
         for (i=0; i<size; i++)
@@ -311,15 +323,16 @@
 
   /* Read the status - (maybe) according to specification.  */
 CheckCSW:
-  err = grub_usb_bulk_read (dev->dev, dev->in->endp_addr,
+  errCSW = grub_usb_bulk_read (dev->dev, dev->in->endp_addr,
 		    sizeof (status), (char *) &status);
-  if (err)
+  if (errCSW)
     {
       grub_usb_clear_halt (dev->dev, dev->in->endp_addr);
-      err = grub_usb_bulk_read (dev->dev, dev->in->endp_addr,
+      errCSW = grub_usb_bulk_read (dev->dev, dev->in->endp_addr,
 			        sizeof (status), (char *) &status);
-      if (err)
+      if (errCSW)
         { /* Bulk-only reset device. */
+          grub_dprintf ("usb", "Bulk-only reset device - errCSW\n");
           grub_usbms_reset (dev->dev, dev->interface);
           grub_usb_clear_halt (dev->dev, dev->in->endp_addr);
           grub_usb_clear_halt (dev->dev, dev->out->endp_addr);
@@ -332,9 +345,11 @@
   	status.signature, status.tag, status.residue);
   grub_dprintf ("usb", "CSW: status=0x%02x\n", status.status);
   
-  /* If phase error, do bulk-only reset device. */
-  if (status.status == 2)
-    {
+  /* If phase error or not valid signature, do bulk-only reset device. */
+  if ((status.status == 2) ||
+      (status.signature != grub_cpu_to_le32(0x53425355)))
+    { /* Bulk-only reset device. */
+      grub_dprintf ("usb", "Bulk-only reset device - bad status\n");
       grub_usbms_reset (dev->dev, dev->interface);
       grub_usb_clear_halt (dev->dev, dev->in->endp_addr);
       grub_usb_clear_halt (dev->dev, dev->out->endp_addr);
@@ -342,9 +356,13 @@
       goto retry;
     }
 
-  if (status.status)
+  /* If "command failed" status or data transfer failed -> error */
+  if ((status.status || err) && !read_write)
     return grub_error (GRUB_ERR_READ_ERROR,
 		       "error communication with USB Mass Storage device");
+  else if ((status.status || err) && read_write)
+    return grub_error (GRUB_ERR_WRITE_ERROR,
+		       "error communication with USB Mass Storage device");
 
   return GRUB_ERR_NONE;
 }
diff -urB ./grub/include/grub/usb.h ./grub_toggle/include/grub/usb.h
--- ./grub/include/grub/usb.h	2010-05-25 20:06:40.000000000 +0200
+++ ./grub_toggle/include/grub/usb.h	2010-05-24 19:49:11.000000000 +0200
@@ -35,7 +35,8 @@
     GRUB_USB_ERR_NAK,
     GRUB_USB_ERR_BABBLE,
     GRUB_USB_ERR_TIMEOUT,
-    GRUB_USB_ERR_BITSTUFF
+    GRUB_USB_ERR_BITSTUFF,
+    GRUB_USB_ERR_UNRECOVERABLE
   } grub_usb_err_t;
 
 typedef enum
diff -urB ./grub/include/grub/usbtrans.h ./grub_toggle/include/grub/usbtrans.h
--- ./grub/include/grub/usbtrans.h	2010-05-25 20:06:40.000000000 +0200
+++ ./grub_toggle/include/grub/usbtrans.h	2010-05-24 19:52:48.000000000 +0200
@@ -58,6 +58,9 @@
   struct grub_usb_device *dev;
 
   struct grub_usb_transaction *transactions;
+  
+  int last_trans;
+  /* Index of last processed transaction in OHCI/UHCI driver. */
 };
 typedef struct grub_usb_transfer *grub_usb_transfer_t;
 
_______________________________________________
Grub-devel mailing list
Grub-devel@gnu.org
http://lists.gnu.org/mailman/listinfo/grub-devel

Reply via email to