This patch takes advantage of the hardware EHCI qTD queuing mechanism to avoid
software overhead and to make transfers as fast as possible.

The only drawback is one call to memalign() per submitted transfer. However,
this is fast compared to the transfer timings, and the heap size to allocate is
small, e.g. a little more than 100 kB for a transfer length of 65535 packets of
512 bytes.

Tested on i.MX25 and i.MX35. In my test conditions, the speedup was about 15x
using page-aligned buffers, which is a significant gain when accessing large
files.

Signed-off-by: Benoît Thébaudeau <benoit.thebaud...@advansee.com>
Cc: Marek Vasut <ma...@denx.de>
Cc: Ilya Yanok <ilya.ya...@cogentembedded.com>
Cc: Stefan Herbrechtsmeier <ste...@herbrechtsmeier.net>
---
 .../drivers/usb/host/ehci-hcd.c                    |   94 ++++++++++++++------
 1 file changed, 65 insertions(+), 29 deletions(-)

diff --git u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c
index 5b3b906..b5645fa 100644
--- u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c
+++ u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c
@@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
                   int length, struct devrequest *req)
 {
        ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN);
-       ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN);
+       struct qTD *qtd;
+       int qtd_count = 0;
        int qtd_counter = 0;
 
        volatile struct qTD *vtd;
@@ -229,8 +230,25 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
                      le16_to_cpu(req->value), le16_to_cpu(req->value),
                      le16_to_cpu(req->index));
 
+       if (req != NULL)                        /* SETUP + ACK */
+               qtd_count += 1 + 1;
+       if (length > 0 || req == NULL) {        /* buffer */
+               if ((uint32_t)buffer & 4095)            /* page-unaligned */
+                       qtd_count += (((uint32_t)buffer & 4095) + length +
+                                       (QT_BUFFER_CNT - 1) * 4096 - 1) /
+                                               ((QT_BUFFER_CNT - 1) * 4096);
+               else                                    /* page-aligned */
+                       qtd_count += (length + QT_BUFFER_CNT * 4096 - 1) /
+                                       (QT_BUFFER_CNT * 4096);
+       }
+       qtd = memalign(USB_DMA_MINALIGN, qtd_count * sizeof(struct qTD));
+       if (qtd == NULL) {
+               printf("unable to allocate TDs\n");
+               return -1;
+       }
+
        memset(qh, 0, sizeof(struct QH));
-       memset(qtd, 0, 3 * sizeof(*qtd));
+       memset(qtd, 0, qtd_count * sizeof(*qtd));
 
        toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
 
@@ -291,31 +309,46 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
        }
 
        if (length > 0 || req == NULL) {
-               /*
-                * Setup request qTD (3.5 in ehci-r10.pdf)
-                *
-                *   qt_next ................ 03-00 H
-                *   qt_altnext ............. 07-04 H
-                *   qt_token ............... 0B-08 H
-                *
-                *   [ buffer, buffer_hi ] loaded with "buffer".
-                */
-               qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
-               qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE);
-               token = (toggle << 31) |
-                   (length << 16) |
-                   ((req == NULL ? 1 : 0) << 15) |
-                   (0 << 12) |
-                   (3 << 10) |
-                   ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0);
-               qtd[qtd_counter].qt_token = cpu_to_hc32(token);
-               if (ehci_td_buffer(&qtd[qtd_counter], buffer, length) != 0) {
-                       printf("unable construct DATA td\n");
-                       goto fail;
-               }
-               /* Update previous qTD! */
-               *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]);
-               tdp = &qtd[qtd_counter++].qt_next;
+               uint8_t *buf_ptr = buffer;
+               int left_length = length;
+
+               do {
+                       int xfr_bytes = min(left_length,
+                                           (QT_BUFFER_CNT * 4096 -
+                                            ((uint32_t)buf_ptr & 4095)) &
+                                           ~4095);
+
+                       /*
+                        * Setup request qTD (3.5 in ehci-r10.pdf)
+                        *
+                        *   qt_next ................ 03-00 H
+                        *   qt_altnext ............. 07-04 H
+                        *   qt_token ............... 0B-08 H
+                        *
+                        *   [ buffer, buffer_hi ] loaded with "buffer".
+                        */
+                       qtd[qtd_counter].qt_next =
+                                       cpu_to_hc32(QT_NEXT_TERMINATE);
+                       qtd[qtd_counter].qt_altnext =
+                                       cpu_to_hc32(QT_NEXT_TERMINATE);
+                       token = (toggle << 31) |
+                           (xfr_bytes << 16) |
+                           ((req == NULL ? 1 : 0) << 15) |
+                           (0 << 12) |
+                           (3 << 10) |
+                           ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0);
+                       qtd[qtd_counter].qt_token = cpu_to_hc32(token);
+                       if (ehci_td_buffer(&qtd[qtd_counter], buf_ptr,
+                                               xfr_bytes) != 0) {
+                               printf("unable construct DATA td\n");
+                               goto fail;
+                       }
+                       /* Update previous qTD! */
+                       *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]);
+                       tdp = &qtd[qtd_counter++].qt_next;
+                       buf_ptr += xfr_bytes;
+                       left_length -= xfr_bytes;
+               } while (left_length > 0);
        }
 
        if (req != NULL) {
@@ -346,7 +379,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
        flush_dcache_range((uint32_t)qh_list,
                ALIGN_END_ADDR(struct QH, qh_list, 1));
        flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1));
-       flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd, 3));
+       flush_dcache_range((uint32_t)qtd,
+                          ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
 
        /* Set async. queue head pointer. */
        ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list);
@@ -377,7 +411,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
                invalidate_dcache_range((uint32_t)qh,
                        ALIGN_END_ADDR(struct QH, qh, 1));
                invalidate_dcache_range((uint32_t)qtd,
-                       ALIGN_END_ADDR(struct qTD, qtd, 3));
+                       ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
 
                token = hc32_to_cpu(vtd->qt_token);
                if (!(token & 0x80))
@@ -450,9 +484,11 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
                      ehci_readl(&hcor->or_portsc[1]));
        }
 
+       free(qtd);
        return (dev->status != USB_ST_NOT_PROC) ? 0 : -1;
 
 fail:
+       free(qtd);
        return -1;
 }
 
_______________________________________________
U-Boot mailing list
U-Boot@lists.denx.de
http://lists.denx.de/mailman/listinfo/u-boot

Reply via email to