Dear Benoît Thébaudeau,

> This patch takes advantage of the hardware EHCI qTD queuing mechanism to
> avoid software overhead and to make transfers as fast as possible.
> 
> The only drawback is a call to memalign. However, this is fast compared to
> the transfer timings, and the heap size to allocate is small, e.g. a
> little bit more than 100 kB for a transfer length of 65535 packets of 512
> bytes.
> 
> Tested on i.MX25 and i.MX35. In my test conditions, the speedup was about
> 15x using page-aligned buffers, which is really appreciable when accessing
> large files.
> 
> Signed-off-by: Benoît Thébaudeau <benoit.thebaud...@advansee.com>
> Cc: Marek Vasut <ma...@denx.de>
> Cc: Ilya Yanok <ilya.ya...@cogentembedded.com>
> Cc: Stefan Herbrechtsmeier <ste...@herbrechtsmeier.net>
> ---
>  .../drivers/usb/host/ehci-hcd.c                    |   94
> ++++++++++++++------ 1 file changed, 65 insertions(+), 29 deletions(-)
> 
> diff --git u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c
> u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c index 5b3b906..b5645fa
> 100644
> --- u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c
> +++ u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c
> @@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long
> pipe, void *buffer, int length, struct devrequest *req)
>  {
>       ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN);
> -     ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN);
> +     struct qTD *qtd;
> +     int qtd_count = 0;
>       int qtd_counter = 0;
> 
>       volatile struct qTD *vtd;
> @@ -229,8 +230,25 @@ ehci_submit_async(struct usb_device *dev, unsigned
> long pipe, void *buffer, le16_to_cpu(req->value), le16_to_cpu(req->value),
>                     le16_to_cpu(req->index));
> 
> +     if (req != NULL)                        /* SETUP + ACK */
> +             qtd_count += 1 + 1;
> +     if (length > 0 || req == NULL) {        /* buffer */
> +             if ((uint32_t)buffer & 4095)            /* page-unaligned */
> +                     qtd_count += (((uint32_t)buffer & 4095) + length +
> +                                     (QT_BUFFER_CNT - 1) * 4096 - 1) /
> +                                             ((QT_BUFFER_CNT - 1) * 4096);

OK, could you please elaborate on this crazy calculation, or clarify it
somehow? Also, wouldn't the macros in include/common.h (like ROUND() etc.)
help here?

I don't really grasp what you're trying to calculate here, so maybe even a
comment would help.

> +             else                                    /* page-aligned */
> +                     qtd_count += (length + QT_BUFFER_CNT * 4096 - 1) /
> +                                     (QT_BUFFER_CNT * 4096);

Same here. Also, please avoid using 4096 and similar magic constants ...
maybe #define them in ehci.h?

> +     }
> +     qtd = memalign(USB_DMA_MINALIGN, qtd_count * sizeof(struct qTD));

So your code can allocate more than 5 qTDs? How does it chain them, then?
Into more QHs?

> +     if (qtd == NULL) {
> +             printf("unable to allocate TDs\n");
> +             return -1;
> +     }
> +
>       memset(qh, 0, sizeof(struct QH));
> -     memset(qtd, 0, 3 * sizeof(*qtd));
> +     memset(qtd, 0, qtd_count * sizeof(*qtd));
> 
>       toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
> 
> @@ -291,31 +309,46 @@ ehci_submit_async(struct usb_device *dev, unsigned
> long pipe, void *buffer, }
> 
>       if (length > 0 || req == NULL) {
> -             /*
> -              * Setup request qTD (3.5 in ehci-r10.pdf)
> -              *
> -              *   qt_next ................ 03-00 H
> -              *   qt_altnext ............. 07-04 H
> -              *   qt_token ............... 0B-08 H
> -              *
> -              *   [ buffer, buffer_hi ] loaded with "buffer".
> -              */
> -             qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
> -             qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE);
> -             token = (toggle << 31) |
> -                 (length << 16) |
> -                 ((req == NULL ? 1 : 0) << 15) |
> -                 (0 << 12) |
> -                 (3 << 10) |
> -                 ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0);
> -             qtd[qtd_counter].qt_token = cpu_to_hc32(token);
> -             if (ehci_td_buffer(&qtd[qtd_counter], buffer, length) != 0) {
> -                     printf("unable construct DATA td\n");
> -                     goto fail;
> -             }
> -             /* Update previous qTD! */
> -             *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]);
> -             tdp = &qtd[qtd_counter++].qt_next;
> +             uint8_t *buf_ptr = buffer;
> +             int left_length = length;
> +
> +             do {
> +                     int xfr_bytes = min(left_length,
> +                                         (QT_BUFFER_CNT * 4096 -
> +                                          ((uint32_t)buf_ptr & 4095)) &
> +                                         ~4095);

A magic formula yet again ... a comment would again be welcome, please.

> +                     /*
> +                      * Setup request qTD (3.5 in ehci-r10.pdf)
> +                      *
> +                      *   qt_next ................ 03-00 H
> +                      *   qt_altnext ............. 07-04 H
> +                      *   qt_token ............... 0B-08 H
> +                      *
> +                      *   [ buffer, buffer_hi ] loaded with "buffer".
> +                      */
> +                     qtd[qtd_counter].qt_next =
> +                                     cpu_to_hc32(QT_NEXT_TERMINATE);
> +                     qtd[qtd_counter].qt_altnext =
> +                                     cpu_to_hc32(QT_NEXT_TERMINATE);
> +                     token = (toggle << 31) |
> +                         (xfr_bytes << 16) |
> +                         ((req == NULL ? 1 : 0) << 15) |
> +                         (0 << 12) |
> +                         (3 << 10) |
> +                         ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0);

If you could fix all this magic afterwards (not in these patches), that'd be 
great.

> +                     qtd[qtd_counter].qt_token = cpu_to_hc32(token);
> +                     if (ehci_td_buffer(&qtd[qtd_counter], buf_ptr,
> +                                             xfr_bytes) != 0) {
> +                             printf("unable construct DATA td\n");
> +                             goto fail;
> +                     }
> +                     /* Update previous qTD! */
> +                     *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]);
> +                     tdp = &qtd[qtd_counter++].qt_next;
> +                     buf_ptr += xfr_bytes;
> +                     left_length -= xfr_bytes;
> +             } while (left_length > 0);
>       }
> 
>       if (req != NULL) {
> @@ -346,7 +379,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long
> pipe, void *buffer, flush_dcache_range((uint32_t)qh_list,
>               ALIGN_END_ADDR(struct QH, qh_list, 1));
>       flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1));
> -     flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd, 3));
> +     flush_dcache_range((uint32_t)qtd,
> +                        ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
> 
>       /* Set async. queue head pointer. */
>       ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list);
> @@ -377,7 +411,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long
> pipe, void *buffer, invalidate_dcache_range((uint32_t)qh,
>                       ALIGN_END_ADDR(struct QH, qh, 1));
>               invalidate_dcache_range((uint32_t)qtd,
> -                     ALIGN_END_ADDR(struct qTD, qtd, 3));
> +                     ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
> 
>               token = hc32_to_cpu(vtd->qt_token);
>               if (!(token & 0x80))
> @@ -450,9 +484,11 @@ ehci_submit_async(struct usb_device *dev, unsigned
> long pipe, void *buffer, ehci_readl(&hcor->or_portsc[1]));
>       }
> 
> +     free(qtd);
>       return (dev->status != USB_ST_NOT_PROC) ? 0 : -1;
> 
>  fail:
> +     free(qtd);
>       return -1;
>  }
_______________________________________________
U-Boot mailing list
U-Boot@lists.denx.de
http://lists.denx.de/mailman/listinfo/u-boot

Reply via email to