Bingo! :) When I changed the bsqh initialisation, I get speed above 400 KB/s, that makes me feel much better :)
I am sending a corrected patch as attachment. Tomas On Thu, 28 Nov 2002, Ian Dowse wrote: > I had a quick look at the patch - one thing I noticed is that you > are possibly not initialising all of the bsqh fields. In -current, > bsqh->hlink and bsqh->qh.qh_hlink are set up to point at the `lsqh' > QH. It might not be the problem though. > > Ian >
diff -u stable/uhci.c verze3/uhci.c --- stable/uhci.c Wed Nov 6 21:23:50 2002 +++ verze3/uhci.c Thu Nov 28 09:48:45 2002 @@ -79,6 +79,9 @@ #include <dev/usb/uhcireg.h> #include <dev/usb/uhcivar.h> +/* Use bandwidth reclamation for control transfers. Some devices choke on it. */ +/*#define UHCI_CTL_LOOP */ + #if defined(__FreeBSD__) #include <machine/clock.h> @@ -97,22 +100,37 @@ #define DPRINTF(x) if (uhcidebug) printf x #define DPRINTFN(n,x) if (uhcidebug>(n)) printf x int uhcidebug = 0; +int uhcinoloop = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, uhci, CTLFLAG_RW, 0, "USB uhci"); SYSCTL_INT(_hw_usb_uhci, OID_AUTO, debug, CTLFLAG_RW, &uhcidebug, 0, "uhci debug level"); +SYSCTL_INT(_hw_usb_uhci, OID_AUTO, loop, CTLFLAG_RW, + &uhcinoloop, 0, "uhci noloop"); #else #define DPRINTF(x) #define DPRINTFN(n,x) #endif +/* + * Just a workaround, because FreeBSD 4-STABLE doesn't have bswap32 + */ +#if defined(__FreeBSD__) +#define bswap32(x) (((x << 24) & 0xff000000 ) | \ + ((x << 8) & 0x00ff0000 ) | \ + ((x >> 8) & 0x0000ff00 ) | \ + ((x >> 24) & 0x000000ff )) +#endif + /* * The UHCI controller is little endian, so on big endian machines * the data strored in memory needs to be swapped. */ #if BYTE_ORDER == BIG_ENDIAN #define LE(x) (bswap32(x)) +#define HE(x) (x) #else #define LE(x) (x) +#define HE(x) (bswap32(x)) #endif struct uhci_pipe { @@ -183,11 +201,15 @@ Static void uhci_timeout(void *); Static void uhci_lock_frames(uhci_softc_t *); Static void uhci_unlock_frames(uhci_softc_t *); -Static void uhci_add_ctrl(uhci_softc_t *, uhci_soft_qh_t *); +Static void uhci_add_ls_ctrl(uhci_softc_t *, uhci_soft_qh_t *); +Static void uhci_add_hs_ctrl(uhci_softc_t *, uhci_soft_qh_t *); Static void uhci_add_bulk(uhci_softc_t *, uhci_soft_qh_t *); -Static void uhci_remove_ctrl(uhci_softc_t *,uhci_soft_qh_t *); +Static void uhci_remove_ls_ctrl(uhci_softc_t *,uhci_soft_qh_t *); +Static void uhci_remove_hs_ctrl(uhci_softc_t *,uhci_soft_qh_t *); Static void uhci_remove_bulk(uhci_softc_t *,uhci_soft_qh_t *); Static int uhci_str(usb_string_descriptor_t *, int, char *); +Static void uhci_add_loop(uhci_softc_t *sc); +Static void uhci_rem_loop(uhci_softc_t *sc); Static usbd_status uhci_setup_isoc(usbd_pipe_handle pipe); Static void uhci_device_isoc_enter(usbd_xfer_handle); @@ -243,6 +265,10 @@ Static usbd_status uhci_device_setintr(uhci_softc_t *sc, struct uhci_pipe *pipe, int ival); +Static __inline__ uhci_soft_qh_t *uhci_find_prev_qh(uhci_soft_qh_t *, + uhci_soft_qh_t *); + + Static void uhci_device_clear_toggle(usbd_pipe_handle pipe); Static void uhci_noop(usbd_pipe_handle pipe); @@ -333,6 +359,22 @@ uhci_device_isoc_done, }; +Static __inline__ uhci_soft_qh_t * +uhci_find_prev_qh(uhci_soft_qh_t *pqh, uhci_soft_qh_t *sqh) +{ + DPRINTFN(15,("uhci_find_prev_qh: pqh=%p sqh=%p\n", pqh, sqh)); + + for (; pqh->hlink != sqh; pqh = pqh->hlink) { +#if defined(DIAGNOSTIC) || defined(USB_DEBUG) + if (HE(pqh->qh.qh_hlink) & UHCI_PTR_T) { + printf("uhci_find_prev_qh: QH not found\n"); + return (NULL); + } +#endif + } + return (pqh); +} + void uhci_busreset(uhci_softc_t *sc) { @@ -346,7 +388,7 @@ { usbd_status err; int i, j; - uhci_soft_qh_t *csqh, *bsqh, *sqh; + uhci_soft_qh_t *clsqh, *chsqh, *bsqh, *sqh, *lsqh; uhci_soft_td_t *std; DPRINTFN(1,("uhci_init: start\n")); @@ -371,22 +413,60 @@ UWRITE2(sc, UHCI_FRNUM, 0); /* set frame number to 0 */ UWRITE4(sc, UHCI_FLBASEADDR, DMAADDR(&sc->sc_dma, 0)); /* set frame list*/ + /* + * Allocate a TD, inactive, that hangs from the last QH. + * This is to avoid a bug in the PIIX that makes it run berserk + * otherwise. + */ + std = uhci_alloc_std(sc); + if (std == NULL) + return (USBD_NOMEM); + std->link.std = NULL; + std->td.td_link = LE(UHCI_PTR_T); + std->td.td_status = LE(0); /* inactive */ + std->td.td_token = LE(0); + std->td.td_buffer = LE(0); + + /* Allocate the dummy QH marking the end and used for looping the QHs.*/ + lsqh = uhci_alloc_sqh(sc); + if (lsqh == NULL) + return (USBD_NOMEM); + lsqh->hlink = NULL; + lsqh->qh.qh_hlink = LE(UHCI_PTR_T); /* end of QH chain */ + lsqh->elink = std; + lsqh->qh.qh_elink = LE(std->physaddr | UHCI_PTR_TD); + sc->sc_last_qh = lsqh; + /* Allocate the dummy QH where bulk traffic will be queued. */ bsqh = uhci_alloc_sqh(sc); if (bsqh == NULL) return (USBD_NOMEM); - bsqh->qh.qh_hlink = LE(UHCI_PTR_T); /* end of QH chain */ + bsqh->hlink = lsqh; + bsqh->qh.qh_hlink = LE(lsqh->physaddr | UHCI_PTR_Q); + bsqh->elink = NULL; bsqh->qh.qh_elink = LE(UHCI_PTR_T); sc->sc_bulk_start = sc->sc_bulk_end = bsqh; - /* Allocate the dummy QH where control traffic will be queued. */ - csqh = uhci_alloc_sqh(sc); - if (csqh == NULL) + /* Allocate dummy QH where high speed control traffic will be queued. */ + chsqh = uhci_alloc_sqh(sc); + if (chsqh == NULL) return (USBD_NOMEM); - csqh->hlink = bsqh; - csqh->qh.qh_hlink = LE(bsqh->physaddr | UHCI_PTR_Q); - csqh->qh.qh_elink = LE(UHCI_PTR_T); - sc->sc_ctl_start = sc->sc_ctl_end = csqh; + chsqh->hlink = bsqh; + chsqh->qh.qh_hlink = LE(bsqh->physaddr | UHCI_PTR_Q); + chsqh->elink = NULL; + chsqh->qh.qh_elink = LE(UHCI_PTR_T); + sc->sc_hctl_start = sc->sc_hctl_end = chsqh; + + /* Allocate dummy QH where control traffic will be queued. */ + clsqh = uhci_alloc_sqh(sc); + if (clsqh == NULL) + return (USBD_NOMEM); + clsqh->hlink = bsqh; + clsqh->qh.qh_hlink = LE(chsqh->physaddr | UHCI_PTR_Q); + clsqh->elink = NULL; + clsqh->qh.qh_elink = LE(UHCI_PTR_T); + sc->sc_lctl_start = sc->sc_lctl_end = clsqh; + /* * Make all (virtual) frame list pointers point to the interrupt @@ -403,8 +483,8 @@ std->td.td_status = LE(UHCI_TD_IOS); /* iso, inactive */ std->td.td_token = LE(0); std->td.td_buffer = LE(0); - sqh->hlink = csqh; - sqh->qh.qh_hlink = LE(csqh->physaddr | UHCI_PTR_Q); + sqh->hlink = clsqh; + sqh->qh.qh_hlink = LE(clsqh->physaddr | UHCI_PTR_Q); sqh->elink = 0; sqh->qh.qh_elink = LE(UHCI_PTR_T); sc->sc_vframes[i].htd = std; @@ -825,45 +905,128 @@ LIST_INSERT_HEAD(&uhci_ii_free, ii, list); /* and put on free list */ } -/* Add control QH, called at splusb(). */ +/* + * Let the last QH loop back to the high speed control transfer QH. + * This is what intel calls "bandwidth reclamation" and improves + * USB performance a lot for some devices. + * If we are already looping, just count it. + */ +void +uhci_add_loop(uhci_softc_t *sc) { +#ifdef USB_DEBUG + if (uhcinoloop) + return; +#endif + if (++sc->sc_loops == 1) { + DPRINTFN(5,("uhci_start_loop: add\n")); + /* Note, we don't loop back the soft pointer. */ + sc->sc_last_qh->qh.qh_hlink = + LE(sc->sc_hctl_start->physaddr | UHCI_PTR_Q); + } +} + +void +uhci_rem_loop(uhci_softc_t *sc) { +#ifdef USB_DEBUG + if (uhcinoloop) + return; +#endif + if (--sc->sc_loops == 0) { + DPRINTFN(5,("uhci_end_loop: remove\n")); + sc->sc_last_qh->qh.qh_hlink = LE(UHCI_PTR_T); + } +} + +/* Add high speed control QH, called at splusb(). */ void -uhci_add_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh) +uhci_add_hs_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh) { uhci_soft_qh_t *eqh; SPLUSBCHECK; DPRINTFN(10, ("uhci_add_ctrl: sqh=%p\n", sqh)); - eqh = sc->sc_ctl_end; + eqh = sc->sc_hctl_end; sqh->hlink = eqh->hlink; sqh->qh.qh_hlink = eqh->qh.qh_hlink; eqh->hlink = sqh; eqh->qh.qh_hlink = LE(sqh->physaddr | UHCI_PTR_Q); - sc->sc_ctl_end = sqh; + sc->sc_hctl_end = sqh; +#ifdef UHCI_CTL_LOOP + uhci_add_loop(sc); +#endif } -/* Remove control QH, called at splusb(). */ +/* Remove high speed control QH, called at splusb(). */ void -uhci_remove_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh) +uhci_remove_hs_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh) { uhci_soft_qh_t *pqh; SPLUSBCHECK; - DPRINTFN(10, ("uhci_remove_ctrl: sqh=%p\n", sqh)); - for (pqh = sc->sc_ctl_start; pqh->hlink != sqh; pqh=pqh->hlink) -#if defined(DIAGNOSTIC) || defined(USB_DEBUG) - if (LE(pqh->qh.qh_hlink) & UHCI_PTR_T) { - printf("uhci_remove_ctrl: QH not found\n"); - return; - } -#else - ; + DPRINTFN(10, ("uhci_remove_hs_ctrl: sqh=%p\n", sqh)); +#ifdef UHCI_CTL_LOOP + uhci_rem_loop(sc); #endif - pqh->hlink = sqh->hlink; + /* + * The T bit should be set in the elink of the QH so that the HC + * doesn't follow the pointer. This condition may fail if the + * the transferred packet was short so that the QH still points + * at the last used TD. + * In this case we set the T bit and wait a little for the HC + * to stop looking at the TD. + */ + if (!(sqh->qh.qh_elink & LE(UHCI_PTR_T))) { + sqh->qh.qh_elink = LE(UHCI_PTR_T); + delay(UHCI_QH_REMOVE_DELAY); + } + + pqh = uhci_find_prev_qh(sc->sc_hctl_start, sqh); + pqh->hlink = sqh->hlink; pqh->qh.qh_hlink = sqh->qh.qh_hlink; - if (sc->sc_ctl_end == sqh) - sc->sc_ctl_end = pqh; + delay(UHCI_QH_REMOVE_DELAY); + if (sc->sc_hctl_end == sqh) + sc->sc_hctl_end = pqh; +} + +/* Add low speed control QH, called at splusb(). */ +void +uhci_add_ls_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh) +{ + uhci_soft_qh_t *eqh; + + SPLUSBCHECK; + + DPRINTFN(10, ("uhci_add_ls_ctrl: sqh=%p\n", sqh)); + eqh = sc->sc_lctl_end; + sqh->hlink = eqh->hlink; + sqh->qh.qh_hlink = eqh->qh.qh_hlink; + eqh->hlink = sqh; + eqh->qh.qh_hlink = LE(sqh->physaddr | UHCI_PTR_Q); + sc->sc_lctl_end = sqh; +} + +/* Remove low speed control QH, called at splusb(). */ +void +uhci_remove_ls_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh) +{ + uhci_soft_qh_t *pqh; + + SPLUSBCHECK; + + DPRINTFN(10, ("uhci_remove_ls_ctrl: sqh=%p\n", sqh)); + /* See comment in uhci_remove_hs_ctrl() */ + if (!(sqh->qh.qh_elink & LE(UHCI_PTR_T))) { + sqh->qh.qh_elink = LE(UHCI_PTR_T); + delay(UHCI_QH_REMOVE_DELAY); + } + pqh = uhci_find_prev_qh(sc->sc_lctl_start, sqh); + pqh->hlink = sqh->hlink; + pqh->qh.qh_hlink = sqh->qh.qh_hlink; + delay(UHCI_QH_REMOVE_DELAY); + if (sc->sc_lctl_end == sqh) + sc->sc_lctl_end = pqh; } /* Add bulk QH, called at splusb(). */ @@ -881,6 +1044,7 @@ eqh->hlink = sqh; eqh->qh.qh_hlink = LE(sqh->physaddr | UHCI_PTR_Q); sc->sc_bulk_end = sqh; + uhci_add_loop(sc); } /* Remove bulk QH, called at splusb(). */ @@ -892,6 +1056,8 @@ SPLUSBCHECK; DPRINTFN(10, ("uhci_remove_bulk: sqh=%p\n", sqh)); + uhci_rem_loop(sc); + for (pqh = sc->sc_bulk_start; pqh->hlink != sqh; pqh = pqh->hlink) #if defined(DIAGNOSTIC) || defined(USB_DEBUG) if (LE(pqh->qh.qh_hlink) & UHCI_PTR_T) { @@ -1929,7 +2095,10 @@ sqh->intr_info = ii; s = splusb(); - uhci_add_ctrl(sc, sqh); + if (dev->lowspeed) + uhci_add_ls_ctrl(sc, sqh); + else + uhci_add_hs_ctrl(sc, sqh); LIST_INSERT_HEAD(&sc->sc_intrhead, ii, list); #ifdef USB_DEBUG if (uhcidebug > 12) { @@ -2320,7 +2489,10 @@ LIST_REMOVE(ii, list); /* remove from active list */ - uhci_remove_ctrl(sc, upipe->u.ctl.sqh); + if (upipe->pipe.device->lowspeed) + uhci_remove_ls_ctrl(sc, upipe->u.ctl.sqh); + else + uhci_remove_hs_ctrl(sc, upipe->u.ctl.sqh); if (upipe->u.ctl.length != 0) uhci_free_std_chain(sc, ii->stdstart->link.std, ii->stdend); @@ -2604,6 +2776,7 @@ USETW2(p->bString[i], 0, s[i]); return (2*i+2); } + /* * Simulate a hardware hub by handling all the necessary requests. diff -u stable/uhcireg.h verze3/uhcireg.h --- stable/uhcireg.h Sun Jul 2 13:43:59 2000 +++ verze3/uhcireg.h Thu Nov 28 00:32:23 2002 @@ -113,9 +113,16 @@ #define UHCI_QH_ALIGN 16 typedef u_int32_t uhci_physaddr_t; +#define UHCI_PTR_TD 0x00000000 #define UHCI_PTR_T 0x00000001 #define UHCI_PTR_Q 0x00000002 #define UHCI_PTR_VF 0x00000004 + +/* + * Wait this long after a QH has been removed. This gives that HC a + * chance to stop looking at it before it's recycled. + */ +#define UHCI_QH_REMOVE_DELAY 5 /* * The Queue Heads and Transfer Descriptors and accessed diff -u stable/uhcivar.h verze3/uhcivar.h --- stable/uhcivar.h Wed Nov 1 00:23:29 2000 +++ verze3/uhcivar.h Thu Nov 28 00:23:12 2002 @@ -141,10 +141,14 @@ usb_dma_t sc_dma; struct uhci_vframe sc_vframes[UHCI_VFRAMELIST_COUNT]; - uhci_soft_qh_t *sc_ctl_start; /* dummy QH for control */ - uhci_soft_qh_t *sc_ctl_end; /* last control QH */ + uhci_soft_qh_t *sc_lctl_start; /* dummy QH for low speed control */ + uhci_soft_qh_t *sc_lctl_end; /* last control QH */ + uhci_soft_qh_t *sc_hctl_start; /* dummy QH for high speed control */ + uhci_soft_qh_t *sc_hctl_end; /* last control QH */ uhci_soft_qh_t *sc_bulk_start; /* dummy QH for bulk */ uhci_soft_qh_t *sc_bulk_end; /* last bulk transfer */ + uhci_soft_qh_t *sc_last_qh; /* dummy QH at the end */ + u_int32_t sc_loops; /* number of QHs that wants looping */ uhci_soft_td_t *sc_freetds; /* TD free list */ uhci_soft_qh_t *sc_freeqhs; /* QH free list */