>
> This trivial patch changes memcpy_(to|from)io as to transfer as many
> 32-bit words as possible in 32-bit accesses (in the current solution,
> the last 32-bit word was transferred as 4 byte accesses).
>
> Signed-off-by: Albrecht Dreß <albrecht.dr...@arcor.de>
> ---
>
> diff -urpN -X linux-2.6.29.1.orig/Documentation/dontdiff
> linux-2.6.29.1.orig/arch/powerpc/kernel/io.c
> linux-2.6.29.1/arch/powerpc/kernel/io.c
> --- linux-2.6.29.1.orig/arch/powerpc/kernel/io.c   2009-04-02
> 22:55:27.000000000 +0200
> +++ linux-2.6.29.1/arch/powerpc/kernel/io.c   2009-05-27
> 11:36:09.000000000 +0200
> @@ -161,7 +161,7 @@ void _memcpy_fromio(void *dest, const vo
>         dest++;
>         n--;
>      }
> -   while(n > 4) {
> +   while(n >= 4) {
>         *((u32 *)dest) = *((volatile u32 *)vsrc);
>         eieio();
>         vsrc += 4;
> @@ -190,7 +190,7 @@ void _memcpy_toio(volatile void __iomem
>         vdest++;
>         n--;
>      }
> -   while(n > 4) {
> +   while(n >= 4) {
>         *((volatile u32 *)vdest) = *((volatile u32 *)src);
>         src += 4;
>         vdest += 4;

Hmm, these do look a bit suboptimal anyway. Any reason not to write
them something like the code below (written by me for uClibc a long time ago)?
You will have to add eieio()/sync yourself.

#include <stddef.h>
#include <stdint.h>

/*
 * Copy n bytes from 'from' to 'to' and return 'to'.
 *
 * PPC can do pre-increment and load/store, but not post-increment and
 * load/store.  Therefore both cursors are kept 4 bytes *behind* the next
 * byte to transfer and are accessed as *(p + 4) / *++p instead of *p++.
 *
 * Strategy:
 *   - fewer than 8 bytes: skip alignment, copy one optional 32-bit word
 *     and then single bytes;
 *   - otherwise: copy single bytes until the destination is 4-byte
 *     aligned, then copy 8 bytes (two 32-bit words) per iteration, then
 *     finish the remainder as in the short case.
 *
 * Only the destination is aligned; the source is read with whatever
 * alignment it happens to have.  This is a plain memory copy: it
 * contains no eieio()/sync barriers.
 *
 * NOTE(review): the biased cursors point before the start of the buffers
 * until they are first advanced.  That is outside what ISO C guarantees
 * for pointer arithmetic; it is kept because the whole addressing scheme
 * is built on it.
 */
void *memcpy(void *to, const void *from, size_t n)
{
        /*
         * 32-bit transfer unit.  The original used 'unsigned long', which
         * is 8 bytes wide on LP64 targets and made every word access run
         * 4 bytes past the intended range.  may_alias permits accessing
         * arbitrary objects through it; aligned(1) drops the alignment
         * assumption, because the source is never aligned and copies
         * shorter than 8 bytes do not align the destination either.
         */
        typedef uint32_t __attribute__((__may_alias__, __aligned__(1))) word_t;

        const unsigned char *tmp_from = (const unsigned char *)from - 4;
        unsigned char *tmp_to = (unsigned char *)to - 4;
        size_t rem, chunks;
        uint32_t tmp1, tmp2;

        chunks = n / 8;
        if (!chunks)
                goto lessthan8;
        /* tmp_to is biased by exactly 4, so it has the alignment of 'to' */
        rem = (uintptr_t)tmp_to % 4;
        if (rem)
                goto align;
 copy_chunks:
        do {
                /* make gcc load all data, then store it */
                tmp1 = *(const word_t *)(tmp_from + 4);
                tmp_from += 8;
                tmp2 = *(const word_t *)tmp_from;
                *(word_t *)(tmp_to + 4) = tmp1;
                tmp_to += 8;
                *(word_t *)tmp_to = tmp2;
        } while (--chunks);
 lessthan8:
        n = n % 8;
        if (n >= 4) {
                /* one more whole word fits */
                *(word_t *)(tmp_to + 4) = *(const word_t *)(tmp_from + 4);
                tmp_from += 4;
                tmp_to += 4;
                n = n - 4;
        }
        if (!n)
                return to;
        /* change the bias from 4 to 1 so that *++p hits the next byte */
        tmp_from += 3;
        tmp_to += 3;
        do {
                *++tmp_to = *++tmp_from;
        } while (--n);

        return to;
 align:
        /* only reached with n >= 8, so the 1..3 bytes below always exist */
        rem = 4 - rem;
        n = n - rem;
        do {
                *(tmp_to + 4) = *(tmp_from + 4);
                ++tmp_from;
                ++tmp_to;
        } while (--rem);
        chunks = n / 8;
        if (chunks)
                goto copy_chunks;
        goto lessthan8;
}

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev

Reply via email to