>
> This trivial patch changes memcpy_(to|from)io so as to transfer as many
> 32-bit words as possible in 32-bit accesses (in the current solution,
> the last 32-bit word was transferred as four single-byte accesses).
>
> Signed-off-by: Albrecht Dreß <albrecht.dr...@arcor.de>
> ---
>
> diff -urpN -X linux-2.6.29.1.orig/Documentation/dontdiff
> linux-2.6.29.1.orig/arch/powerpc/kernel/io.c
> linux-2.6.29.1/arch/powerpc/kernel/io.c
> --- linux-2.6.29.1.orig/arch/powerpc/kernel/io.c 2009-04-02
> 22:55:27.000000000 +0200
> +++ linux-2.6.29.1/arch/powerpc/kernel/io.c 2009-05-27
> 11:36:09.000000000 +0200
> @@ -161,7 +161,7 @@ void _memcpy_fromio(void *dest, const vo
> 		dest++;
> 		n--;
> 	}
> -	while(n > 4) {
> +	while(n >= 4) {
> 		*((u32 *)dest) = *((volatile u32 *)vsrc);
> 		eieio();
> 		vsrc += 4;
> @@ -190,7 +190,7 @@ void _memcpy_toio(volatile void __iomem
> 		vdest++;
> 		n--;
> 	}
> -	while(n > 4) {
> +	while(n >= 4) {
> 		*((volatile u32 *)vdest) = *((volatile u32 *)src);
> 		src += 4;
> 		vdest += 4;
/*
 * Note from the mailing-list reply that carried this routine:
 *
 *   hmm, these do look a bit suboptimal anyway. Any reason not to write
 *   them something like below (written by me for uClibc a long time ago)?
 *   You will have to add eieio()/sync.
 */

/*
 * memcpy - copy n bytes from 'from' to 'to' using 32-bit word accesses
 *          where possible.
 *
 * to:   destination buffer
 * from: source buffer
 * n:    number of bytes to copy
 *
 * Returns 'to'.
 *
 * Strategy, for n >= 8: copy single bytes until the destination is 4-byte
 * aligned, then move 8 bytes per iteration as two 32-bit words, then one
 * more word if at least 4 bytes remain, then the trailing bytes.  For
 * n < 8 the alignment step is skipped, so the single word access on that
 * path may be unaligned.  Only the destination is ever aligned; word
 * loads from the source may be unaligned on every path.
 *
 * PPC can do pre increment and load/store, but not post increment and
 * load/store.  Therefore use *++ptr instead of *ptr++.  To make that
 * possible both working pointers are kept 4 bytes behind the next byte
 * to be copied.
 */
void *memcpy(void *to, const void *from, size_t n)
{
	/*
	 * Copy unit for the word accesses.  It must be exactly 4 bytes,
	 * because the pointers advance by 4 per word.  A wider type (such as
	 * unsigned long on LP64 targets) would move 8 bytes per access and
	 * read and write past the end of both buffers.
	 */
	typedef unsigned int word32;
	/* Refuse to build where unsigned int is not 4 bytes wide. */
	typedef char word32_is_4_bytes[sizeof(word32) == 4 ? 1 : -1];

	unsigned long rem, chunks;
	word32 tmp1, tmp2;
	unsigned char *tmp_to = (unsigned char *)to - 4;
	const unsigned char *tmp_from = (const unsigned char *)from - 4;

	(void)sizeof(word32_is_4_bytes);

	chunks = n / 8;
	if (chunks) {
		/* tmp_to is 'to' minus 4, so it has the same alignment mod 4. */
		rem = (unsigned long)tmp_to % 4;
		if (rem) {
			/* Byte-copy up to the next 4-byte boundary of the destination. */
			rem = 4 - rem;
			n -= rem;
			do {
				*(tmp_to + 4) = *(tmp_from + 4);
				++tmp_from;
				++tmp_to;
			} while (--rem);
			/* n >= 8 and rem <= 3 here, so n is still >= 5. */
			chunks = n / 8;
		}

		for (; chunks; --chunks) {
			/* make gcc load all data, then store it */
			tmp1 = *(const word32 *)(tmp_from + 4);
			tmp_from += 8;
			tmp2 = *(const word32 *)tmp_from;
			*(word32 *)(tmp_to + 4) = tmp1;
			tmp_to += 8;
			*(word32 *)tmp_to = tmp2;
		}
	}

	/* Fewer than 8 bytes are left: at most one word plus up to 3 bytes. */
	n %= 8;
	if (n >= 4) {
		*(word32 *)(tmp_to + 4) = *(const word32 *)(tmp_from + 4);
		tmp_from += 4;
		tmp_to += 4;
		n -= 4;
	}
	if (!n)
		return to;

	/* Re-bias from "4 behind" to "1 behind" for the pre-increment byte loop. */
	tmp_from += 3;
	tmp_to += 3;
	do {
		*++tmp_to = *++tmp_from;
	} while (--n);
	return to;
}

/*
 * _______________________________________________
 * Linuxppc-dev mailing list
 * Linuxppc-dev@ozlabs.org
 * https://ozlabs.org/mailman/listinfo/linuxppc-dev
 */