cacheable_memzero uses the dcbz instruction and is more efficient than memset(0) when the destination is in RAM.
This patch renames memset as generic_memset, and defines memset as a prolog to cacheable_memzero. This prolog checks if the byte to set is 0 and if the buffer is in RAM. If not, it falls back to generic_memset(). Signed-off-by: Christophe Leroy <christophe.le...@c-s.fr> --- arch/powerpc/lib/copy_32.S | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index cbca76c..d8a9a86 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -12,6 +12,7 @@ #include <asm/cache.h> #include <asm/errno.h> #include <asm/ppc_asm.h> +#include <asm/page.h> #define COPY_16_BYTES \ lwz r7,4(r4); \ @@ -74,6 +75,18 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1) * to set them to zero. This requires that the destination * area is cacheable. -- paulus */ +_GLOBAL(memset) + cmplwi r4,0 + bne- generic_memset + cmplwi r5,L1_CACHE_BYTES + blt- generic_memset + lis r8,max_pfn@ha + lwz r8,max_pfn@l(r8) + tophys (r9,r3) + srwi r9,r9,PAGE_SHIFT + cmplw r9,r8 + bge- generic_memset + mr r4,r5 _GLOBAL(cacheable_memzero) li r5,0 addi r6,r3,-4 @@ -116,7 +129,7 @@ _GLOBAL(cacheable_memzero) bdnz 8b blr -_GLOBAL(memset) +_GLOBAL(generic_memset) rlwimi r4,r4,8,16,23 rlwimi r4,r4,16,0,15 addi r6,r3,-4 -- 2.1.0 _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev