The PPC __arch_swab32 and __arch_swab16 functions generate non-optimal code. They don't schedule very well, they need to copy their input register, and swab16 needs an extra insn to clear its upper bits. I have improved these functions (see the my__xx versions below). Is there any problem with the new asm? If not, I will send a patch.
Below some example code to illustrate: #include <stdio.h> unsigned long __arch_swab32(unsigned long value) { unsigned long result; __asm__("rlwimi %0,%1,24,16,23\n\t" "rlwimi %0,%1,8,8,15\n\t" "rlwimi %0,%1,24,0,7" : "=r" (result) : "r" (value), "0" (value >> 24)); return result; } unsigned long my__arch_swab32(unsigned long value) { unsigned long tmp; __asm__("rlwimi %0,%0,24,0xffffffff" : "+r" (value)); __asm__("rlwinm %0,%1,16,0xffffffff" : "=r" (tmp), "+r" (value)); __asm__("rlwimi %0,%1,0,0x00ff0000" : "+r" (value), "+r" (tmp)); __asm__("rlwimi %0,%1,0,0x000000ff" : "+r" (value), "+r" (tmp)); return value; } unsigned short __arch_swab16(unsigned short value) { unsigned short result; __asm__("rlwimi %0,%1,8,16,23" : "=r" (result) : "r" (value), "0" (value >> 8)); return result; } unsigned short my__arch_swab16(unsigned short value) { __asm__("rlwimi %0,%0,16,0x00ff0000" : "+r" (value)); __asm__("rlwinm %0,%0,24,0x0000ffff" : "+r"(value)); return value; } main() { unsigned long x=0x12345678, y; y = my__arch_swab32(x); printf("swab32 x:%x, y:%x\n", x, y); y = my__arch_swab16(x); printf("swab16 x:%x, y:%x\n", x, y); } Generated asm: .file "tst.c" .section ".text" .align 2 .globl __arch_swab32 .type __arch_swab32, @function __arch_swab32: mr %r0,%r3 srwi %r3,%r3,24 #APP rlwimi %r3,%r0,24,16,23 rlwimi %r3,%r0,8,8,15 rlwimi %r3,%r0,24,0,7 #NO_APP blr .size __arch_swab32, .-__arch_swab32 .align 2 .globl my__arch_swab32 .type my__arch_swab32, @function my__arch_swab32: #APP rlwimi %r3,%r3,24,0xffffffff rlwinm %r0,%r3,16,0xffffffff rlwimi %r3,%r0,0,0x00ff0000 rlwimi %r3,%r0,0,0x000000ff #NO_APP blr .size my__arch_swab32, .-my__arch_swab32 .align 2 .globl __arch_swab16 .type __arch_swab16, @function __arch_swab16: mr %r0,%r3 srwi %r3,%r3,8 #APP rlwimi %r3,%r0,8,16,23 #NO_APP rlwinm %r3,%r3,0,0xffff blr .size __arch_swab16, .-__arch_swab16 .align 2 .globl my__arch_swab16 .type my__arch_swab16, @function my__arch_swab16: #APP rlwimi %r3,%r3,16,0x00ff0000 rlwinm 
%r3,%r3,24,0x0000ffff #NO_APP blr .size my__arch_swab16, .-my__arch_swab16 .section .rodata.str1.4,"aMS",@progbits,1 .align 2 .LC0: .string "swab32 x:%x, y:%x\n" .align 2 .LC1: .string "swab16 x:%x, y:%x\n" .section ".text" .align 2 .globl main .type main, @function main: mflr %r0 lis %r3,0x1234 stwu %r1,-16(%r1) ori %r3,%r3,22136 stw %r0,20(%r1) bl my__arch_swab32 mr %r5,%r3 lis %r4,0x1234 lis %r3,.LC0@ha ori %r4,%r4,22136 la %r3,.LC0@l(%r3) bl printf li %r3,22136 bl my__arch_swab16 lis %r4,0x1234 mr %r5,%r3 lis %r3,.LC1@ha la %r3,.LC1@l(%r3) ori %r4,%r4,22136 bl printf lwz %r0,20(%r1) addi %r1,%r1,16 mtlr %r0 blr .size main, .-main .section .note.GNU-stack,"",@progbits .ident "GCC: (GNU) 3.4.6 (Gentoo 3.4.6-r2, ssp-3.4.6-1.0, pie-8.7.9)" _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev