Benjamin Herrenschmidt <b...@kernel.crashing.org> wrote on 2011/09/10 11:24:39:
>
> On Fri, 2011-09-09 at 14:10 +0200, Joakim Tjernlund wrote:
> > PPC __arch_swab32 and __arch_swab16 generates non optimal code.
> > They do not schedule very well, need to copy its input register
> > and swab16 needs an extra insn to clear its upper bits.
> > Fix this with better inline ASM.
> >
> > Signed-off-by: Joakim Tjernlund <joakim.tjernl...@transmode.se>
> > ---
> >  arch/powerpc/include/asm/swab.h |   28 ++++++++++++++--------------
> >  1 files changed, 14 insertions(+), 14 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/swab.h 
> > b/arch/powerpc/include/asm/swab.h
> > index c581e3e..3b9a200 100644
> > --- a/arch/powerpc/include/asm/swab.h
> > +++ b/arch/powerpc/include/asm/swab.h
> > @@ -61,25 +61,25 @@ static inline void __arch_swab32s(__u32 *addr)
> >
> >  static inline __attribute_const__ __u16 __arch_swab16(__u16 value)
> >  {
> > -   __u16 result;
> > -
> > -   __asm__("rlwimi %0,%1,8,16,23"
> > -       : "=r" (result)
> > -       : "r" (value), "0" (value >> 8));
> > -   return result;
> > +   __asm__("rlwimi %0,%0,16,0x00ff0000\n\t"
> > +      "rlwinm %0,%0,24,0x0000ffff"
> > +      : "+r"(value));
> > +   return value;
> >  }
> >  #define __arch_swab16 __arch_swab16
>
> I don't quite get the thing about needing to clear the high bits.
>
> Value is a u16 to start with, %0 is pre-filled with value >> 8 which
> won't add anything to the upper bits, neither will rlwimi, so why would
> you need to clear upper bits ?
>
> Now I do see why gcc might generate something sub-optimal here, but can
> you provide examples of asm output before/after in the patch commit ?

I did a simple test earlier like so:

unsigned short __arch_swab16(unsigned short value)
{
                 unsigned short result;

                 __asm__("rlwimi %0,%1,8,16,23"
                     : "=r" (result)
                     : "r" (value), "0" (value >> 8));
                 return result;
}

unsigned short my__arch_swab16(unsigned short value)
{
                 __asm__("rlwimi %0,%0,16,0x00ff0000\n\t"
                         "rlwinm %0,%0,24,0x0000ffff"
                         : "+r"(value));
                 return value;
}

gcc -O2 -S outputs:

        .file   "my_swab.c"
        .section        ".text"
        .align 2
        .globl __arch_swab16
        .type   __arch_swab16, @function
__arch_swab16:
        mr 0,3
        srwi 3,3,8
#APP
        rlwimi 3,0,8,16,23
#NO_APP
        rlwinm 3,3,0,0xffff
        blr
        .size   __arch_swab16, .-__arch_swab16
        .align 2
        .globl my__arch_swab16
        .type   my__arch_swab16, @function
my__arch_swab16:
#APP
        rlwimi 3,3,16,0x00ff0000
        rlwinm 3,3,24,0x0000ffff
#NO_APP
        blr
        .size   my__arch_swab16, .-my__arch_swab16
        .section        .note.GNU-stack,"",@progbits
        .ident  "GCC: (GNU) 3.4.6 (Gentoo 3.4.6-r2, ssp-3.4.6-1.0, pie-8.7.9)"

However I recently installed gcc 4.5.2 to do some tests and that gives
me this instead:
        .file   "my_swab.c"
        .gnu_attribute 4, 2
        .gnu_attribute 8, 1
        .gnu_attribute 12, 2
        .section        ".text"
        .align 2
        .globl __arch_swab16
        .type   __arch_swab16, @function
__arch_swab16:
        srwi 0,3,8
#APP
 # 5 "my_swab.c" 1
        rlwimi 0,3,8,16,23
 # 0 "" 2
#NO_APP
        rlwinm 3,0,0,0xffff
        blr
        .size   __arch_swab16, .-__arch_swab16
        .align 2
        .globl my__arch_swab16
        .type   my__arch_swab16, @function
my__arch_swab16:
#APP
 # 13 "my_swab.c" 1
        rlwimi 3,3,16,0x00ff0000
        rlwinm 3,3,24,0x0000ffff
 # 0 "" 2
#NO_APP
        rlwinm 3,3,0,0xffff
        blr
        .size   my__arch_swab16, .-my__arch_swab16
        .ident  "GCC: (Gentoo 4.5.2 p1.1, pie-0.4.5) 4.5.2"
        .section        .note.GNU-stack,"",@progbits

So now both versions mask off the high bits, strange.
Not sure what is going on here?

 Jocke

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to