> Date: Sat, 10 Nov 2012 18:10:53 +0100 (CET)
> From: Stefan Fritsch <[email protected]>
> 
> On Thu, 8 Nov 2012, Mark Kettenis wrote:
> >> On Tuesday 21 August 2012, Stefan Fritsch wrote:
> >>> On x86, the xchg operation between reg and mem has an implicit lock
> >>> prefix, i.e. it is a relatively expensive atomic operation. This is
> >>> not needed here.
> >>
> >> OKs, anyone?
> >
> > What you say makes sense, although it might matter only on MP
> > (capable) systems.
> 
> True, but MP is the norm nowadays.
> 
> > If you really want to make things faster, I
> > suppose you could change the code into something like
> >
> >    pushl   %esi
> >    pushl   %edi
> >    movl    12(%esp),%edi
> >    movl    16(%esp),%esi
> 
> That's true. Like this (suggestions for a better label name are 
> welcome):

What about docopy?  And I would put the label on a line of its own,
so that it stands out more.

> --- locore.s
> +++ locore.s
> @@ -789,7 +789,7 @@ ENTRY(bcopy)
>       pushl   %edi
>       movl    12(%esp),%esi
>       movl    16(%esp),%edi
> -     movl    20(%esp),%ecx
> +bcopy2:      movl    20(%esp),%ecx
>       movl    %edi,%eax
>       subl    %esi,%eax
>       cmpl    %ecx,%eax               # overlapping?
> @@ -827,13 +827,15 @@ ENTRY(bcopy)
>       ret
> 
>   /*
> - * Emulate memcpy() by swapping the first two arguments and calling bcopy()
> + * Emulate memcpy() by loading the first two arguments in reverse order
> + * and jumping into bcopy()
>    */
>   ENTRY(memcpy)
> -     movl    4(%esp),%ecx
> -     xchg    8(%esp),%ecx
> -     movl    %ecx,4(%esp)
> -     jmp     _C_LABEL(bcopy)
> +     pushl   %esi
> +     pushl   %edi
> +     movl    12(%esp),%edi
> +     movl    16(%esp),%esi
> +     jmp     bcopy2
> 
>   
> /*****************************************************************************/

Reply via email to