Author: mjg
Date: Sat Jun  2 20:14:43 2018
New Revision: 334537
URL: https://svnweb.freebsd.org/changeset/base/334537

Log:
  amd64: add a mild depessimization to rep mov/stos users
  
  Currently all the primitives are waiting for a rewrite; tidy them up in the
  meantime.
  
  The vast majority of cases pass sizes that are multiples of 8, which means
  the trailing rep stosb/movsb has nothing to do. It turns out that testing
  first whether there is anything to do is a big win across the board (CPUs
  with and without ERMS, Intel and AMD), while not pessimizing the case where
  there is work to do.
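
  For illustration, a minimal standalone sketch of the resulting control flow
  (hypothetical code following the SysV amd64 calling convention, without the
  kernel's ENTRY/POP_FRAME_POINTER framing; the sketch_memcpy name is made up
  here, not part of the commit):

  	.text
  	.globl	sketch_memcpy
  sketch_memcpy:			/* (dst=%rdi, src=%rsi, len=%rdx) */
  	movq	%rdi,%rax		/* memcpy returns dst */
  	movq	%rdx,%rcx
  	shrq	$3,%rcx			/* number of full 8-byte words */
  	rep
  	movsq
  	movq	%rdx,%rcx
  	andq	$7,%rcx			/* any bytes left? */
  	jne	1f			/* only reach movsb when there are */
  	ret
  1:
  	rep
  	movsb
  	ret

  With a multiple-of-8 length the jne falls through to ret and the startup
  cost of rep movsb is never paid, which is where the win below comes from.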
  
  Sample results for zeroing 64 bytes (ops/second):
  Ryzen Threadripper 1950X              91433212 -> 147265741
  Intel(R) Xeon(R) CPU X5675 @ 3.07GHz  90714044 -> 121992888
  
  bzero and bcopy are on their way out and were not modified. Nothing in the
  tree uses them.

Modified:
  head/sys/amd64/amd64/support.S

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Sat Jun  2 20:11:28 2018	(r334536)
+++ head/sys/amd64/amd64/support.S	Sat Jun  2 20:14:43 2018	(r334537)
@@ -205,6 +205,11 @@ ENTRY(memmove)
        movsq
        movq    %rdx,%rcx
        andq    $7,%rcx                         /* any bytes left? */
+       jne     2f
+       movq    %r9,%rax
+       POP_FRAME_POINTER
+       ret
+2:
        rep
        movsb
        movq    %r9,%rax
@@ -248,6 +253,10 @@ ENTRY(memcpy)
        movsq
        movq    %rdx,%rcx
        andq    $7,%rcx                         /* any bytes left? */
+       jne     1f
+       POP_FRAME_POINTER
+       ret
+1:
        rep
        movsb
        POP_FRAME_POINTER
@@ -269,6 +278,11 @@ ENTRY(memset)
        stosq
        movq    %rdx,%rcx
        andq    $7,%rcx
+       jne     1f
+       movq    %r9,%rax
+       POP_FRAME_POINTER
+       ret
+1:
        rep
        stosb
        movq    %r9,%rax
@@ -358,6 +372,7 @@ ENTRY(copyout)
        movsq
        movb    %dl,%cl
        andb    $7,%cl
+       je      done_copyout
        rep
        movsb
 
@@ -406,6 +421,7 @@ ENTRY(copyin)
        movsq
        movb    %al,%cl
        andb    $7,%cl                          /* copy remaining bytes */
+       je      done_copyin
        rep
        movsb
 