On my Broadwell-era Xeon copying page with REP MOVSB is ~7.8% faster
than with REP MOVSQ. Choose REP MOVSB copy_page() at runtime
with alternatives.

Signed-off-by: Alexey Dobriyan <adobri...@gmail.com>
---

 arch/x86/lib/copy_page_64.S |   10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -13,13 +13,21 @@
  */
        ALIGN
 ENTRY(copy_page)
-       ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
+       ALTERNATIVE_2 "jmp copy_page_regs",     \
+               "", X86_FEATURE_REP_GOOD,       \
+               "jmp copy_page_rep_movsb", X86_FEATURE_ERMS
        movl    $4096/8, %ecx
        rep     movsq
        ret
 ENDPROC(copy_page)
 EXPORT_SYMBOL(copy_page)
 
+ENTRY(copy_page_rep_movsb)
+       mov     $4096, %ecx
+       rep movsb
+       ret
+ENDPROC(copy_page_rep_movsb)
+
 ENTRY(copy_page_regs)
        subq    $2*8,   %rsp
        movq    %rbx,   (%rsp)

Reply via email to