On my Broadwell-era Xeon, copying a page with REP MOVSB is ~7.8% faster than with REP MOVSQ. Select the REP MOVSB variant of copy_page() at runtime via alternatives when the CPU advertises ERMS.

Signed-off-by: Alexey Dobriyan <adobri...@gmail.com>
---
 arch/x86/lib/copy_page_64.S | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -13,13 +13,21 @@
  */
 	ALIGN
 ENTRY(copy_page)
-	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
+	ALTERNATIVE_2 "jmp copy_page_regs", \
+		"", X86_FEATURE_REP_GOOD, \
+		"jmp copy_page_rep_movsb", X86_FEATURE_ERMS
 	movl $4096/8, %ecx
 	rep movsq
 	ret
 ENDPROC(copy_page)
 EXPORT_SYMBOL(copy_page)
 
+ENTRY(copy_page_rep_movsb)
+	mov $4096, %ecx
+	rep movsb
+	ret
+ENDPROC(copy_page_rep_movsb)
+
 ENTRY(copy_page_regs)
 	subq $2*8, %rsp
 	movq %rbx, (%rsp)