Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

Eric Dumazet Fri, 16 Apr 2021 13:11:47 -0700

On Fri, Apr 16, 2021 at 9:44 PM Al Viro <[email protected]> wrote:
>
> On Fri, Apr 16, 2021 at 12:24:13PM -0700, Eric Dumazet wrote:
> > From: Eric Dumazet <[email protected]>
> >
> > We have to loop only to copy u64 values.
> > After this first loop, we copy at most one u32, one u16 and one byte.
>
> Does it actually yield a better code?
>


Yes, my patch yields better code on an actual kernel use case

(net-next tree, look at put_cmsg())

 5ca: 48 89 0f              mov    %rcx,(%rdi)
 5cd: 89 77 08              mov    %esi,0x8(%rdi)
 5d0: 89 57 0c              mov    %edx,0xc(%rdi)
 5d3: 48 83 c7 10          add    $0x10,%rdi
 5d7: 48 83 c1 f0          add    $0xfffffffffffffff0,%rcx
 5db: 48 83 f9 07          cmp    $0x7,%rcx
 5df: 76 40                jbe    621 <put_cmsg+0x111>
 5e1: 66 66 66 66 66 66 2e data16 data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
 5e8: 0f 1f 84 00 00 00 00
 5ef: 00
 5f0: 49 8b 10              mov    (%r8),%rdx
 5f3: 48 89 17              mov    %rdx,(%rdi)
 5f6: 48 83 c7 08          add    $0x8,%rdi
 5fa: 49 83 c0 08          add    $0x8,%r8
 5fe: 48 83 c1 f8          add    $0xfffffffffffffff8,%rcx
 602: 48 83 f9 07          cmp    $0x7,%rcx
 606: 77 e8                ja     5f0 <put_cmsg+0xe0>
 608: eb 17                jmp    621 <put_cmsg+0x111>
 60a: 66 0f 1f 44 00 00    nopw   0x0(%rax,%rax,1)
 610: 41 8b 10              mov    (%r8),%edx
 613: 89 17                mov    %edx,(%rdi)
 615: 48 83 c7 04          add    $0x4,%rdi
 619: 49 83 c0 04          add    $0x4,%r8
 61d: 48 83 c1 fc          add    $0xfffffffffffffffc,%rcx
 621: 48 83 f9 03          cmp    $0x3,%rcx
 625: 77 e9                ja     610 <put_cmsg+0x100>
 627: eb 1a                jmp    643 <put_cmsg+0x133>
 629: 0f 1f 80 00 00 00 00 nopl   0x0(%rax)
 630: 41 0f b7 10          movzwl (%r8),%edx
 634: 66 89 17              mov    %dx,(%rdi)
 637: 48 83 c7 02          add    $0x2,%rdi
 63b: 49 83 c0 02          add    $0x2,%r8
 63f: 48 83 c1 fe          add    $0xfffffffffffffffe,%rcx
 643: 48 83 f9 01          cmp    $0x1,%rcx
 647: 77 e7                ja     630 <put_cmsg+0x120>
 649: eb 15                jmp    660 <put_cmsg+0x150>
 64b: 0f 1f 44 00 00        nopl   0x0(%rax,%rax,1)
 650: 41 0f b6 08          movzbl (%r8),%ecx
 654: 88 0f                mov    %cl,(%rdi)
 656: 48 83 c7 01          add    $0x1,%rdi
 65a: 49 83 c0 01          add    $0x1,%r8
 65e: 31 c9                xor    %ecx,%ecx
 660: 48 85 c9              test   %rcx,%rcx
 663: 75 eb                jne    650 <put_cmsg+0x140>


> FWIW, this
> void bar(unsigned);
> void foo(unsigned n)
> {
>         while (n >= 8) {
>                 bar(n);
>                 n -= 8;
>         }
>         while (n >= 4) {
>                 bar(n);
>                 n -= 4;
>         }
>         while (n >= 2) {
>                 bar(n);
>                 n -= 2;
>         }
>         while (n >= 1) {
>                 bar(n);
>                 n -= 1;
>         }
> }
>
> will compile (with -O2) to
>         pushq   %rbp
>         pushq   %rbx
>         movl    %edi, %ebx
>         subq    $8, %rsp
>         cmpl    $7, %edi
>         jbe     .L2
>         movl    %edi, %ebp
> .L3:
>         movl    %ebp, %edi
>         subl    $8, %ebp
>         call    bar@PLT
>         cmpl    $7, %ebp
>         ja      .L3
>         andl    $7, %ebx
> .L2:
>         cmpl    $3, %ebx
>         jbe     .L4
>         movl    %ebx, %edi
>         andl    $3, %ebx
>         call    bar@PLT
> .L4:
>         cmpl    $1, %ebx
>         jbe     .L5
>         movl    %ebx, %edi
>         andl    $1, %ebx
>         call    bar@PLT
> .L5:
>         testl   %ebx, %ebx
>         je      .L1
>         addq    $8, %rsp
>         movl    $1, %edi
>         popq    %rbx
>         popq    %rbp
>         jmp     bar@PLT
> .L1:
>         addq    $8, %rsp
>         popq    %rbx
>         popq    %rbp
>         ret
>
> i.e. loop + if + if + if...

Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

Reply via email to