https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113235

Jan Hubicka <hubicka at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |hubicka at gcc dot gnu.org

--- Comment #4 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
I keep mentioning to Larabel that he should use -fno-semantic-interposition,
but he doesn't.

The profile is very simple:

 96.75%  SMHasher                                        [.] keccakf.lto_priv.0

All of the time goes to one simple loop. On Zen 3 with GCC 13 and -march=native -Ofast -flto I get:

  3.85 │330:   mov    %r8,%rdi                                                  
  7.68 │       movslq (%rsi,%r9,1),%rcx                                         
  3.85 │       lea    (%rax,%rcx,8),%r10                                        
  3.86 │       mov    (%rdx,%r9,1),%ecx                                         
  3.83 │       add    $0x4,%r9                                                  
  3.86 │       mov    (%r10),%r8                                                
  7.37 │       rol    %cl,%rdi                                                  
  7.37 │       mov    %rdi,(%r10)                                               
  4.76 │       cmp    $0x60,%r9                                                 
  0.00 │     ↑ jne    330                                                       


Clang seems to unroll it:

  0.25 │ d0:   mov  -0x48(%rsp),%rdx
  0.25 │       xor  %r12,%rcx
  0.25 │       mov  %r13,%r12
  0.25 │       mov  %r13,0x10(%rsp)
  0.25 │       mov  %rax,%r13
  0.26 │       xor  %r15,%r13
  0.23 │       mov  %r11,-0x70(%rsp)
  0.25 │       mov  %r8,0x8(%rsp)
  0.25 │       mov  %r15,-0x40(%rsp)
  0.25 │       mov  %r10,%r15
  0.26 │       mov  %r10,(%rsp)
  0.26 │       mov  %r14,%r10
  0.25 │       xor  %r12,%r10
  0.26 │       xor  %rsi,%r15
  0.24 │       mov  %rbp,-0x80(%rsp)
  0.25 │       xor  %rcx,%r15
  0.26 │       mov  -0x60(%rsp),%rcx
  0.25 │       xor  -0x68(%rsp),%r15
  0.26 │       xor  %rbp,%rdx
  0.25 │       mov  -0x30(%rsp),%rbp
  0.25 │       xor  %rdx,%r13
  0.24 │       mov  -0x10(%rsp),%rdx
  0.25 │       mov  %rcx,%r12
  0.24 │       xor  %rcx,%r13
  0.25 │       mov  $0x1,%ecx
  0.25 │       xor  %r11,%rdx
  0.24 │       mov  %r8,%r11
  0.25 │       mov  -0x28(%rsp),%r8
  0.26 │       xor  -0x58(%rsp),%r8
  0.24 │       xor  %rdx,%r8
  0.26 │       mov  -0x8(%rsp),%rdx
  0.25 │       xor  %rbp,%r8
  0.26 │       xor  %r11,%rdx
  0.25 │       mov  -0x20(%rsp),%r11
  0.25 │       xor  %rdx,%r10
....

Reply via email to