On Mon, 21 Nov 2016 09:45:04 -0800
Andi Kleen <a...@firstfloor.org> wrote:

> On Mon, Nov 21, 2016 at 06:18:53PM +0100, Peter Zijlstra wrote:
> > On Mon, Nov 21, 2016 at 09:06:13AM -0800, Andi Kleen wrote:  
> > > > > it got away with attached change.. but this rcu logic
> > > > > is far beyond me, so it's just wild guess.. ;-)  
> > > > 
> > > > I think I prefer something like the below, that only annotates the one
> > > > RDMSR in question, instead of all of them.  
> > > 
> > > It would be far better to just fix trace points that they always work.
> > > 
> > > This whole thing is a travesty: we have tens of thousands of lines of 
> > > code in
> > > ftrace to support tracing in NMIs, but then "debug features"[1] like
> > > this come around and make trace points unusable for far more code than
> > > just the NMI handlers.  
> > 
> > Not sure the idle handlers are more code than we have NMI code, but yes,
> > its annoying.
> > 
> > Its not ftrace as such though, its RCU, ftrace simply uses RCU to avoid
> > locking, as one does.
> > 
> > Biggest objection would be that the rcu_irq_enter_irqson() thing does
> > POPF and rcu_irq_exit_irqson() does again. So wrapping every tracepoint
> > with that is quite a few cycles.  
> 
> This is only when the trace point is enabled right?
> 
> ftrace is already executing a lot of instructions when enabled. Adding two 
> POPF
> won't be end of the world.

And a popf can be much more expensive than any of these. You should
know, not all instructions are equal.

Using perf, I've seen popf take up almst 30% of a function the size of
this.

-- Steve

> 
> -Andi
> 
> 
> 264171.903577780:  ffffffff810d6040 trace_event_raw_event_sched_switch        
> push %rbp
> 264171.903577780:  ffffffff810d6041 trace_event_raw_event_sched_switch        
> mov %rsp, %rbp
> 264171.903577780:  ffffffff810d6044 trace_event_raw_event_sched_switch        
> push %r15
> 264171.903577780:  ffffffff810d6046 trace_event_raw_event_sched_switch        
> push %r14
> 264171.903577780:  ffffffff810d6048 trace_event_raw_event_sched_switch        
> push %r13
> 264171.903577780:  ffffffff810d604a trace_event_raw_event_sched_switch        
> push %r12
> 264171.903577780:  ffffffff810d604c trace_event_raw_event_sched_switch        
> mov %esi, %r13d
> 264171.903577780:  ffffffff810d604f trace_event_raw_event_sched_switch        
> push %rbx
> 264171.903577780:  ffffffff810d6050 trace_event_raw_event_sched_switch        
> mov %rdi, %r12
> 264171.903577780:  ffffffff810d6053 trace_event_raw_event_sched_switch        
> mov %rdx, %r14
> 264171.903577780:  ffffffff810d6056 trace_event_raw_event_sched_switch        
> mov %rcx, %r15
> 264171.903577780:  ffffffff810d6059 trace_event_raw_event_sched_switch        
> sub $0x30, %rsp
> 264171.903577780:  ffffffff810d605d trace_event_raw_event_sched_switch        
> mov 0x48(%rdi), %rbx
> 264171.903577780:  ffffffff810d6061 trace_event_raw_event_sched_switch        
> test $0x80, %bl
> 264171.903577780:  ffffffff810d6064 trace_event_raw_event_sched_switch        
> jnz 0x810d608c
> 264171.903577780:  ffffffff810d6066 trace_event_raw_event_sched_switch        
> test $0x40, %bl
> 264171.903577780:  ffffffff810d6069 trace_event_raw_event_sched_switch        
> jnz trace_event_raw_event_sched_switch+216
> 264171.903577780:  ffffffff810d606f trace_event_raw_event_sched_switch        
> test $0x20, %bl
> 264171.903577780:  ffffffff810d6072 trace_event_raw_event_sched_switch        
> jz 0x810d6083
> 264171.903577780:  ffffffff810d6083 trace_event_raw_event_sched_switch        
> and $0x1, %bh
> 264171.903577780:  ffffffff810d6086 trace_event_raw_event_sched_switch        
> jnz trace_event_raw_event_sched_switch+237
> 264171.903578113:  ffffffff810d608c trace_event_raw_event_sched_switch        
> mov $0x40, %edx
> 264171.903578113:  ffffffff810d6091 trace_event_raw_event_sched_switch        
> mov %r12, %rsi
> 264171.903578113:  ffffffff810d6094 trace_event_raw_event_sched_switch        
> mov %rsp, %rdi
> 264171.903578113:  ffffffff810d6097 trace_event_raw_event_sched_switch        
> call trace_event_buffer_reserve
> 264171.903578113:  ffffffff8114f150 trace_event_buffer_reserve        test 
> $0x1, 0x49(%rsi)
> 264171.903578113:  ffffffff8114f154 trace_event_buffer_reserve        push 
> %rbp
> 264171.903578113:  ffffffff8114f155 trace_event_buffer_reserve        mov 
> 0x10(%rsi), %r10
> 264171.903578113:  ffffffff8114f159 trace_event_buffer_reserve        mov 
> %rsp, %rbp
> 264171.903578113:  ffffffff8114f15c trace_event_buffer_reserve        push 
> %rbx
> 264171.903578113:  ffffffff8114f15d trace_event_buffer_reserve        jz 
> 0x8114f17e
> 264171.903578113:  ffffffff8114f17e trace_event_buffer_reserve        mov 
> %rdx, %rcx
> 264171.903578113:  ffffffff8114f181 trace_event_buffer_reserve        mov 
> %rdi, %rbx
> 264171.903578113:  ffffffff8114f184 trace_event_buffer_reserve        pushfq 
> 264171.903578113:  ffffffff8114f185 trace_event_buffer_reserve        pop %r8
> 264171.903578113:  ffffffff8114f187 trace_event_buffer_reserve        mov 
> %gs:0x7eebd2c1(%rip), %r9d
> 264171.903578113:  ffffffff8114f18f trace_event_buffer_reserve        and 
> $0x7fffffff, %r9d
> 264171.903578113:  ffffffff8114f196 trace_event_buffer_reserve        mov 
> %r8, 0x20(%rdi)
> 264171.903578113:  ffffffff8114f19a trace_event_buffer_reserve        mov 
> %rsi, 0x10(%rdi)
> 264171.903578113:  ffffffff8114f19e trace_event_buffer_reserve        mov 
> %r9d, 0x28(%rdi)
> 264171.903578113:  ffffffff8114f1a2 trace_event_buffer_reserve        mov 
> 0x40(%r10), %edx
> 264171.903578113:  ffffffff8114f1a6 trace_event_buffer_reserve        call 
> trace_event_buffer_lock_reserve
> 264171.903578113:  ffffffff81142210 trace_event_buffer_lock_reserve   push 
> %rbp
> 264171.903578113:  ffffffff81142211 trace_event_buffer_lock_reserve   mov 
> %rsp, %rbp
> 264171.903578113:  ffffffff81142214 trace_event_buffer_lock_reserve   push 
> %r15
> 264171.903578113:  ffffffff81142216 trace_event_buffer_lock_reserve   push 
> %r14
> 264171.903578113:  ffffffff81142218 trace_event_buffer_lock_reserve   push 
> %r13
> 264171.903578113:  ffffffff8114221a trace_event_buffer_lock_reserve   push 
> %r12
> 264171.903578113:  ffffffff8114221c trace_event_buffer_lock_reserve   mov 
> %rdi, %r14
> 264171.903578113:  ffffffff8114221f trace_event_buffer_lock_reserve   push 
> %rbx
> 264171.903578113:  ffffffff81142220 trace_event_buffer_lock_reserve   mov 
> %edx, %r12d
> 264171.903578113:  ffffffff81142223 trace_event_buffer_lock_reserve   mov 
> %rsi, %rbx
> 264171.903578113:  ffffffff81142226 trace_event_buffer_lock_reserve   mov 
> %rcx, %r13
> 264171.903578113:  ffffffff81142229 trace_event_buffer_lock_reserve   mov 
> %r9d, %r15d
> 264171.903578113:  ffffffff8114222c trace_event_buffer_lock_reserve   sub 
> $0x10, %rsp
> 264171.903578113:  ffffffff81142230 trace_event_buffer_lock_reserve   mov 
> 0x28(%rsi), %rax
> 264171.903578113:  ffffffff81142234 trace_event_buffer_lock_reserve   mov 
> %r8, -0x30(%rbp)
> 264171.903578113:  ffffffff81142238 trace_event_buffer_lock_reserve   mov 
> 0x20(%rax), %rdi
> 264171.903578113:  ffffffff8114223c trace_event_buffer_lock_reserve   mov 
> %rdi, (%r14)
> 264171.903578113:  ffffffff8114223f trace_event_buffer_lock_reserve   test 
> $0x24, 0x48(%rsi)
> 264171.903578113:  ffffffff81142243 trace_event_buffer_lock_reserve   jz 
> 0x8114226d
> 264171.903578113:  ffffffff8114226d trace_event_buffer_lock_reserve   mov 
> -0x30(%rbp), %rcx
> 264171.903578113:  ffffffff81142271 trace_event_buffer_lock_reserve   mov 
> %r15d, %r8d
> 264171.903578113:  ffffffff81142274 trace_event_buffer_lock_reserve   mov 
> %r13, %rdx
> 264171.903578113:  ffffffff81142277 trace_event_buffer_lock_reserve   mov 
> %r12d, %esi
> 264171.903578113:  ffffffff8114227a trace_event_buffer_lock_reserve   call 
> trace_buffer_lock_reserve
> 264171.903578113:  ffffffff811421c0 trace_buffer_lock_reserve push %rbp
> 264171.903578113:  ffffffff811421c1 trace_buffer_lock_reserve mov %rsp, %rbp
> 264171.903578113:  ffffffff811421c4 trace_buffer_lock_reserve push %r14
> 264171.903578113:  ffffffff811421c6 trace_buffer_lock_reserve push %r13
> 264171.903578113:  ffffffff811421c8 trace_buffer_lock_reserve push %r12
> 264171.903578113:  ffffffff811421ca trace_buffer_lock_reserve push %rbx
> 264171.903578113:  ffffffff811421cb trace_buffer_lock_reserve mov %esi, %r12d
> 264171.903578113:  ffffffff811421ce trace_buffer_lock_reserve mov %rdx, %rsi
> 264171.903578113:  ffffffff811421d1 trace_buffer_lock_reserve mov %rcx, %r13
> 264171.903578113:  ffffffff811421d4 trace_buffer_lock_reserve mov %r8d, %r14d
> 264171.903578113:  ffffffff811421d7 trace_buffer_lock_reserve call 
> ring_buffer_lock_reserve
> 264171.903578113:  ffffffff8113d950 ring_buffer_lock_reserve  mov 0x8(%rdi), 
> %eax
> 264171.903578113:  ffffffff8113d953 ring_buffer_lock_reserve  test %eax, %eax
> 264171.903578113:  ffffffff8113d955 ring_buffer_lock_reserve  jnz 
> ring_buffer_lock_reserve+166
> 264171.903578113:  ffffffff8113d95b ring_buffer_lock_reserve  mov 
> %gs:0x7eecc826(%rip), %eax
> 264171.903578113:  ffffffff8113d962 ring_buffer_lock_reserve  mov %eax, %ecx
> 264171.903578113:  ffffffff8113d964 ring_buffer_lock_reserve  bt %rcx, 
> 0x10(%rdi)
> 264171.903578113:  ffffffff8113d969 ring_buffer_lock_reserve  setb %cl
> 264171.903578113:  ffffffff8113d96c ring_buffer_lock_reserve  test %cl, %cl
> 264171.903578113:  ffffffff8113d96e ring_buffer_lock_reserve  jz 
> ring_buffer_lock_reserve+166
> 264171.903578113:  ffffffff8113d974 ring_buffer_lock_reserve  mov 0x60(%rdi), 
> %rdx
> 264171.903578113:  ffffffff8113d978 ring_buffer_lock_reserve  push %rbp
> 264171.903578113:  ffffffff8113d979 ring_buffer_lock_reserve  cdqe 
> 264171.903578113:  ffffffff8113d97b ring_buffer_lock_reserve  mov %rsp, %rbp
> 264171.903578113:  ffffffff8113d97e ring_buffer_lock_reserve  push %rbx
> 264171.903578113:  ffffffff8113d97f ring_buffer_lock_reserve  mov 
> (%rdx,%rax,8), %rbx
> 264171.903578113:  ffffffff8113d983 ring_buffer_lock_reserve  mov 0x4(%rbx), 
> %eax
> 264171.903578113:  ffffffff8113d986 ring_buffer_lock_reserve  test %eax, %eax
> 264171.903578113:  ffffffff8113d988 ring_buffer_lock_reserve  jnz 0x8113d9f1
> 264171.903578113:  ffffffff8113d98a ring_buffer_lock_reserve  cmp $0xfe8, %rsi
> 264171.903578113:  ffffffff8113d991 ring_buffer_lock_reserve  ja 0x8113d9f1
> 264171.903578113:  ffffffff8113d993 ring_buffer_lock_reserve  mov 
> %gs:0x7eeceab6(%rip), %edx
> 264171.903578113:  ffffffff8113d99a ring_buffer_lock_reserve  test $0x1fff00, 
> %edx
> 264171.903578113:  ffffffff8113d9a0 ring_buffer_lock_reserve  mov 0x20(%rbx), 
> %ecx
> 264171.903578113:  ffffffff8113d9a3 ring_buffer_lock_reserve  mov $0x8, %eax
> 264171.903578113:  ffffffff8113d9a8 ring_buffer_lock_reserve  jnz 0x8113d9c6
> 264171.903578113:  ffffffff8113d9aa ring_buffer_lock_reserve  test %eax, %ecx
> 264171.903578113:  ffffffff8113d9ac ring_buffer_lock_reserve  jnz 0x8113d9f1
> 264171.903578113:  ffffffff8113d9ae ring_buffer_lock_reserve  or %ecx, %eax
> 264171.903578113:  ffffffff8113d9b0 ring_buffer_lock_reserve  mov %rsi, %rdx
> 264171.903578113:  ffffffff8113d9b3 ring_buffer_lock_reserve  mov %rbx, %rsi
> 264171.903578113:  ffffffff8113d9b6 ring_buffer_lock_reserve  mov %eax, 
> 0x20(%rbx)
> 264171.903578113:  ffffffff8113d9b9 ring_buffer_lock_reserve  call 
> rb_reserve_next_event
> 264171.903578113:  ffffffff8113d610 rb_reserve_next_event     inc 0x88(%rsi)
> 264171.903578113:  ffffffff8113d617 rb_reserve_next_event     inc 0x90(%rsi)
> 264171.903578113:  ffffffff8113d61e rb_reserve_next_event     mov 0x8(%rsi), 
> %rax
> 264171.903578113:  ffffffff8113d622 rb_reserve_next_event     cmp %rdi, %rax
> 264171.903578113:  ffffffff8113d625 rb_reserve_next_event     jnz 
> rb_reserve_next_event+678
> 264171.903578113:  ffffffff8113d62b rb_reserve_next_event     push %rbp
> 264171.903578113:  ffffffff8113d62c rb_reserve_next_event     mov %edx, %eax
> 264171.903578113:  ffffffff8113d62e rb_reserve_next_event     mov $0x8, %ecx
> 264171.903578113:  ffffffff8113d633 rb_reserve_next_event     mov %rsp, %rbp
> 264171.903578113:  ffffffff8113d636 rb_reserve_next_event     push %r15
> 264171.903578113:  ffffffff8113d638 rb_reserve_next_event     push %r14
> 264171.903578113:  ffffffff8113d63a rb_reserve_next_event     push %r13
> 264171.903578113:  ffffffff8113d63c rb_reserve_next_event     push %r12
> 264171.903578113:  ffffffff8113d63e rb_reserve_next_event     push %rbx
> 264171.903578113:  ffffffff8113d63f rb_reserve_next_event     lea 0x88(%rsi), 
> %rbx
> 264171.903578113:  ffffffff8113d646 rb_reserve_next_event     sub $0x50, %rsp
> 264171.903578113:  ffffffff8113d64a rb_reserve_next_event     test %edx, %edx
> 264171.903578113:  ffffffff8113d64c rb_reserve_next_event     jnz 
> rb_reserve_next_event+430
> 264171.903578113:  ffffffff8113d7be rb_reserve_next_event     cmp $0x70, %edx
> 264171.903578113:  ffffffff8113d7c1 rb_reserve_next_event     jbe 0x8113d7c6
> 264171.903578113:  ffffffff8113d7c6 rb_reserve_next_event     add $0x7, %eax
> 264171.903578113:  ffffffff8113d7c9 rb_reserve_next_event     mov $0x10, %ecx
> 264171.903578113:  ffffffff8113d7ce rb_reserve_next_event     and 
> $0xfffffffc, %eax
> 264171.903578113:  ffffffff8113d7d1 rb_reserve_next_event     mov %eax, %edx
> 264171.903578113:  ffffffff8113d7d3 rb_reserve_next_event     cmp $0xc, %eax
> 264171.903578113:  ffffffff8113d7d6 rb_reserve_next_event     cmovnz %rdx, 
> %rcx
> 264171.903578113:  ffffffff8113d7da rb_reserve_next_event     jmp 
> rb_reserve_next_event+66
> 264171.903578113:  ffffffff8113d652 rb_reserve_next_event     lea 
> -0x50(%rbp), %rax
> 264171.903578113:  ffffffff8113d656 rb_reserve_next_event     lea 0xa8(%rsi), 
> %r12
> 264171.903578113:  ffffffff8113d65d rb_reserve_next_event     mov %rsi, %r13
> 264171.903578113:  ffffffff8113d660 rb_reserve_next_event     mov %rcx, 
> -0x40(%rbp)
> 264171.903578113:  ffffffff8113d664 rb_reserve_next_event     mov $0x0, 
> -0x30(%rbp)
> 264171.903578113:  ffffffff8113d66b rb_reserve_next_event     lea 0x18(%rax), 
> %r15
> 264171.903578113:  ffffffff8113d66f rb_reserve_next_event     lea 0x10(%rax), 
> %r14
> 264171.903578113:  ffffffff8113d673 rb_reserve_next_event     mov $0x0, 
> -0x48(%rbp)
> 264171.903578113:  ffffffff8113d67b rb_reserve_next_event     mov $0x3e8, 
> -0x74(%rbp)
> 264171.903578113:  ffffffff8113d682 rb_reserve_next_event     mov 0x8(%r13), 
> %rax
> 264171.903578113:  ffffffff8113d686 rb_reserve_next_event     call 0x80(%rax)
> 264171.903578113:  ffffffff81134520 trace_clock_local push %rbp
> 264171.903578113:  ffffffff81134521 trace_clock_local mov %rsp, %rbp
> 264171.903578113:  ffffffff81134524 trace_clock_local call native_sched_clock
> 264171.903578113:  ffffffff81078270 native_sched_clock        push %rbp
> 264171.903578113:  ffffffff81078271 native_sched_clock        mov %rsp, %rbp
> 264171.903578113:  ffffffff81078274 native_sched_clock        and 
> $0xfffffffffffffff0, %rsp
> 264171.903578113:  ffffffff81078278 native_sched_clock        nop (%rax,%rax)
> 264171.903578113:  ffffffff8107827d native_sched_clock        rdtsc 
> 264171.903578113:  ffffffff8107827f native_sched_clock        shl $0x20, %rdx
> 264171.903578113:  ffffffff81078283 native_sched_clock        or %rdx, %rax
> 264171.903578113:  ffffffff81078286 native_sched_clock        mov 
> %gs:0x7ef9cea2(%rip), %rsi
> 264171.903578113:  ffffffff8107828e native_sched_clock        mov 
> %gs:0x7ef9cea2(%rip), %rdx
> 264171.903578113:  ffffffff81078296 native_sched_clock        cmp %rdx, %rsi
> 264171.903578113:  ffffffff81078299 native_sched_clock        jnz 0x810782d1
> 264171.903578113:  ffffffff8107829b native_sched_clock        mov (%rsi), %edx
> 264171.903578113:  ffffffff8107829d native_sched_clock        mov 0x4(%rsi), 
> %ecx
> 264171.903578113:  ffffffff810782a0 native_sched_clock        mul %rdx
> 264171.903578113:  ffffffff810782a3 native_sched_clock        shrd %cl, %rdx, 
> %rax
> 264171.903578113:  ffffffff810782a7 native_sched_clock        shr %cl, %rdx
> 264171.903578113:  ffffffff810782aa native_sched_clock        test $0x40, %cl
> 264171.903578113:  ffffffff810782ad native_sched_clock        cmovnz %rdx, 
> %rax
> 264171.903578113:  ffffffff810782b1 native_sched_clock        add 0x8(%rsi), 
> %rax
> 264171.903578113:  ffffffff810782b5 native_sched_clock        leave 
> 264171.903578113:  ffffffff810782b6 native_sched_clock        ret 
> 264171.903578113:  ffffffff81134529 trace_clock_local pop %rbp
> 264171.903578113:  ffffffff8113452a trace_clock_local ret 
> 264171.903578113:  ffffffff8113d68c rb_reserve_next_event     mov %rax, 
> -0x50(%rbp)
> 264171.903578113:  ffffffff8113d690 rb_reserve_next_event     sub 0xa8(%r13), 
> %rax
> 264171.903578113:  ffffffff8113d697 rb_reserve_next_event     mov 0xa8(%r13), 
> %rcx
> 264171.903578113:  ffffffff8113d69e rb_reserve_next_event     cmp %rcx, 
> -0x50(%rbp)
> 264171.903578113:  ffffffff8113d6a2 rb_reserve_next_event     jb 0x8113d6b4
> 264171.903578113:  ffffffff8113d6a4 rb_reserve_next_event     test 
> $0xfffffffff8000000, %rax
> 264171.903578113:  ffffffff8113d6aa rb_reserve_next_event     mov %rax, 
> -0x48(%rbp)
> 264171.903578113:  ffffffff8113d6ae rb_reserve_next_event     jnz 
> rb_reserve_next_event+476
> 264171.903578113:  ffffffff8113d6b4 rb_reserve_next_event     mov 
> -0x30(%rbp), %esi
> 264171.903578113:  ffffffff8113d6b7 rb_reserve_next_event     test %esi, %esi
> 264171.903578113:  ffffffff8113d6b9 rb_reserve_next_event     jnz 
> rb_reserve_next_event+718

Reply via email to