On Mon, 21 Nov 2016 09:45:04 -0800 Andi Kleen <a...@firstfloor.org> wrote:
> On Mon, Nov 21, 2016 at 06:18:53PM +0100, Peter Zijlstra wrote: > > On Mon, Nov 21, 2016 at 09:06:13AM -0800, Andi Kleen wrote: > > > > > it got away with attached change.. but this rcu logic > > > > > is far beyond me, so it's just wild guess.. ;-) > > > > > > > > I think I prefer something like the below, that only annotates the one > > > > RDMSR in question, instead of all of them. > > > > > > It would be far better to just fix trace points that they always work. > > > > > > This whole thing is a travesty: we have tens of thousands of lines of > > > code in > > > ftrace to support tracing in NMIs, but then "debug features"[1] like > > > this come around and make trace points unusable for far more code than > > > just the NMI handlers. > > > > Not sure the idle handlers are more code than we have NMI code, but yes, > > its annoying. > > > > Its not ftrace as such though, its RCU, ftrace simply uses RCU to avoid > > locking, as one does. > > > > Biggest objection would be that the rcu_irq_enter_irqson() thing does > > POPF and rcu_irq_exit_irqson() does again. So wrapping every tracepoint > > with that is quite a few cycles. > > This is only when the trace point is enabled right? > > ftrace is already executing a lot of instructions when enabled. Adding two > POPF > won't be end of the world. And a popf can be much more expensive than any of these. You should know, not all instructions are equal. Using perf, I've seen popf take up almst 30% of a function the size of this. -- Steve > > -Andi > > > 264171.903577780: ffffffff810d6040 trace_event_raw_event_sched_switch > push %rbp > 264171.903577780: ffffffff810d6041 trace_event_raw_event_sched_switch > mov %rsp, %rbp > 264171.903577780: ffffffff810d6044 trace_event_raw_event_sched_switch > push %r15 > 264171.903577780: ffffffff810d6046 trace_event_raw_event_sched_switch > push %r14 > 264171.903577780: ffffffff810d6048 trace_event_raw_event_sched_switch > push %r13 > 264171.903577780: ffffffff810d604a trace_event_raw_event_sched_switch > push %r12 > 264171.903577780: ffffffff810d604c trace_event_raw_event_sched_switch > mov %esi, %r13d > 264171.903577780: ffffffff810d604f trace_event_raw_event_sched_switch > push %rbx > 264171.903577780: ffffffff810d6050 trace_event_raw_event_sched_switch > mov %rdi, %r12 > 264171.903577780: ffffffff810d6053 trace_event_raw_event_sched_switch > mov %rdx, %r14 > 264171.903577780: ffffffff810d6056 trace_event_raw_event_sched_switch > mov %rcx, %r15 > 264171.903577780: ffffffff810d6059 trace_event_raw_event_sched_switch > sub $0x30, %rsp > 264171.903577780: ffffffff810d605d trace_event_raw_event_sched_switch > mov 0x48(%rdi), %rbx > 264171.903577780: ffffffff810d6061 trace_event_raw_event_sched_switch > test $0x80, %bl > 264171.903577780: ffffffff810d6064 trace_event_raw_event_sched_switch > jnz 0x810d608c > 264171.903577780: ffffffff810d6066 trace_event_raw_event_sched_switch > test $0x40, %bl > 264171.903577780: ffffffff810d6069 trace_event_raw_event_sched_switch > jnz trace_event_raw_event_sched_switch+216 > 264171.903577780: ffffffff810d606f trace_event_raw_event_sched_switch > test $0x20, %bl > 264171.903577780: ffffffff810d6072 trace_event_raw_event_sched_switch > jz 0x810d6083 > 264171.903577780: ffffffff810d6083 trace_event_raw_event_sched_switch > and $0x1, %bh > 264171.903577780: ffffffff810d6086 trace_event_raw_event_sched_switch > jnz trace_event_raw_event_sched_switch+237 > 264171.903578113: ffffffff810d608c trace_event_raw_event_sched_switch > mov $0x40, %edx > 264171.903578113: ffffffff810d6091 trace_event_raw_event_sched_switch > mov %r12, %rsi > 264171.903578113: ffffffff810d6094 trace_event_raw_event_sched_switch > mov %rsp, %rdi > 264171.903578113: ffffffff810d6097 trace_event_raw_event_sched_switch > call trace_event_buffer_reserve > 264171.903578113: ffffffff8114f150 trace_event_buffer_reserve test > $0x1, 0x49(%rsi) > 264171.903578113: ffffffff8114f154 trace_event_buffer_reserve push > %rbp > 264171.903578113: ffffffff8114f155 trace_event_buffer_reserve mov > 0x10(%rsi), %r10 > 264171.903578113: ffffffff8114f159 trace_event_buffer_reserve mov > %rsp, %rbp > 264171.903578113: ffffffff8114f15c trace_event_buffer_reserve push > %rbx > 264171.903578113: ffffffff8114f15d trace_event_buffer_reserve jz > 0x8114f17e > 264171.903578113: ffffffff8114f17e trace_event_buffer_reserve mov > %rdx, %rcx > 264171.903578113: ffffffff8114f181 trace_event_buffer_reserve mov > %rdi, %rbx > 264171.903578113: ffffffff8114f184 trace_event_buffer_reserve pushfq > 264171.903578113: ffffffff8114f185 trace_event_buffer_reserve pop %r8 > 264171.903578113: ffffffff8114f187 trace_event_buffer_reserve mov > %gs:0x7eebd2c1(%rip), %r9d > 264171.903578113: ffffffff8114f18f trace_event_buffer_reserve and > $0x7fffffff, %r9d > 264171.903578113: ffffffff8114f196 trace_event_buffer_reserve mov > %r8, 0x20(%rdi) > 264171.903578113: ffffffff8114f19a trace_event_buffer_reserve mov > %rsi, 0x10(%rdi) > 264171.903578113: ffffffff8114f19e trace_event_buffer_reserve mov > %r9d, 0x28(%rdi) > 264171.903578113: ffffffff8114f1a2 trace_event_buffer_reserve mov > 0x40(%r10), %edx > 264171.903578113: ffffffff8114f1a6 trace_event_buffer_reserve call > trace_event_buffer_lock_reserve > 264171.903578113: ffffffff81142210 trace_event_buffer_lock_reserve push > %rbp > 264171.903578113: ffffffff81142211 trace_event_buffer_lock_reserve mov > %rsp, %rbp > 264171.903578113: ffffffff81142214 trace_event_buffer_lock_reserve push > %r15 > 264171.903578113: ffffffff81142216 trace_event_buffer_lock_reserve push > %r14 > 264171.903578113: ffffffff81142218 trace_event_buffer_lock_reserve push > %r13 > 264171.903578113: ffffffff8114221a trace_event_buffer_lock_reserve push > %r12 > 264171.903578113: ffffffff8114221c trace_event_buffer_lock_reserve mov > %rdi, %r14 > 264171.903578113: ffffffff8114221f trace_event_buffer_lock_reserve push > %rbx > 264171.903578113: ffffffff81142220 trace_event_buffer_lock_reserve mov > %edx, %r12d > 264171.903578113: ffffffff81142223 trace_event_buffer_lock_reserve mov > %rsi, %rbx > 264171.903578113: ffffffff81142226 trace_event_buffer_lock_reserve mov > %rcx, %r13 > 264171.903578113: ffffffff81142229 trace_event_buffer_lock_reserve mov > %r9d, %r15d > 264171.903578113: ffffffff8114222c trace_event_buffer_lock_reserve sub > $0x10, %rsp > 264171.903578113: ffffffff81142230 trace_event_buffer_lock_reserve mov > 0x28(%rsi), %rax > 264171.903578113: ffffffff81142234 trace_event_buffer_lock_reserve mov > %r8, -0x30(%rbp) > 264171.903578113: ffffffff81142238 trace_event_buffer_lock_reserve mov > 0x20(%rax), %rdi > 264171.903578113: ffffffff8114223c trace_event_buffer_lock_reserve mov > %rdi, (%r14) > 264171.903578113: ffffffff8114223f trace_event_buffer_lock_reserve test > $0x24, 0x48(%rsi) > 264171.903578113: ffffffff81142243 trace_event_buffer_lock_reserve jz > 0x8114226d > 264171.903578113: ffffffff8114226d trace_event_buffer_lock_reserve mov > -0x30(%rbp), %rcx > 264171.903578113: ffffffff81142271 trace_event_buffer_lock_reserve mov > %r15d, %r8d > 264171.903578113: ffffffff81142274 trace_event_buffer_lock_reserve mov > %r13, %rdx > 264171.903578113: ffffffff81142277 trace_event_buffer_lock_reserve mov > %r12d, %esi > 264171.903578113: ffffffff8114227a trace_event_buffer_lock_reserve call > trace_buffer_lock_reserve > 264171.903578113: ffffffff811421c0 trace_buffer_lock_reserve push %rbp > 264171.903578113: ffffffff811421c1 trace_buffer_lock_reserve mov %rsp, %rbp > 264171.903578113: ffffffff811421c4 trace_buffer_lock_reserve push %r14 > 264171.903578113: ffffffff811421c6 trace_buffer_lock_reserve push %r13 > 264171.903578113: ffffffff811421c8 trace_buffer_lock_reserve push %r12 > 264171.903578113: ffffffff811421ca trace_buffer_lock_reserve push %rbx > 264171.903578113: ffffffff811421cb trace_buffer_lock_reserve mov %esi, %r12d > 264171.903578113: ffffffff811421ce trace_buffer_lock_reserve mov %rdx, %rsi > 264171.903578113: ffffffff811421d1 trace_buffer_lock_reserve mov %rcx, %r13 > 264171.903578113: ffffffff811421d4 trace_buffer_lock_reserve mov %r8d, %r14d > 264171.903578113: ffffffff811421d7 trace_buffer_lock_reserve call > ring_buffer_lock_reserve > 264171.903578113: ffffffff8113d950 ring_buffer_lock_reserve mov 0x8(%rdi), > %eax > 264171.903578113: ffffffff8113d953 ring_buffer_lock_reserve test %eax, %eax > 264171.903578113: ffffffff8113d955 ring_buffer_lock_reserve jnz > ring_buffer_lock_reserve+166 > 264171.903578113: ffffffff8113d95b ring_buffer_lock_reserve mov > %gs:0x7eecc826(%rip), %eax > 264171.903578113: ffffffff8113d962 ring_buffer_lock_reserve mov %eax, %ecx > 264171.903578113: ffffffff8113d964 ring_buffer_lock_reserve bt %rcx, > 0x10(%rdi) > 264171.903578113: ffffffff8113d969 ring_buffer_lock_reserve setb %cl > 264171.903578113: ffffffff8113d96c ring_buffer_lock_reserve test %cl, %cl > 264171.903578113: ffffffff8113d96e ring_buffer_lock_reserve jz > ring_buffer_lock_reserve+166 > 264171.903578113: ffffffff8113d974 ring_buffer_lock_reserve mov 0x60(%rdi), > %rdx > 264171.903578113: ffffffff8113d978 ring_buffer_lock_reserve push %rbp > 264171.903578113: ffffffff8113d979 ring_buffer_lock_reserve cdqe > 264171.903578113: ffffffff8113d97b ring_buffer_lock_reserve mov %rsp, %rbp > 264171.903578113: ffffffff8113d97e ring_buffer_lock_reserve push %rbx > 264171.903578113: ffffffff8113d97f ring_buffer_lock_reserve mov > (%rdx,%rax,8), %rbx > 264171.903578113: ffffffff8113d983 ring_buffer_lock_reserve mov 0x4(%rbx), > %eax > 264171.903578113: ffffffff8113d986 ring_buffer_lock_reserve test %eax, %eax > 264171.903578113: ffffffff8113d988 ring_buffer_lock_reserve jnz 0x8113d9f1 > 264171.903578113: ffffffff8113d98a ring_buffer_lock_reserve cmp $0xfe8, %rsi > 264171.903578113: ffffffff8113d991 ring_buffer_lock_reserve ja 0x8113d9f1 > 264171.903578113: ffffffff8113d993 ring_buffer_lock_reserve mov > %gs:0x7eeceab6(%rip), %edx > 264171.903578113: ffffffff8113d99a ring_buffer_lock_reserve test $0x1fff00, > %edx > 264171.903578113: ffffffff8113d9a0 ring_buffer_lock_reserve mov 0x20(%rbx), > %ecx > 264171.903578113: ffffffff8113d9a3 ring_buffer_lock_reserve mov $0x8, %eax > 264171.903578113: ffffffff8113d9a8 ring_buffer_lock_reserve jnz 0x8113d9c6 > 264171.903578113: ffffffff8113d9aa ring_buffer_lock_reserve test %eax, %ecx > 264171.903578113: ffffffff8113d9ac ring_buffer_lock_reserve jnz 0x8113d9f1 > 264171.903578113: ffffffff8113d9ae ring_buffer_lock_reserve or %ecx, %eax > 264171.903578113: ffffffff8113d9b0 ring_buffer_lock_reserve mov %rsi, %rdx > 264171.903578113: ffffffff8113d9b3 ring_buffer_lock_reserve mov %rbx, %rsi > 264171.903578113: ffffffff8113d9b6 ring_buffer_lock_reserve mov %eax, > 0x20(%rbx) > 264171.903578113: ffffffff8113d9b9 ring_buffer_lock_reserve call > rb_reserve_next_event > 264171.903578113: ffffffff8113d610 rb_reserve_next_event inc 0x88(%rsi) > 264171.903578113: ffffffff8113d617 rb_reserve_next_event inc 0x90(%rsi) > 264171.903578113: ffffffff8113d61e rb_reserve_next_event mov 0x8(%rsi), > %rax > 264171.903578113: ffffffff8113d622 rb_reserve_next_event cmp %rdi, %rax > 264171.903578113: ffffffff8113d625 rb_reserve_next_event jnz > rb_reserve_next_event+678 > 264171.903578113: ffffffff8113d62b rb_reserve_next_event push %rbp > 264171.903578113: ffffffff8113d62c rb_reserve_next_event mov %edx, %eax > 264171.903578113: ffffffff8113d62e rb_reserve_next_event mov $0x8, %ecx > 264171.903578113: ffffffff8113d633 rb_reserve_next_event mov %rsp, %rbp > 264171.903578113: ffffffff8113d636 rb_reserve_next_event push %r15 > 264171.903578113: ffffffff8113d638 rb_reserve_next_event push %r14 > 264171.903578113: ffffffff8113d63a rb_reserve_next_event push %r13 > 264171.903578113: ffffffff8113d63c rb_reserve_next_event push %r12 > 264171.903578113: ffffffff8113d63e rb_reserve_next_event push %rbx > 264171.903578113: ffffffff8113d63f rb_reserve_next_event lea 0x88(%rsi), > %rbx > 264171.903578113: ffffffff8113d646 rb_reserve_next_event sub $0x50, %rsp > 264171.903578113: ffffffff8113d64a rb_reserve_next_event test %edx, %edx > 264171.903578113: ffffffff8113d64c rb_reserve_next_event jnz > rb_reserve_next_event+430 > 264171.903578113: ffffffff8113d7be rb_reserve_next_event cmp $0x70, %edx > 264171.903578113: ffffffff8113d7c1 rb_reserve_next_event jbe 0x8113d7c6 > 264171.903578113: ffffffff8113d7c6 rb_reserve_next_event add $0x7, %eax > 264171.903578113: ffffffff8113d7c9 rb_reserve_next_event mov $0x10, %ecx > 264171.903578113: ffffffff8113d7ce rb_reserve_next_event and > $0xfffffffc, %eax > 264171.903578113: ffffffff8113d7d1 rb_reserve_next_event mov %eax, %edx > 264171.903578113: ffffffff8113d7d3 rb_reserve_next_event cmp $0xc, %eax > 264171.903578113: ffffffff8113d7d6 rb_reserve_next_event cmovnz %rdx, > %rcx > 264171.903578113: ffffffff8113d7da rb_reserve_next_event jmp > rb_reserve_next_event+66 > 264171.903578113: ffffffff8113d652 rb_reserve_next_event lea > -0x50(%rbp), %rax > 264171.903578113: ffffffff8113d656 rb_reserve_next_event lea 0xa8(%rsi), > %r12 > 264171.903578113: ffffffff8113d65d rb_reserve_next_event mov %rsi, %r13 > 264171.903578113: ffffffff8113d660 rb_reserve_next_event mov %rcx, > -0x40(%rbp) > 264171.903578113: ffffffff8113d664 rb_reserve_next_event mov $0x0, > -0x30(%rbp) > 264171.903578113: ffffffff8113d66b rb_reserve_next_event lea 0x18(%rax), > %r15 > 264171.903578113: ffffffff8113d66f rb_reserve_next_event lea 0x10(%rax), > %r14 > 264171.903578113: ffffffff8113d673 rb_reserve_next_event mov $0x0, > -0x48(%rbp) > 264171.903578113: ffffffff8113d67b rb_reserve_next_event mov $0x3e8, > -0x74(%rbp) > 264171.903578113: ffffffff8113d682 rb_reserve_next_event mov 0x8(%r13), > %rax > 264171.903578113: ffffffff8113d686 rb_reserve_next_event call 0x80(%rax) > 264171.903578113: ffffffff81134520 trace_clock_local push %rbp > 264171.903578113: ffffffff81134521 trace_clock_local mov %rsp, %rbp > 264171.903578113: ffffffff81134524 trace_clock_local call native_sched_clock > 264171.903578113: ffffffff81078270 native_sched_clock push %rbp > 264171.903578113: ffffffff81078271 native_sched_clock mov %rsp, %rbp > 264171.903578113: ffffffff81078274 native_sched_clock and > $0xfffffffffffffff0, %rsp > 264171.903578113: ffffffff81078278 native_sched_clock nop (%rax,%rax) > 264171.903578113: ffffffff8107827d native_sched_clock rdtsc > 264171.903578113: ffffffff8107827f native_sched_clock shl $0x20, %rdx > 264171.903578113: ffffffff81078283 native_sched_clock or %rdx, %rax > 264171.903578113: ffffffff81078286 native_sched_clock mov > %gs:0x7ef9cea2(%rip), %rsi > 264171.903578113: ffffffff8107828e native_sched_clock mov > %gs:0x7ef9cea2(%rip), %rdx > 264171.903578113: ffffffff81078296 native_sched_clock cmp %rdx, %rsi > 264171.903578113: ffffffff81078299 native_sched_clock jnz 0x810782d1 > 264171.903578113: ffffffff8107829b native_sched_clock mov (%rsi), %edx > 264171.903578113: ffffffff8107829d native_sched_clock mov 0x4(%rsi), > %ecx > 264171.903578113: ffffffff810782a0 native_sched_clock mul %rdx > 264171.903578113: ffffffff810782a3 native_sched_clock shrd %cl, %rdx, > %rax > 264171.903578113: ffffffff810782a7 native_sched_clock shr %cl, %rdx > 264171.903578113: ffffffff810782aa native_sched_clock test $0x40, %cl > 264171.903578113: ffffffff810782ad native_sched_clock cmovnz %rdx, > %rax > 264171.903578113: ffffffff810782b1 native_sched_clock add 0x8(%rsi), > %rax > 264171.903578113: ffffffff810782b5 native_sched_clock leave > 264171.903578113: ffffffff810782b6 native_sched_clock ret > 264171.903578113: ffffffff81134529 trace_clock_local pop %rbp > 264171.903578113: ffffffff8113452a trace_clock_local ret > 264171.903578113: ffffffff8113d68c rb_reserve_next_event mov %rax, > -0x50(%rbp) > 264171.903578113: ffffffff8113d690 rb_reserve_next_event sub 0xa8(%r13), > %rax > 264171.903578113: ffffffff8113d697 rb_reserve_next_event mov 0xa8(%r13), > %rcx > 264171.903578113: ffffffff8113d69e rb_reserve_next_event cmp %rcx, > -0x50(%rbp) > 264171.903578113: ffffffff8113d6a2 rb_reserve_next_event jb 0x8113d6b4 > 264171.903578113: ffffffff8113d6a4 rb_reserve_next_event test > $0xfffffffff8000000, %rax > 264171.903578113: ffffffff8113d6aa rb_reserve_next_event mov %rax, > -0x48(%rbp) > 264171.903578113: ffffffff8113d6ae rb_reserve_next_event jnz > rb_reserve_next_event+476 > 264171.903578113: ffffffff8113d6b4 rb_reserve_next_event mov > -0x30(%rbp), %esi > 264171.903578113: ffffffff8113d6b7 rb_reserve_next_event test %esi, %esi > 264171.903578113: ffffffff8113d6b9 rb_reserve_next_event jnz > rb_reserve_next_event+718