On Thu, Jul 03, 2014 at 05:20:50PM +1000, Anton Blanchard wrote:
> Knowing how long we spend in firmware calls is an important part of
> minimising OS jitter.
> 
> This patch adds tracepoints to each OPAL call. If tracepoints are
> enabled we branch out to a common routine that calls an entry and exit
> tracepoint.
> 
> This allows us to write tools that monitor the frequency and duration
> of OPAL calls, eg:
> 
> name                  count  total(ms)  min(ms)  max(ms)  avg(ms)  period(ms)
> OPAL_HANDLE_INTERRUPT     5      0.199    0.037    0.042    0.040   12547.545
> OPAL_POLL_EVENTS        204      2.590    0.012    0.036    0.013    2264.899
> OPAL_PCI_MSI_EOI       2830      3.066    0.001    0.005    0.001      81.166
> 
> We use jump labels if configured, which means we only add a single
> nop instruction to every OPAL call when the tracepoints are disabled.
> 
> Signed-off-by: Anton Blanchard <an...@samba.org>
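For reference, the avg column in the table above is just total/count, and
count/total/min/max presumably come from pairing each opal_entry event with
its matching opal_exit event and accumulating the durations. A minimal
userspace sketch of that per-call accounting -- not the tool that produced
the table, purely an illustration of what the columns mean, fed with
arbitrary sample durations (the period column is not modelled):

#include <stdio.h>

struct opal_stats {
	const char *name;
	unsigned long count;
	double total_ms;
	double min_ms;
	double max_ms;
};

/* Fold one entry/exit duration (in ms) into the per-call statistics. */
static void opal_stats_update(struct opal_stats *s, double duration_ms)
{
	if (s->count == 0 || duration_ms < s->min_ms)
		s->min_ms = duration_ms;
	if (s->count == 0 || duration_ms > s->max_ms)
		s->max_ms = duration_ms;
	s->total_ms += duration_ms;
	s->count++;
}

/* Print one row: name, count, total(ms), min(ms), max(ms), avg(ms). */
static void opal_stats_print(const struct opal_stats *s)
{
	printf("%-24s %6lu %10.3f %8.3f %8.3f %8.3f\n",
	       s->name, s->count, s->total_ms, s->min_ms, s->max_ms,
	       s->count ? s->total_ms / s->count : 0.0);
}

int main(void)
{
	/* Arbitrary sample durations, only to exercise the code. */
	double samples[] = { 0.012, 0.036, 0.013 };
	struct opal_stats s = { .name = "OPAL_POLL_EVENTS" };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		opal_stats_update(&s, samples[i]);

	opal_stats_print(&s);
	return 0;
}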
That is what I call invoking tracepoints the hard way -- from assembly!

Just one question -- can these tracepoints be invoked from the idle
loop? If so, you need to use the _rcuidle suffix, for example, as in
trace_opal_entry_rcuidle(). If not:

Acked-by: Paul E. McKenney <paul...@linux.vnet.ibm.com>

> ---
>
> Index: b/arch/powerpc/include/asm/trace.h
> ===================================================================
> --- a/arch/powerpc/include/asm/trace.h
> +++ b/arch/powerpc/include/asm/trace.h
> @@ -99,6 +99,51 @@ TRACE_EVENT_FN(hcall_exit,
>  );
>  #endif
>
> +#ifdef CONFIG_PPC_POWERNV
> +extern void opal_tracepoint_regfunc(void);
> +extern void opal_tracepoint_unregfunc(void);
> +
> +TRACE_EVENT_FN(opal_entry,
> +
> +	TP_PROTO(unsigned long opcode, unsigned long *args),
> +
> +	TP_ARGS(opcode, args),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned long, opcode)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->opcode = opcode;
> +	),
> +
> +	TP_printk("opcode=%lu", __entry->opcode),
> +
> +	opal_tracepoint_regfunc, opal_tracepoint_unregfunc
> +);
> +
> +TRACE_EVENT_FN(opal_exit,
> +
> +	TP_PROTO(unsigned long opcode, unsigned long retval),
> +
> +	TP_ARGS(opcode, retval),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned long, opcode)
> +		__field(unsigned long, retval)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->opcode = opcode;
> +		__entry->retval = retval;
> +	),
> +
> +	TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
> +
> +	opal_tracepoint_regfunc, opal_tracepoint_unregfunc
> +);
> +#endif
> +
>  #endif /* _TRACE_POWERPC_H */
>
>  #undef TRACE_INCLUDE_PATH
> Index: b/arch/powerpc/platforms/powernv/Makefile
> ===================================================================
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -8,3 +8,4 @@ obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o
>  obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
>  obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
>  obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
> +obj-$(CONFIG_TRACEPOINTS)	+= opal-tracepoints.o
> Index: b/arch/powerpc/platforms/powernv/opal-wrappers.S
> ===================================================================
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -13,30 +13,69 @@
>  #include <asm/hvcall.h>
>  #include <asm/asm-offsets.h>
>  #include <asm/opal.h>
> +#include <asm/jump_label.h>
> +
> +	.section	".text"
> +
> +#ifdef CONFIG_TRACEPOINTS
> +#ifdef CONFIG_JUMP_LABEL
> +#define OPAL_BRANCH(LABEL)				\
> +	ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key)
> +#else
> +
> +	.section	".toc","aw"
> +
> +	.globl opal_tracepoint_refcount
> +opal_tracepoint_refcount:
> +	.llong	0
> +
> +	.section	".text"
> +
> +/*
> + * We branch around this in early init by using an unconditional cpu
> + * feature.
> + */
> +#define OPAL_BRANCH(LABEL)				\
> +BEGIN_FTR_SECTION;					\
> +	b	1f;					\
> +END_FTR_SECTION(0, 1);					\
> +	ld	r12,opal_tracepoint_refcount@toc(r2);	\
> +	std	r12,32(r1);				\
> +	cmpdi	r12,0;					\
> +	bne-	LABEL;					\
> +1:
> +
> +#endif
> +
> +#else
> +#define OPAL_BRANCH(LABEL)
> +#endif
>
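To make the OPAL_BRANCH() mechanism above a little more concrete, a minimal
C sketch of the same jump-label pattern: static_key_false() compiles down to
the single nop the changelog mentions, and it is patched into a branch to the
tracing slow path once the key is enabled. opal_call_traced() and
opal_call_direct() are hypothetical names used only for illustration:

#include <linux/jump_label.h>

extern struct static_key opal_tracepoint_key;

/* Hypothetical helpers standing in for the traced and untraced paths. */
extern void opal_call_traced(unsigned long opcode, unsigned long *args);
extern void opal_call_direct(unsigned long opcode, unsigned long *args);

static void opal_call_sketch(unsigned long opcode, unsigned long *args)
{
	/*
	 * With CONFIG_JUMP_LABEL this test is a single nop on the fast
	 * path; static_key_slow_inc()/static_key_slow_dec() in
	 * opal-tracepoints.c patch it into a branch when the tracepoints
	 * are registered.
	 */
	if (static_key_false(&opal_tracepoint_key))
		opal_call_traced(opcode, args);
	else
		opal_call_direct(opcode, args);
}

The non-jump-label fallback above is the same idea done by hand: a single TOC
load of opal_tracepoint_refcount followed by a compare and branch.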
>  /* TODO:
>   *
>   * - Trace irqs in/off (needs saving/restoring all args, argh...)
>   * - Get r11 feed up by Dave so I can have better register usage
>   */
> +
>  #define OPAL_CALL(name, token)		\
>  _GLOBAL(name);				\
>  	mflr	r0;			\
> -	mfcr	r12;			\
>  	std	r0,16(r1);		\
> +	li	r0,token;		\
> +	OPAL_BRANCH(opal_tracepoint_entry) \
> +	mfcr	r12;			\
>  	stw	r12,8(r1);		\
>  	std	r1,PACAR1(r13);		\
> -	li	r0,0;			\
> +	li	r11,0;			\
>  	mfmsr	r12;			\
> -	ori	r0,r0,MSR_EE;		\
> +	ori	r11,r11,MSR_EE;		\
>  	std	r12,PACASAVEDMSR(r13);	\
> -	andc	r12,r12,r0;		\
> +	andc	r12,r12,r11;		\
>  	mtmsrd	r12,1;			\
> -	LOAD_REG_ADDR(r0,opal_return);	\
> -	mtlr	r0;			\
> -	li	r0,MSR_DR|MSR_IR|MSR_LE;\
> -	andc	r12,r12,r0;		\
> -	li	r0,token;		\
> +	LOAD_REG_ADDR(r11,opal_return);	\
> +	mtlr	r11;			\
> +	li	r11,MSR_DR|MSR_IR|MSR_LE;\
> +	andc	r12,r12,r11;		\
>  	mtspr	SPRN_HSRR1,r12;		\
>  	LOAD_REG_ADDR(r11,opal);	\
>  	ld	r12,8(r11);		\
> @@ -61,6 +100,64 @@ opal_return:
>  	mtcr	r4;
>  	rfid
>
> +#ifdef CONFIG_TRACEPOINTS
> +opal_tracepoint_entry:
> +	stdu	r1,-STACKFRAMESIZE(r1)
> +	std	r0,STK_REG(R23)(r1)
> +	std	r3,STK_REG(R24)(r1)
> +	std	r4,STK_REG(R25)(r1)
> +	std	r5,STK_REG(R26)(r1)
> +	std	r6,STK_REG(R27)(r1)
> +	std	r7,STK_REG(R28)(r1)
> +	std	r8,STK_REG(R29)(r1)
> +	std	r9,STK_REG(R30)(r1)
> +	std	r10,STK_REG(R31)(r1)
> +	mr	r3,r0
> +	addi	r4,r1,STK_REG(R24)
> +	bl	__trace_opal_entry
> +	ld	r0,STK_REG(R23)(r1)
> +	ld	r3,STK_REG(R24)(r1)
> +	ld	r4,STK_REG(R25)(r1)
> +	ld	r5,STK_REG(R26)(r1)
> +	ld	r6,STK_REG(R27)(r1)
> +	ld	r7,STK_REG(R28)(r1)
> +	ld	r8,STK_REG(R29)(r1)
> +	ld	r9,STK_REG(R30)(r1)
> +	ld	r10,STK_REG(R31)(r1)
> +	LOAD_REG_ADDR(r11,opal_tracepoint_return)
> +	mfcr	r12
> +	std	r11,16(r1)
> +	stw	r12,8(r1)
> +	std	r1,PACAR1(r13)
> +	li	r11,0
> +	mfmsr	r12
> +	ori	r11,r11,MSR_EE
> +	std	r12,PACASAVEDMSR(r13)
> +	andc	r12,r12,r11
> +	mtmsrd	r12,1
> +	LOAD_REG_ADDR(r11,opal_return)
> +	mtlr	r11
> +	li	r11,MSR_DR|MSR_IR|MSR_LE
> +	andc	r12,r12,r11
> +	mtspr	SPRN_HSRR1,r12
> +	LOAD_REG_ADDR(r11,opal)
> +	ld	r12,8(r11)
> +	ld	r2,0(r11)
> +	mtspr	SPRN_HSRR0,r12
> +	hrfid
> +
> +opal_tracepoint_return:
> +	std	r3,STK_REG(R31)(r1)
> +	mr	r4,r3
> +	ld	r0,STK_REG(R23)(r1)
> +	bl	__trace_opal_exit
> +	ld	r3,STK_REG(R31)(r1)
> +	addi	r1,r1,STACKFRAMESIZE
> +	ld	r0,16(r1)
> +	mtlr	r0
> +	blr
> +#endif
> +
>  OPAL_CALL(opal_invalid_call,		OPAL_INVALID_CALL);
>  OPAL_CALL(opal_console_write,		OPAL_CONSOLE_WRITE);
>  OPAL_CALL(opal_console_read,		OPAL_CONSOLE_READ);
> Index: b/arch/powerpc/platforms/powernv/opal-tracepoints.c
> ===================================================================
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c
> @@ -0,0 +1,84 @@
> +#include <linux/percpu.h>
> +#include <linux/jump_label.h>
> +#include <asm/trace.h>
> +
> +#ifdef CONFIG_JUMP_LABEL
> +struct static_key opal_tracepoint_key = STATIC_KEY_INIT;
> +
> +void opal_tracepoint_regfunc(void)
> +{
> +	static_key_slow_inc(&opal_tracepoint_key);
> +}
> +
> +void opal_tracepoint_unregfunc(void)
> +{
> +	static_key_slow_dec(&opal_tracepoint_key);
> +}
> +#else
> +/*
> + * We optimise OPAL calls by placing opal_tracepoint_refcount
> + * directly in the TOC so we can check if the opal tracepoints are
> + * enabled via a single load.
> + */
> +
> +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
> +extern long opal_tracepoint_refcount;
> +
> +void opal_tracepoint_regfunc(void)
> +{
> +	opal_tracepoint_refcount++;
> +}
> +
> +void opal_tracepoint_unregfunc(void)
> +{
> +	opal_tracepoint_refcount--;
> +}
> +#endif
> +
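In C terms, the traced slow path that opal_tracepoint_entry and
opal_tracepoint_return implement boils down to the sketch below. It has to be
assembly in reality because the OPAL ABI passes up to eight arguments in
r3-r10 and the firmware is entered via hrfid with the return path redirected;
opal_do_call() is a hypothetical stand-in for that real-mode call, not a
function in the patch:

/* Prototypes matching the helpers added in opal-tracepoints.c below. */
void __trace_opal_entry(unsigned long opcode, unsigned long *args);
void __trace_opal_exit(long opcode, unsigned long retval);

/* Hypothetical stand-in for the actual hrfid-based firmware call. */
extern long opal_do_call(unsigned long opcode, unsigned long *args);

static long opal_call_with_tracepoints(unsigned long opcode,
				       unsigned long *args)
{
	long rc;

	__trace_opal_entry(opcode, args);	/* fires the opal_entry event */
	rc = opal_do_call(opcode, args);
	__trace_opal_exit(opcode, rc);		/* fires the opal_exit event */

	return rc;
}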
> +/*
> + * Since the tracing code might execute OPAL calls we need to guard against
> + * recursion.
> + */
> +static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
> +
> +void __trace_opal_entry(unsigned long opcode, unsigned long *args)
> +{
> +	unsigned long flags;
> +	unsigned int *depth;
> +
> +	local_irq_save(flags);
> +
> +	depth = &__get_cpu_var(opal_trace_depth);
> +
> +	if (*depth)
> +		goto out;
> +
> +	(*depth)++;
> +	preempt_disable();
> +	trace_opal_entry(opcode, args);
> +	(*depth)--;
> +
> +out:
> +	local_irq_restore(flags);
> +}
> +
> +void __trace_opal_exit(long opcode, unsigned long retval)
> +{
> +	unsigned long flags;
> +	unsigned int *depth;
> +
> +	local_irq_save(flags);
> +
> +	depth = &__get_cpu_var(opal_trace_depth);
> +
> +	if (*depth)
> +		goto out;
> +
> +	(*depth)++;
> +	trace_opal_exit(opcode, retval);
> +	preempt_enable();
> +	(*depth)--;
> +
> +out:
> +	local_irq_restore(flags);
> +}
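Coming back to the question at the top of this mail: if an OPAL call could
ever be issued from the idle loop, where RCU is not watching, the change
should be confined to the two helpers above, because the tracepoint machinery
generates an RCU-idle-safe trace_<event>_rcuidle() variant alongside every
plain trace_<event>(). A minimal sketch, with an illustrative function name
(the per-cpu recursion guard and irq handling from the patch would stay
exactly as they are):

#include <asm/trace.h>

/* Hypothetical: only needed if OPAL calls can happen from the idle loop. */
static void opal_trace_entry_from_idle(unsigned long opcode,
				       unsigned long *args)
{
	/* RCU-idle-safe form of trace_opal_entry(). */
	trace_opal_entry_rcuidle(opcode, args);
}

__trace_opal_exit() would use trace_opal_exit_rcuidle(opcode, retval) in the
same way.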