Phil, When you have a chance, could you please take a look at the arch-independent pieces of the OProfile kernel driver that this patch touches? In short, this patch fixes a bug that I was responsible for in my original OProfile-Cell SPE port (argh! :-[ ) where I was improperly adding events to the main event buffer without first getting the buffer_mutex. Under certain timing conditions, this caused some synchronicity problems between the daemon and the driver, resulting in a very confused daemon (only when running on Cell, by the way). So part of the patch backs out some of the changes I had originally made (like adding add_event_entry to include/linux/oprofile.h), and the rest of the patch reworks the Cell code to use a new interface Carl added to oprofile.h.
Let me know if you have any questions about the patch as I worked pretty closely with Carl while he was developing it. P.S. I'm cc'ing Robert since he expressed an interest in reviewing kernel driver patches. Thanks. Regards, -Maynard Carl Love wrote: > Sorry, looks like my mailer mangled the file. > > This is a reworked patch to fix the SPU data storage. Currently, the > SPU escape sequences and program counter data is being added directly > into the kernel buffer without holding the buffer_mutex lock. This > patch changes how the data is stored. A new function, > oprofile_add_value, is added into the oprofile driver to allow adding > generic data to the per cpu buffers. This enables a series of calls > to the oprofile_add_value to enter the needed SPU escape sequences > and SPU program data into the kernel buffer via the per cpu buffers > without any additional processing. The oprofile_add_value function is > generic so it could be used by other architectures as well provided > the needed postprocessing was added to opreport. > > Finally, this patch backs out the changes previously added to the > oprofile generic code for handling the architecture specific > ops.sync_start and ops.sync_stop that allowed the architecture > to skip the per CPU buffer creation. 
> > Signed-off-by: Carl Love <[EMAIL PROTECTED]> > > Index: Cell_kernel_4_15_2008/arch/powerpc/oprofile/cell/pr_util.h > =================================================================== > --- Cell_kernel_4_15_2008.orig/arch/powerpc/oprofile/cell/pr_util.h > +++ Cell_kernel_4_15_2008/arch/powerpc/oprofile/cell/pr_util.h > @@ -20,11 +20,6 @@ > #include <asm/cell-regs.h> > #include <asm/spu.h> > > -/* Defines used for sync_start */ > -#define SKIP_GENERIC_SYNC 0 > -#define SYNC_START_ERROR -1 > -#define DO_GENERIC_SYNC 1 > - > struct spu_overlay_info { /* map of sections within an SPU overlay */ > unsigned int vma; /* SPU virtual memory address from elf */ > unsigned int size; /* size of section from elf */ > @@ -85,7 +80,7 @@ void stop_spu_profiling(void); > int spu_sync_start(void); > > /* remove the hooks */ > -int spu_sync_stop(void); > +void spu_sync_stop(void); > > /* Record SPU program counter samples to the oprofile event buffer. */ > void spu_sync_buffer(int spu_num, unsigned int *samples, > Index: Cell_kernel_4_15_2008/arch/powerpc/oprofile/cell/spu_task_sync.c > =================================================================== > --- Cell_kernel_4_15_2008.orig/arch/powerpc/oprofile/cell/spu_task_sync.c > +++ Cell_kernel_4_15_2008/arch/powerpc/oprofile/cell/spu_task_sync.c > @@ -31,11 +31,12 @@ > > #define RELEASE_ALL 9999 > > -static DEFINE_SPINLOCK(buffer_lock); > +static DEFINE_SPINLOCK(add_value_lock); > static DEFINE_SPINLOCK(cache_lock); > static int num_spu_nodes; > int spu_prof_num_nodes; > int last_guard_val[MAX_NUMNODES * 8]; > +static int spu_ctx_sw_seen[MAX_NUMNODES * 8]; > > /* Container for caching information about an active SPU task. 
*/ > struct cached_info { > @@ -289,6 +290,7 @@ static int process_context_switch(struct > int retval; > unsigned int offset = 0; > unsigned long spu_cookie = 0, app_dcookie; > + int cpu_buf; > > retval = prepare_cached_spu_info(spu, objectId); > if (retval) > @@ -303,17 +305,28 @@ static int process_context_switch(struct > goto out; > } > > - /* Record context info in event buffer */ > - spin_lock_irqsave(&buffer_lock, flags); > - add_event_entry(ESCAPE_CODE); > - add_event_entry(SPU_CTX_SWITCH_CODE); > - add_event_entry(spu->number); > - add_event_entry(spu->pid); > - add_event_entry(spu->tgid); > - add_event_entry(app_dcookie); > - add_event_entry(spu_cookie); > - add_event_entry(offset); > - spin_unlock_irqrestore(&buffer_lock, flags); > + /* Record context info in event buffer. Note, there are 4x more > + * SPUs than CPUs. Map the SPU events/data for a given SPU to > + * the same CPU buffer. Need to ensure the cntxt switch data and > + * samples stay in order. > + */ > + cpu_buf = spu->number >> 2; > + spin_lock_irqsave(&add_value_lock, flags); > + oprofile_add_value(ESCAPE_CODE, cpu_buf); > + oprofile_add_value(SPU_CTX_SWITCH_CODE, cpu_buf); > + oprofile_add_value(spu->number, cpu_buf); > + oprofile_add_value(spu->pid, cpu_buf); > + oprofile_add_value(spu->tgid, cpu_buf); > + oprofile_add_value(app_dcookie, cpu_buf); > + oprofile_add_value(spu_cookie, cpu_buf); > + oprofile_add_value(offset, cpu_buf); > + > + /* Set flag to indicate SPU PC data can now be written out. If > + * the SPU program counter data is seen before an SPU context > + * record is seen, the postprocessing will fail. > + */ > + spu_ctx_sw_seen[spu->number] = 1; > + spin_unlock_irqrestore(&add_value_lock, flags); > smp_wmb(); /* ensure spu event buffer updates are written */ > /* don't want entries intermingled... 
*/ > out: > @@ -333,7 +346,6 @@ static int spu_active_notify(struct noti > unsigned long flags; > struct spu *the_spu = data; > > - pr_debug("SPU event notification arrived\n"); > if (!val) { > spin_lock_irqsave(&cache_lock, flags); > retval = release_cached_info(the_spu->number); > @@ -363,38 +375,38 @@ static int number_of_online_nodes(void) > /* The main purpose of this function is to synchronize > * OProfile with SPUFS by registering to be notified of > * SPU task switches. > - * > - * NOTE: When profiling SPUs, we must ensure that only > - * spu_sync_start is invoked and not the generic sync_start > - * in drivers/oprofile/oprof.c. A return value of > - * SKIP_GENERIC_SYNC or SYNC_START_ERROR will > - * accomplish this. > */ > int spu_sync_start(void) > { > int k; > - int ret = SKIP_GENERIC_SYNC; > + int ret = 0; > int register_ret; > - unsigned long flags = 0; > + int cpu; > > spu_prof_num_nodes = number_of_online_nodes(); > num_spu_nodes = spu_prof_num_nodes * 8; > > - spin_lock_irqsave(&buffer_lock, flags); > - add_event_entry(ESCAPE_CODE); > - add_event_entry(SPU_PROFILING_CODE); > - add_event_entry(num_spu_nodes); > - spin_unlock_irqrestore(&buffer_lock, flags); > + /* The SPU_PROFILING_CODE escape sequence must precede > + * the SPU context switch info. 
> + */ > + for_each_online_cpu(cpu) { > + oprofile_add_value(ESCAPE_CODE, cpu); > + oprofile_add_value(SPU_PROFILING_CODE, cpu); > + oprofile_add_value((unsigned long int) > + num_spu_nodes, cpu); > + } > > /* Register for SPU events */ > register_ret = spu_switch_event_register(&spu_active); > if (register_ret) { > - ret = SYNC_START_ERROR; > + ret = -1; > goto out; > } > > - for (k = 0; k < (MAX_NUMNODES * 8); k++) > + for (k = 0; k < (MAX_NUMNODES * 8); k++) { > last_guard_val[k] = 0; > + spu_ctx_sw_seen[k] = 0; > + } > pr_debug("spu_sync_start -- running.\n"); > out: > return ret; > @@ -432,7 +444,7 @@ void spu_sync_buffer(int spu_num, unsign > > map = c_info->map; > the_spu = c_info->the_spu; > - spin_lock(&buffer_lock); > + spin_lock(&add_value_lock); > for (i = 0; i < num_samples; i++) { > unsigned int sample = *(samples+i); > int grd_val = 0; > @@ -452,31 +464,38 @@ void spu_sync_buffer(int spu_num, unsign > break; > } > > - add_event_entry(file_offset | spu_num_shifted); > + /* We must ensure that the SPU context switch has been written > + * out before samples for the SPU. Otherwise, the SPU context > + * information is not available and the postprocessing of the > + * SPU PC will fail with no available anonymous map information. > + */ > + if (spu_ctx_sw_seen[spu_num]) > + oprofile_add_value((file_offset | spu_num_shifted), > + (spu_num >> 2)); > } > - spin_unlock(&buffer_lock); > + spin_unlock(&add_value_lock); > out: > spin_unlock_irqrestore(&cache_lock, flags); > } > > > -int spu_sync_stop(void) > +void spu_sync_stop(void) > { > unsigned long flags = 0; > - int ret = spu_switch_event_unregister(&spu_active); > - if (ret) { > - printk(KERN_ERR "SPU_PROF: " > - "%s, line %d: spu_switch_event_unregister returned > %d\n", > - __FUNCTION__, __LINE__, ret); > - goto out; > - } > + > + /* Ignoring the return value from the unregister > + * call. A failed return value simply says there > + * was no registered event. 
Hence there will not > + * be any calls to process a switch event that > + * could cause a problem. > + */ > + spu_switch_event_unregister(&spu_active); > > spin_lock_irqsave(&cache_lock, flags); > - ret = release_cached_info(RELEASE_ALL); > + release_cached_info(RELEASE_ALL); > spin_unlock_irqrestore(&cache_lock, flags); > -out: > pr_debug("spu_sync_stop -- done.\n"); > - return ret; > + return; > } > > > Index: Cell_kernel_4_15_2008/arch/powerpc/oprofile/op_model_cell.c > =================================================================== > --- Cell_kernel_4_15_2008.orig/arch/powerpc/oprofile/op_model_cell.c > +++ Cell_kernel_4_15_2008/arch/powerpc/oprofile/op_model_cell.c > @@ -1191,15 +1191,15 @@ static int cell_sync_start(void) > if (spu_cycle_reset) > return spu_sync_start(); > else > - return DO_GENERIC_SYNC; > + return 0; > } > > -static int cell_sync_stop(void) > +static void cell_sync_stop(void) > { > if (spu_cycle_reset) > - return spu_sync_stop(); > - else > - return 1; > + spu_sync_stop(); > + > + return; > } > > struct op_powerpc_model op_model_cell = { > Index: Cell_kernel_4_15_2008/drivers/oprofile/buffer_sync.c > =================================================================== > --- Cell_kernel_4_15_2008.orig/drivers/oprofile/buffer_sync.c > +++ Cell_kernel_4_15_2008/drivers/oprofile/buffer_sync.c > @@ -521,6 +521,20 @@ void sync_buffer(int cpu) > } else if (s->event == CPU_TRACE_BEGIN) { > state = sb_bt_start; > add_trace_begin(); > + } else if (s->event == VALUE_HEADER_ID) { > + /* The next event contains a value > + * to enter directly into the event buffer. 
> + */ > + increment_tail(cpu_buf); > + i++; /* one less entry in buffer to process */ > > + s = &cpu_buf->buffer[cpu_buf->tail_pos]; > + > + if (unlikely(is_code(s->eip))) > + add_event_entry(s->event); > + else > + printk("KERN_ERR Oprofile per CPU" \ > + "buffer sequence error.\n"); > } else { > struct mm_struct * oldmm = mm; > > Index: Cell_kernel_4_15_2008/drivers/oprofile/cpu_buffer.c > =================================================================== > --- Cell_kernel_4_15_2008.orig/drivers/oprofile/cpu_buffer.c > +++ Cell_kernel_4_15_2008/drivers/oprofile/cpu_buffer.c > @@ -224,6 +224,29 @@ static void oprofile_end_trace(struct op > cpu_buf->tracing = 0; > } > > +/* > + * The first entry in the per cpu buffer consists of the escape code and > + * the VALUE_HEADER_ID value. The next entry consists of an escape code > + * with the value to store. The sync_buffer routine takes the value from > + * the second entry and puts it into the kernel buffer. > + */ > +void oprofile_add_value(unsigned long value, int cpu) { > + struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu]; > + > + /* Enter a sequence of two events. The first event says the > + * next event contains a value that is to be put directly into the > + * event buffer. 
> + */ > + > + if (nr_available_slots(cpu_buf) < 3) { > + cpu_buf->sample_lost_overflow++; > + return; > + } > + > + add_sample(cpu_buf, ESCAPE_CODE, VALUE_HEADER_ID); > + add_sample(cpu_buf, ESCAPE_CODE, value); > +} > + > void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, > unsigned long event, int is_kernel) > { > Index: Cell_kernel_4_15_2008/drivers/oprofile/cpu_buffer.h > =================================================================== > --- Cell_kernel_4_15_2008.orig/drivers/oprofile/cpu_buffer.h > +++ Cell_kernel_4_15_2008/drivers/oprofile/cpu_buffer.h > @@ -54,5 +54,6 @@ void cpu_buffer_reset(struct oprofile_cp > /* transient events for the CPU buffer -> event buffer */ > #define CPU_IS_KERNEL 1 > #define CPU_TRACE_BEGIN 2 > +#define VALUE_HEADER_ID 3 > > #endif /* OPROFILE_CPU_BUFFER_H */ > Index: Cell_kernel_4_15_2008/drivers/oprofile/oprof.c > =================================================================== > --- Cell_kernel_4_15_2008.orig/drivers/oprofile/oprof.c > +++ Cell_kernel_4_15_2008/drivers/oprofile/oprof.c > @@ -53,24 +53,13 @@ int oprofile_setup(void) > * us missing task deaths and eventually oopsing > * when trying to process the event buffer. 
> */ > - if (oprofile_ops.sync_start) { > - int sync_ret = oprofile_ops.sync_start(); > - switch (sync_ret) { > - case 0: > - goto post_sync; > - case 1: > - goto do_generic; > - case -1: > - goto out3; > - default: > - goto out3; > - } > - } > -do_generic: > + if (oprofile_ops.sync_start > + && ((err = oprofile_ops.sync_start()))) > + goto out2; > + > if ((err = sync_start())) > goto out3; > > -post_sync: > is_setup = 1; > mutex_unlock(&start_mutex); > return 0; > @@ -133,20 +122,9 @@ out: > void oprofile_shutdown(void) > { > mutex_lock(&start_mutex); > - if (oprofile_ops.sync_stop) { > - int sync_ret = oprofile_ops.sync_stop(); > - switch (sync_ret) { > - case 0: > - goto post_sync; > - case 1: > - goto do_generic; > - default: > - goto post_sync; > - } > - } > -do_generic: > + if (oprofile_ops.sync_stop) > + oprofile_ops.sync_stop(); > sync_stop(); > -post_sync: > if (oprofile_ops.shutdown) > oprofile_ops.shutdown(); > is_setup = 0; > Index: Cell_kernel_4_15_2008/include/linux/oprofile.h > =================================================================== > --- Cell_kernel_4_15_2008.orig/include/linux/oprofile.h > +++ Cell_kernel_4_15_2008/include/linux/oprofile.h > @@ -56,12 +56,10 @@ struct oprofile_operations { > /* Stop delivering interrupts. */ > void (*stop)(void); > /* Arch-specific buffer sync functions. > - * Return value = 0: Success > - * Return value = -1: Failure > - * Return value = 1: Run generic sync function > + * Sync start: Return 0 for Success, -1 for Failure > */ > int (*sync_start)(void); > - int (*sync_stop)(void); > + void (*sync_stop)(void); > > /* Initiate a stack backtrace. Optional. */ > void (*backtrace)(struct pt_regs * const regs, unsigned int depth); > @@ -84,13 +82,6 @@ int oprofile_arch_init(struct oprofile_o > void oprofile_arch_exit(void); > > /** > - * Add data to the event buffer. > - * The data passed is free-form, but typically consists of > - * file offsets, dcookies, context information, and ESCAPE codes. 
> - */ > -void add_event_entry(unsigned long data); > - > -/** > * Add a sample. This may be called from any context. Pass > * smp_processor_id() as cpu. > */ > @@ -106,6 +97,17 @@ void oprofile_add_sample(struct pt_regs > void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, > unsigned long event, int is_kernel); > > +/* > + * Add a value to the per CPU buffer. The value is passed from the per CPU > + * buffer to the kernel buffer with no additional processing. The assumption > + * is any processing of the value will be done in the postprocessor. This > + * function should only be used for special architecture specific data. > + * Currently only used by the CELL processor. > + * > + * This function does not perform a backtrace. > + */ > +void oprofile_add_value(unsigned long value, int cpu); > + > /* Use this instead when the PC value is not from the regs. Doesn't > * backtrace. */ > void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event); > Index: Cell_kernel_4_15_2008/include/asm-powerpc/oprofile_impl.h > =================================================================== > --- Cell_kernel_4_15_2008.orig/include/asm-powerpc/oprofile_impl.h > +++ Cell_kernel_4_15_2008/include/asm-powerpc/oprofile_impl.h > @@ -48,7 +48,7 @@ struct op_powerpc_model { > void (*stop) (void); > void (*global_stop) (void); > int (*sync_start)(void); > - int (*sync_stop)(void); > + void (*sync_stop)(void); > void (*handle_interrupt) (struct pt_regs *, > struct op_counter_config *); > int num_counters; > Index: Cell_kernel_4_15_2008/drivers/oprofile/event_buffer.h > =================================================================== > --- Cell_kernel_4_15_2008.orig/drivers/oprofile/event_buffer.h > +++ Cell_kernel_4_15_2008/drivers/oprofile/event_buffer.h > @@ -17,6 +17,14 @@ int alloc_event_buffer(void); > > void free_event_buffer(void); > > + > +/** > + * Add data to the event buffer. 
> + * The data passed is free-form, but typically consists of > + * file offsets, dcookies, context information, and ESCAPE codes. > + */ > +void add_event_entry(unsigned long data); > + > /* wake up the process sleeping on the event file */ > void wake_up_buffer_waiter(void); > > > > > ------------------------------------------------------------------------- > This SF.net email is sponsored by the 2008 JavaOne(SM) Conference > Don't miss this year's exciting event. There's still time to save $100. > Use priority code J8TL2D2. > http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone > _______________________________________________ > oprofile-list mailing list > [EMAIL PROTECTED] > https://lists.sourceforge.net/lists/listinfo/oprofile-list > _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev