Hi,

On 10.10.2018 13:45, Peter Zijlstra wrote:

<SNIP>

> -static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
> +/*
> + * XXX somewhat completely buggered; this is in cpu_pmu_context, but we need
> + * event_pmu_context for rotations. We also need event_pmu_context specific
> + * scheduling routines. ARGH
> + *
> + *  - fixed the cpu_pmu_context vs event_pmu_context thingy
> + *    (cpu_pmu_context embeds an event_pmu_context)
> + *
> + *  - need nr_events/nr_active in epc to do per epc rotation
> + *    (done)
> + *
> + *  - need cpu and task pmu ctx together...
> + *    (cpc->task_epc)
> + */
> +static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc)
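Just to spell out the layout the XXX notes above describe, it could be sketched roughly like this (epc, nr_events, nr_active, task_epc and the pmu pointer come from the comment and the code below; the remaining fields and exact types are guesses, not the patch's actual definitions):

struct perf_event_pmu_context {
        struct pmu                      *pmu;        /* cpu_epc->pmu / task_epc->pmu below */
        struct perf_event_context       *ctx;        /* owning cpu or task context (assumed) */
        int                             nr_events;   /* per-epc counts used for rotation */
        int                             nr_active;
};

struct perf_cpu_pmu_context {
        struct perf_event_pmu_context   epc;         /* cpu_pmu_context embeds an event_pmu_context */
        struct perf_event_pmu_context   *task_epc;   /* cpu and task pmu ctx together */
};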
Since it reduces to a single cpu context (and a single task context) at all times, ideally it would probably be coded as simply as this:

perf_rotate_context()
{
        cpu = this_cpu_ptr(&cpu_context)

        for_every_pmu(pmu, cpu)
                for_every_event_ctx(event_ctx, pmu)
                        rotate(event_ctx, pmu)
}

so that rotate(event_ctx, pmu) would operate on common event object semantics and memory layout, while PMU-specific code handles the SW/HW programming differences. (A slightly more concrete sketch of this loop is appended after the quoted hunk at the end of this mail.)

Implementing that implies these data relations:

                 cpu (struct perf_cpu_context)
                    |
                    v
cpu_context ---> cpu_0 -> cpu_1 -> cpu_2 -> cpu_3
                    |        |        |        |
                    v        v        v        v
pmu0 (struct pmu) pmu00    pmu01    pmu02    pmu03
                    |        |        |        |
                    v        v        v        v
pmu1              pmu10    pmu11    pmu12    pmu13
                    |        |        |        |
                    v        v        v        v
pmu2              pmu20    pmu21   *pmu22*   pmu23   <- pmu (struct perf_cpu_pmu_context)

                                         event_ctx
                                            |
                                            v
*pmu22* (struct perf_cpu_pmu_context) -> ctx22_0 -> ctx22_1
                                            |          |
                                            v          v
                                         event00    event01
                                            |          |
                                            v          v
                                        *event10*   event11   <- event
                                            |          |
                                            v          v
                                         event20    event21

In the new schema that would result in one more link on the right:

                                                      cpu_context[NR_CPUS]
                                                             |
                                                             v
task_struct::perf_event_ctxp -> perf_event_context <- perf_cpu_context -----,
    ^                             |   ^    ^                                |
    `-----------------------------'   |    |                                |
                                      |    `--> perf_event_pmu_context      |  <- link
                                      |             ^       ^               |
                                      |             |       |               |
                                      |      ,------'       v               |
                                      |      |      perf_cpu_pmu_context <--'
                                      |      |        ^
                                      |      |        |
                                      v      v        v
                                    perf_event ---> pmu[,cpu_pmu_ctx[NR_CPUS],]

Thanks,
Alexey

> {
> +	struct perf_cpu_context *cpuctx = this_cpu_ptr(&cpu_context);
> +	struct perf_event_pmu_context *cpu_epc, *task_epc = NULL;
> 	struct perf_event *cpu_event = NULL, *task_event = NULL;
> 	bool cpu_rotate = false, task_rotate = false;
> 	struct perf_event_context *ctx = NULL;
> +	struct pmu *pmu;
> 
> 	/*
> 	 * Since we run this from IRQ context, nobody can install new
> 	 * events, thus the event count values are stable.
> 	 */
> 
> -	if (cpuctx->ctx.nr_events) {
> -		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
> -			cpu_rotate = true;
> -	}
> +	cpu_epc = &cpc->epc;
> +	pmu = cpu_epc->pmu;
> 
> -	ctx = cpuctx->task_ctx;
> -	if (ctx && ctx->nr_events) {
> -		if (ctx->nr_events != ctx->nr_active)
> +	if (cpu_epc->nr_events && cpu_epc->nr_events != cpu_epc->nr_active)
> +		cpu_rotate = true;
> +
> +	task_epc = cpc->task_epc;
> +	if (task_epc) {
> +		WARN_ON_ONCE(task_epc->pmu != pmu);
> +		if (task_epc->nr_events && task_epc->nr_events != task_epc->nr_active)
> 			task_rotate = true;
> 	}
> 
> 	if (!(cpu_rotate || task_rotate))
> 		return false;
> 
> -	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> -	perf_pmu_disable(cpuctx->ctx.pmu);
> +	perf_ctx_lock(cpuctx, ctx);
> +	perf_pmu_disable(pmu);
> 
> 	if (task_rotate)
> -		task_event = ctx_first_active(ctx);
> +		task_event = ctx_first_active(task_epc);
> +
> 	if (cpu_rotate)
> -		cpu_event = ctx_first_active(&cpuctx->ctx);
> +		cpu_event = ctx_first_active(cpu_epc);
> 
> 	/*
> 	 * As per the order given at ctx_resched() first 'pop' task flexible
> 	 * and then, if needed CPU flexible.
> 	 */
> -	if (task_event || (ctx && cpu_event))
> -		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
> -	if (cpu_event)
> -		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
> +	if (task_event || (task_epc && cpu_event)) {
> +		update_context_time(ctx);
> +		__pmu_ctx_sched_out(task_epc, EVENT_FLEXIBLE);
> +	}
> +
> +	if (cpu_event) {
> +		update_context_time(&cpuctx->ctx);
> +		__pmu_ctx_sched_out(cpu_epc, EVENT_FLEXIBLE);
> +		rotate_ctx(&cpuctx->ctx, cpu_event);
> +		__pmu_ctx_sched_in(&cpuctx->ctx, pmu);
> +	}
> 
> 	if (task_event)
> 		rotate_ctx(ctx, task_event);
> -	if (cpu_event)
> -		rotate_ctx(&cpuctx->ctx, cpu_event);
> 
> -	perf_event_sched_in(cpuctx, ctx, current);
> +	if (task_event || (task_epc && cpu_event))
> +		__pmu_ctx_sched_in(ctx, pmu);
> 
> -	perf_pmu_enable(cpuctx->ctx.pmu);
> -	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> +	perf_pmu_enable(pmu);
> +	perf_ctx_unlock(cpuctx, ctx);
> 
> 	return true;
> }
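To make the ideal loop from above a bit more concrete, here is a rough C-style sketch of how the per-pmu iteration could look on top of such a layout. The pmu_ctx_list/cpu_pmu_entry linkage is only assumed for illustration, rotate() is the per-context helper from the pseudocode above, and none of this is meant as the patch's actual implementation:

static void perf_rotate_contexts(void)
{
        struct perf_cpu_context *cpuctx = this_cpu_ptr(&cpu_context);
        struct perf_cpu_pmu_context *cpc;

        /* walk every per-cpu pmu context hanging off this cpu (assumed list) */
        list_for_each_entry(cpc, &cpuctx->pmu_ctx_list, cpu_pmu_entry) {
                /* rotate the cpu side events of this pmu */
                rotate(&cpc->epc, cpc->epc.pmu);

                /* and, if a task context is currently scheduled in, its events too */
                if (cpc->task_epc)
                        rotate(cpc->task_epc, cpc->epc.pmu);
        }
}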