On 29 July 2016 at 23:41, Vegard Nossum <vegard.nos...@gmail.com> wrote:
> On 27 July 2016 at 16:15, Vegard Nossum <vegard.nos...@gmail.com> wrote:
>> Hi,
>>
>> I'm seeing this on latest linus/master:
[...]
>> RIP: 0010:[<ffffffff81327820>]  [<ffffffff81327820>] perf_iterate_sb+0x1b0/0x6a0
[...]
>>
>> In particular, it looks to me like event->ctx is NULL.
>
> Digging a bit deeper into this, it seems the event itself is getting
> created by perf_event_open() and it gets added to the pmu_event_list
> through:
>
> perf_event_open()
>  - perf_event_alloc()
>     - account_event()
>        - account_pmu_sb_event()
>           - attach_sb_event()
>
> so at this point the event is being attached but its ->ctx is still
> NULL. It seems like ->ctx is set just a bit later in
> perf_event_open(), though.
>
> But before ->ctx is set, __schedule() can come along and trigger a
> crash with a stack trace similar to the one above:
>
> __schedule()
>  - __perf_event_task_sched_out()
>    - perf_iterate_sb()
>      - perf_iterate_sb_cpu()
>         - event_filter_match()
>           - perf_cgroup_match()
>             - __get_cpu_context()
>               - (dereference ctx which is NULL)
>
> So I guess the question is... should the event be attached (= put on
> the list) before ->ctx gets set? Or should the cgroup code check for a
> NULL ->ctx?
>
> I'm seeing the NULL ptr deref in __perf_event_task_sched_in() as well, btw.
>
> I'm thinking this is probably where the bug was introduced:
>
> commit f2fb6bef92514432398a653df1c2f1041d79ac46
> Author: Kan Liang <kan.li...@intel.com>
> Date:   Wed Mar 23 11:24:37 2016 -0700
>
>    perf/core: Optimize side-band event delivery

Reverting aab5b71ef2b5c62323b9abe397e2db57b18e1f78 and
f2fb6bef92514432398a653df1c2f1041d79ac46 does indeed fix the issue for
me.

(Just to be clear, I'm not suggesting a revert as the final fix to
this issue, but it shows quite clearly where the problem is.)
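
To illustrate the second option above (checking for a NULL ->ctx), here
is an untested sketch against perf_iterate_sb_cpu() in linus/master that
skips events which attach_sb_event() has already put on the list but
whose ->ctx perf_event_open() has not assigned yet. The READ_ONCE() is
my guess at the minimum needed on the reader side; pairing it properly
with the ->ctx store in perf_event_open() would still need to be worked
out:

static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
{
	struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
	struct perf_event *event;

	list_for_each_entry_rcu(event, &pel->list, sb_list) {
		/*
		 * Skip events that are on pmu_sb_events but not fully
		 * constructed yet; event_filter_match() dereferences
		 * event->ctx via __get_cpu_context() and would crash
		 * on a NULL ->ctx.
		 */
		if (!READ_ONCE(event->ctx))
			continue;

		if (event->state < PERF_EVENT_STATE_INACTIVE)
			continue;
		if (!event_filter_match(event))
			continue;
		output(event, data);
	}
}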


Vegard
From 18abcfa13157ae840e425da21047c734442bee57 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nos...@oracle.com>
Date: Fri, 29 Jul 2016 14:32:44 -0700
Subject: [PATCH] Revert "perf/core: Rename the perf_event_aux*() APIs to
 perf_event_sb*(), to separate them from AUX ring-buffer records" and
 "perf/core: Optimize side-band event delivery"

This reverts commit aab5b71ef2b5c62323b9abe397e2db57b18e1f78.
This reverts commit f2fb6bef92514432398a653df1c2f1041d79ac46.
---
 include/linux/perf_event.h |   6 --
 kernel/events/core.c       | 144 ++++++++++++---------------------------------
 2 files changed, 39 insertions(+), 111 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e1f921c..1f37113 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -530,11 +530,6 @@ struct swevent_hlist {
 struct perf_cgroup;
 struct ring_buffer;
 
-struct pmu_event_list {
-	raw_spinlock_t		lock;
-	struct list_head	list;
-};
-
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -693,7 +688,6 @@ struct perf_event {
 	int				cgrp_defer_enabled;
 #endif
 
-	struct list_head		sb_list;
 #endif /* CONFIG_PERF_EVENTS */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 09ae27b..54baf9b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -335,7 +335,6 @@ static atomic_t perf_sched_count;
 
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
-static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -3694,39 +3693,6 @@ static void free_event_rcu(struct rcu_head *head)
 static void ring_buffer_attach(struct perf_event *event,
 			       struct ring_buffer *rb);
 
-static void detach_sb_event(struct perf_event *event)
-{
-	struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
-
-	raw_spin_lock(&pel->lock);
-	list_del_rcu(&event->sb_list);
-	raw_spin_unlock(&pel->lock);
-}
-
-static bool is_sb_event(struct perf_event *event)
-{
-	struct perf_event_attr *attr = &event->attr;
-
-	if (event->parent)
-		return false;
-
-	if (event->attach_state & PERF_ATTACH_TASK)
-		return false;
-
-	if (attr->mmap || attr->mmap_data || attr->mmap2 ||
-	    attr->comm || attr->comm_exec ||
-	    attr->task ||
-	    attr->context_switch)
-		return true;
-	return false;
-}
-
-static void unaccount_pmu_sb_event(struct perf_event *event)
-{
-	if (is_sb_event(event))
-		detach_sb_event(event);
-}
-
 static void unaccount_event_cpu(struct perf_event *event, int cpu)
 {
 	if (event->parent)
@@ -3790,8 +3756,6 @@ static void unaccount_event(struct perf_event *event)
 	}
 
 	unaccount_event_cpu(event, event->cpu);
-
-	unaccount_pmu_sb_event(event);
 }
 
 static void perf_sched_delayed(struct work_struct *work)
@@ -5942,11 +5906,11 @@ perf_event_read_event(struct perf_event *event,
 	perf_output_end(&handle);
 }
 
-typedef void (perf_iterate_f)(struct perf_event *event, void *data);
+typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
 
 static void
-perf_iterate_ctx(struct perf_event_context *ctx,
-		   perf_iterate_f output,
+perf_event_aux_ctx(struct perf_event_context *ctx,
+		   perf_event_aux_output_cb output,
 		   void *data, bool all)
 {
 	struct perf_event *event;
@@ -5963,55 +5927,52 @@ perf_iterate_ctx(struct perf_event_context *ctx,
 	}
 }
 
-static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
+static void
+perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
+			struct perf_event_context *task_ctx)
 {
-	struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
-	struct perf_event *event;
-
-	list_for_each_entry_rcu(event, &pel->list, sb_list) {
-		if (event->state < PERF_EVENT_STATE_INACTIVE)
-			continue;
-		if (!event_filter_match(event))
-			continue;
-		output(event, data);
-	}
+	rcu_read_lock();
+	preempt_disable();
+	perf_event_aux_ctx(task_ctx, output, data, false);
+	preempt_enable();
+	rcu_read_unlock();
 }
 
-/*
- * Iterate all events that need to receive side-band events.
- *
- * For new callers; ensure that account_pmu_sb_event() includes
- * your event, otherwise it might not get delivered.
- */
 static void
-perf_iterate_sb(perf_iterate_f output, void *data,
+perf_event_aux(perf_event_aux_output_cb output, void *data,
 	       struct perf_event_context *task_ctx)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
+	struct pmu *pmu;
 	int ctxn;
 
-	rcu_read_lock();
-	preempt_disable();
-
 	/*
-	 * If we have task_ctx != NULL we only notify the task context itself.
-	 * The task_ctx is set only for EXIT events before releasing task
+	 * If we have task_ctx != NULL we only notify
+	 * the task context itself. The task_ctx is set
+	 * only for EXIT events before releasing task
 	 * context.
 	 */
 	if (task_ctx) {
-		perf_iterate_ctx(task_ctx, output, data, false);
-		goto done;
+		perf_event_aux_task_ctx(output, data, task_ctx);
+		return;
 	}
 
-	perf_iterate_sb_cpu(output, data);
-
-	for_each_task_context_nr(ctxn) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->unique_pmu != pmu)
+			goto next;
+		perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
+		ctxn = pmu->task_ctx_nr;
+		if (ctxn < 0)
+			goto next;
 		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
 		if (ctx)
-			perf_iterate_ctx(ctx, output, data, false);
+			perf_event_aux_ctx(ctx, output, data, false);
+next:
+		put_cpu_ptr(pmu->pmu_cpu_context);
 	}
-done:
-	preempt_enable();
 	rcu_read_unlock();
 }
 
@@ -6060,7 +6021,7 @@ void perf_event_exec(void)
 
 		perf_event_enable_on_exec(ctxn);
 
-		perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
+		perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
 				   true);
 	}
 	rcu_read_unlock();
@@ -6104,9 +6065,9 @@ static int __perf_pmu_output_stop(void *info)
 	};
 
 	rcu_read_lock();
-	perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
+	perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
 	if (cpuctx->task_ctx)
-		perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+		perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
 				   &ro, false);
 	rcu_read_unlock();
 
@@ -6235,7 +6196,7 @@ static void perf_event_task(struct task_struct *task,
 		},
 	};
 
-	perf_iterate_sb(perf_event_task_output,
+	perf_event_aux(perf_event_task_output,
 		       &task_event,
 		       task_ctx);
 }
@@ -6314,7 +6275,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 
 	comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
 
-	perf_iterate_sb(perf_event_comm_output,
+	perf_event_aux(perf_event_comm_output,
 		       comm_event,
 		       NULL);
 }
@@ -6545,7 +6506,7 @@ got_name:
 
 	mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
 
-	perf_iterate_sb(perf_event_mmap_output,
+	perf_event_aux(perf_event_mmap_output,
 		       mmap_event,
 		       NULL);
 
@@ -6628,7 +6589,7 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma)
 		if (!ctx)
 			continue;
 
-		perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true);
+		perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
 	}
 	rcu_read_unlock();
 }
@@ -6815,7 +6776,7 @@ static void perf_event_switch(struct task_struct *task,
 		},
 	};
 
-	perf_iterate_sb(perf_event_switch_output,
+	perf_event_aux(perf_event_switch_output,
 		       &switch_event,
 		       NULL);
 }
@@ -8739,28 +8700,6 @@ unlock:
 	return pmu;
 }
 
-static void attach_sb_event(struct perf_event *event)
-{
-	struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
-
-	raw_spin_lock(&pel->lock);
-	list_add_rcu(&event->sb_list, &pel->list);
-	raw_spin_unlock(&pel->lock);
-}
-
-/*
- * We keep a list of all !task (and therefore per-cpu) events
- * that need to receive side-band records.
- *
- * This avoids having to scan all the various PMU per-cpu contexts
- * looking for them.
- */
-static void account_pmu_sb_event(struct perf_event *event)
-{
-	if (is_sb_event(event))
-		attach_sb_event(event);
-}
-
 static void account_event_cpu(struct perf_event *event, int cpu)
 {
 	if (event->parent)
@@ -8841,8 +8780,6 @@ static void account_event(struct perf_event *event)
 enabled:
 
 	account_event_cpu(event, event->cpu);
-
-	account_pmu_sb_event(event);
 }
 
 /*
@@ -10351,9 +10288,6 @@ static void __init perf_event_init_all_cpus(void)
 		swhash = &per_cpu(swevent_htable, cpu);
 		mutex_init(&swhash->hlist_mutex);
 		INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
-
-		INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
-		raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
 	}
 }
 
-- 
2.7.4
