Add the perf_event_attr::remove_on_exec bit to support removing an event
from a task on exec.

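This option supports the case where an event is supposed to be
process-wide only and must not propagate beyond exec, which limits
monitoring to the original process image. Because the event is removed
on exec, combining remove_on_exec with enable_on_exec is rejected with
-EINVAL.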

Signed-off-by: Marco Elver <el...@google.com>
---
v2:
* Add patch to series.
---
 include/uapi/linux/perf_event.h |  3 ++-
 kernel/events/core.c            | 45 +++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 813efb65fea8..8c5b9f5ad63f 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -390,7 +390,8 @@ struct perf_event_attr {
                                text_poke      :  1, /* include text poke events */
                                build_id       :  1, /* use build id in mmap2 events */
                                inherit_thread :  1, /* children only inherit if cloned with CLONE_THREAD */
-                               __reserved_1   : 28;
+                               remove_on_exec :  1, /* event is removed from task on exec */
+                               __reserved_1   : 27;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a8382e6c907c..bc9e6e35e414 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4195,6 +4195,46 @@ static void perf_event_enable_on_exec(int ctxn)
                put_ctx(clone_ctx);
 }
 
+static void perf_remove_from_owner(struct perf_event *event);
+static void perf_event_exit_event(struct perf_event *child_event,
+                                 struct perf_event_context *child_ctx,
+                                 struct task_struct *child);
+
+/*
+ * Detach and exit every remove-on-exec event in each task context of current.
+ * NOTE(review): no unpin pairs with perf_pin_task_context() here -- confirm.
+ */
+static void perf_event_remove_on_exec(void)
+{
+       int ctxn;
+
+       for_each_task_context_nr(ctxn) {
+               struct perf_event_context *ctx;
+               struct perf_event *event, *next;
+
+               ctx = perf_pin_task_context(current, ctxn);
+               if (!ctx)
+                       continue;
+               mutex_lock(&ctx->mutex);
+
+               list_for_each_entry_safe(event, next, &ctx->event_list, 
event_entry) {
+                       if (!event->attr.remove_on_exec)
+                               continue;
+
+                       if (!is_kernel_event(event))
+                               perf_remove_from_owner(event);
+                       perf_remove_from_context(event, DETACH_GROUP);
+                       /*
+                        * Already detached from ctx above; exiting the event is
+                        * expected to feed its values back to the parent event.
+                        */
+                       perf_event_exit_event(event, ctx, current);
+               }
+               mutex_unlock(&ctx->mutex);
+               put_ctx(ctx);
+       }
+}
+
 struct perf_read_data {
        struct perf_event *event;
        bool group;
@@ -7519,6 +7559,8 @@ void perf_event_exec(void)
                                   true);
        }
        rcu_read_unlock();
+
+       perf_event_remove_on_exec();
 }
 
 struct remote_output {
@@ -11600,6 +11642,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
        if (!attr->inherit && attr->inherit_thread)
                return -EINVAL;
 
+       if (attr->remove_on_exec && attr->enable_on_exec)
+               return -EINVAL;
+
 out:
        return ret;
 
-- 
2.30.1.766.gb4fecdf3b7-goog

Reply via email to