Currently, there is no mechanism to filter events based on containers. perf -G (--cgroup) can be used, but it will not filter events for containers created after perf is invoked, making it difficult to assess or analyze performance issues of multiple containers at once. This limitation can be overcome if there is a standard kernel identifier for containers.
This patch introduces a container identifier entry field in perf sample data to identify or distinguish sample data of different containers. It uses the cgroup namespace inode number of a given task as its container identifier (cid). Alternatively, the inode number of the pid namespace can also be used as cid. This patch assumes each container is created with its own cgroup namespace. Suggested-by: Ananth N Mavinakayanahalli <ana...@linux.vnet.ibm.com> Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com> --- include/linux/perf_event.h | 4 ++++ include/uapi/linux/perf_event.h | 3 ++- kernel/events/core.c | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8ed43261..d43bbf2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -904,6 +904,10 @@ struct perf_sample_data { struct perf_regs regs_intr; u64 stack_user_size; + struct { + u32 cid; + u32 reserved; + } cid_entry; } ____cacheline_aligned; /* default value for data source */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485..fb4f902 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -139,8 +139,9 @@ enum perf_event_sample_format { PERF_SAMPLE_IDENTIFIER = 1U << 16, PERF_SAMPLE_TRANSACTION = 1U << 17, PERF_SAMPLE_REGS_INTR = 1U << 18, + PERF_SAMPLE_CID = 1U << 19, - PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ }; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index a19550d..b5d774c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5716,6 +5716,9 @@ void perf_output_sample(struct perf_output_handle *handle, } } + if (sample_type & PERF_SAMPLE_CID) + perf_output_put(handle, data->cid_entry); + if (!event->attr.watermark) { int wakeup_events = event->attr.wakeup_events; @@ -5849,6 +5852,18 @@ void perf_prepare_sample(struct perf_event_header *header, 
header->size += size; } + + if (sample_type & PERF_SAMPLE_CID) { + int size = sizeof(u64); + + /* + * Container identifier for a given task. + * Using cgroup namespace inode number for this. + */ + data->cid_entry.cid = current->nsproxy->cgroup_ns->ns.inum; + data->cid_entry.reserved = 0; + header->size += size; + } } static void __always_inline