Low-level kernel functions are called from many different paths. When debugging, it is often useful to filter trace events to only those occurring within a specific call chain.
Add a "within" filter predicate that tests whether a given function appears in the current call stack at event time. The function name is resolved to its address range via kallsyms during filter setup; at runtime, the predicate captures the call stack with stack_trace_save() and compares each saved return address against the stored range. At most 16 stack entries are examined, so a function deeper in the call chain is not matched. Example: echo 'within == "vfs_read"' > events/sched/sched_switch/filter Only "==" and "!=" operators are supported. The filter depends on CONFIG_STACKTRACE. Signed-off-by: Chen Jun <[email protected]> --- Documentation/trace/events.rst | 12 +++++++++ include/linux/trace_events.h | 1 + kernel/trace/trace.h | 3 ++- kernel/trace/trace_events.c | 3 +++ kernel/trace/trace_events_filter.c | 41 ++++++++++++++++++++++++++++-- 5 files changed, 57 insertions(+), 3 deletions(-) diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst index 18d112963dec..6e3877d376a9 100644 --- a/Documentation/trace/events.rst +++ b/Documentation/trace/events.rst @@ -243,6 +243,18 @@ the function "security_prepare_creds" and less than the end of that function. The ".function" postfix can only be attached to values of size long, and can only be compared with "==" or "!=". +The special field "within" can be used to filter events based on whether +a specific function appears in the current call stack:: + + within == "function_name" + within != "function_name" + +For example, to only trace events where "vfs_read" is in the call stack:: + + # echo 'within == "vfs_read"' > events/sched/sched_switch/filter + +The within field supports only the "==" and "!=" operators. + Cpumask fields or scalar fields that encode a CPU number can be filtered using a user-provided cpumask in cpulist format. 
The format is as follows:: diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 40a43a4c7caf..9ed22c210add 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -851,6 +851,7 @@ enum { FILTER_COMM, FILTER_CPU, FILTER_STACKTRACE, + FILTER_WITHIN, }; extern int trace_event_raw_init(struct trace_event_call *call); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 80fe152af1dd..a383da42badf 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1825,7 +1825,8 @@ static inline bool is_string_field(struct ftrace_event_field *field) field->filter_type == FILTER_RDYN_STRING || field->filter_type == FILTER_STATIC_STRING || field->filter_type == FILTER_PTR_STRING || - field->filter_type == FILTER_COMM; + field->filter_type == FILTER_COMM || + field->filter_type == FILTER_WITHIN; } static inline bool is_function_field(struct ftrace_event_field *field) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c46e623e7e0d..b7d681e55b0c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -199,6 +199,9 @@ static int trace_define_generic_fields(void) __generic_field(char *, comm, FILTER_COMM); __generic_field(char *, stacktrace, FILTER_STACKTRACE); __generic_field(char *, STACKTRACE, FILTER_STACKTRACE); +#ifdef CONFIG_STACKTRACE + __generic_field(char *, within, FILTER_WITHIN); +#endif return ret; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 609325f57942..34e1a7f0b3cd 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -72,6 +72,7 @@ enum filter_pred_fn { FILTER_PRED_FN_CPUMASK, FILTER_PRED_FN_CPUMASK_CPU, FILTER_PRED_FN_FUNCTION, + FILTER_PRED_FN_WITHIN, FILTER_PRED_FN_, FILTER_PRED_TEST_VISITED, }; @@ -1009,6 +1010,22 @@ static int filter_pred_function(struct filter_pred *pred, void *event) return pred->op == OP_EQ ? ret : !ret; } +/* Filter predicate for within. 
*/ +static int filter_pred_within(struct filter_pred *pred, void *event) +{ +#ifdef CONFIG_STACKTRACE + unsigned long entries[16]; + unsigned int nr_entries; + int i; + + nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); + for (i = 0; i < nr_entries; i++) + if (pred->val <= entries[i] && entries[i] < pred->val2) + return !pred->not; +#endif + return pred->not; +} + /* * regex_match_foo - Basic regex callbacks * @@ -1617,6 +1634,8 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event) return filter_pred_cpumask_cpu(pred, event); case FILTER_PRED_FN_FUNCTION: return filter_pred_function(pred, event); + case FILTER_PRED_FN_WITHIN: + return filter_pred_within(pred, event); case FILTER_PRED_TEST_VISITED: return test_pred_visited_fn(pred, event); default: @@ -2002,10 +2021,28 @@ static int parse_pred(const char *str, void *data, } else if (field->filter_type == FILTER_DYN_STRING) { pred->fn_num = FILTER_PRED_FN_STRLOC; - } else if (field->filter_type == FILTER_RDYN_STRING) + } else if (field->filter_type == FILTER_RDYN_STRING) { pred->fn_num = FILTER_PRED_FN_STRRELLOC; - else { + } else if (field->filter_type == FILTER_WITHIN) { + unsigned long func; + + if (op == OP_GLOB) + goto err_free; + pred->fn_num = FILTER_PRED_FN_WITHIN; + func = kallsyms_lookup_name(pred->regex->pattern); + if (!func) { + parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i); + goto err_free; + } + /* Now find the function start and end address */ + if (!kallsyms_lookup_size_offset(func, &size, &offset)) { + parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i); + goto err_free; + } + pred->val = func - offset; + pred->val2 = pred->val + size; + } else { if (!ustring_per_cpu) { /* Once allocated, keep it around for good */ ustring_per_cpu = alloc_percpu(struct ustring_buffer); -- 2.43.0
