On Fri, 25 Jul 2025 09:21:28 -0400 Steven Rostedt <[email protected]> wrote:
> On Thu, 24 Jul 2025 13:46:35 +0900 > "Masami Hiramatsu (Google)" <[email protected]> wrote: > > > From: Masami Hiramatsu (Google) <[email protected]> > > > > With CONFIG_DEBUG_INFO_BTF=y and PAHOLE_HAS_BTF_TAG=y, `__user` is > > converted to `__attribute__((btf_type_tag("user")))`. In this case, > > some syscall events have it for __user data, like below; > > > > /sys/kernel/tracing # cat events/syscalls/sys_enter_openat/format > > name: sys_enter_openat > > ID: 720 > > format: > > field:unsigned short common_type; offset:0; size:2; > > signed:0; > > field:unsigned char common_flags; offset:2; size:1; > > signed:0; > > field:unsigned char common_preempt_count; offset:3; > > size:1; signed:0; > > field:int common_pid; offset:4; size:4; signed:1; > > > > field:int __syscall_nr; offset:8; size:4; signed:1; > > field:int dfd; offset:16; size:8; signed:0; > > field:const char __attribute__((btf_type_tag("user"))) * filename; > > offset:24; size:8; signed:0; > > field:int flags; offset:32; size:8; signed:0; > > field:umode_t mode; offset:40; size:8; signed:0; > > > > > > Then the trace event filter fails to set the string acceptable flag > > (FILTER_PTR_STRING) to the field and rejects setting string filter; > > > > # echo 'filename.ustring ~ "*ftracetest-dir.wbx24v*"' \ > > >> events/syscalls/sys_enter_openat/filter > > sh: write error: Invalid argument > > # cat error_log > > [ 723.743637] event filter parse error: error: Expecting numeric field > > Command: filename.ustring ~ "*ftracetest-dir.wbx24v*" > > > > Since this __attribute__ makes format parsing complicated and not > > needed, remove the __attribute__(.*) from the type string. > > > > Signed-off-by: Masami Hiramatsu (Google) <[email protected]> > > --- > > Changes in v4: > > - Run sanitizer only if btf_type_tag() attribute is defined. > > Changes in v3: > > - Sanitize field in update_event_field() to avoid boottime performance > > overhead. > > - Change the function names because those are not always require eval > > maps. > > - Remove unneeded alloc_type flag. > > Changes in v2: > > - Add memory allocation check flag. > > - Check the flag in update_event_fields() to avoid memory leak. > > - Fix 'static const int ... strlen()' issue. > > - Fix to find 2nd __attribute__ correctly. (adjust next after strcpy) > > --- > > kernel/trace/trace.c | 25 ++++++--- > > kernel/trace/trace.h | 4 + > > kernel/trace/trace_events.c | 115 > > +++++++++++++++++++++++++++++++++++++------ > > 3 files changed, 116 insertions(+), 28 deletions(-) > > > > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c > > index 95ae7c4e5835..b62528c42e26 100644 > > --- a/kernel/trace/trace.c > > +++ b/kernel/trace/trace.c > > @@ -5937,17 +5937,26 @@ static inline void > > trace_insert_eval_map_file(struct module *mod, > > struct trace_eval_map **start, int len) { } > > #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ > > > > -static void trace_insert_eval_map(struct module *mod, > > - struct trace_eval_map **start, int len) > > +static void > > +trace_event_update_with_eval_map(struct module *mod, > > + struct trace_eval_map **start, > > + int len) > > { > > struct trace_eval_map **map; > > > > - if (len <= 0) > > + /* Always run sanitizer only if btf_type_tag attr exists. */ > > + if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && > > + IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && > > + __has_attribute(btf_type_tag)) && > > + len <= 0) > > return; > > The above is quite messy and hard to read. Can you change it to: > > if (len <= 0) { > /* If bpf_type_tag attr is used, still need to sanitize */ > if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && > IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && > __has_attribute(btf_type_tag)) > return; > } > > Not much better, but a little easier to read. OK. > > > > > map = start; > > > > - trace_event_eval_update(map, len); > > + trace_event_update_all(map, len); > > + > > + if (len <= 0) > > + return; > > > > trace_insert_eval_map_file(mod, start, len); > > } > > @@ -10335,7 +10344,7 @@ static void __init eval_map_work_func(struct > > work_struct *work) > > int len; > > > > len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; > > - trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len); > > + trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len); > > } > > > > static int __init trace_eval_init(void) > > @@ -10388,9 +10397,6 @@ bool module_exists(const char *module) > > > > static void trace_module_add_evals(struct module *mod) > > { > > - if (!mod->num_trace_evals) > > - return; > > - > > /* > > * Modules with bad taint do not have events created, do > > * not bother with enums either. > > @@ -10398,7 +10404,8 @@ static void trace_module_add_evals(struct module > > *mod) > > if (trace_module_has_bad_taint(mod)) > > return; > > > > - trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals); > > + /* Even if no trace_evals, this need to sanitize field types. */ > > + trace_event_update_with_eval_map(mod, mod->trace_evals, > > mod->num_trace_evals); > > } > > > > #ifdef CONFIG_TRACE_EVAL_MAP_FILE > > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h > > index bd084953a98b..1dbf1d3cf2f1 100644 > > --- a/kernel/trace/trace.h > > +++ b/kernel/trace/trace.h > > @@ -2125,13 +2125,13 @@ static inline const char *get_syscall_name(int > > syscall) > > > > #ifdef CONFIG_EVENT_TRACING > > void trace_event_init(void); > > -void trace_event_eval_update(struct trace_eval_map **map, int len); > > +void trace_event_update_all(struct trace_eval_map **map, int len); > > /* Used from boot time tracer */ > > extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int > > set); > > extern int trigger_process_regex(struct trace_event_file *file, char > > *buff); > > #else > > static inline void __init trace_event_init(void) { } > > -static inline void trace_event_eval_update(struct trace_eval_map **map, > > int len) { } > > +static inline void trace_event_update_all(struct trace_eval_map **map, int > > len) { } > > #endif > > > > #ifdef CONFIG_TRACER_SNAPSHOT > > diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c > > index 120531268abf..5289e2032678 100644 > > --- a/kernel/trace/trace_events.c > > +++ b/kernel/trace/trace_events.c > > @@ -3264,41 +3264,115 @@ static void add_str_to_module(struct module > > *module, char *str) > > list_add(&modstr->next, &module_strings); > > } > > > > +#define ATTRIBUTE_STR "__attribute__" > > If you are going to test for the '(' right after, why not add it to the > strstr test? Ah, indeed. > > > +#define ATTRIBUTE_STR_LEN (sizeof(ATTRIBUTE_STR) - 1) > > + > > +/* Remove all __attribute__() from type */ > > +static void sanitize_field_type(char *type) > > +{ > > + char *attr, *tmp, *next; > > + int depth; > > + > > + next = type; > > + while ((attr = strstr(next, ATTRIBUTE_STR))) { > > + next = attr + ATTRIBUTE_STR_LEN; > > + > > + /* Retry if __attribute__ is a part of type name. */ > > + if ((attr != type && !isspace(attr[-1])) || > > + *next != '(') > > Now you wouldn't need the *next != '(' here. > > > + continue; > > + > > + depth = 0; > > /* the ATTRIBUTE_STR already has the first '(' */ > depth = 1; Yeah, OK. > > > + while ((tmp = strpbrk(next, "()"))) { > > + if (*tmp == '(') > > + depth++; > > + else > > + depth--; > > + next = tmp + 1; > > + if (depth == 0) > > + break; > > + } > > + next = skip_spaces(next); > > + strcpy(attr, next); > > + next = attr; > > + } > > +} > > + > > +static bool need_sanitize_field_type(const char *type) > > +{ > > + return !!strstr(type, ATTRIBUTE_STR); > > +} > > + > > +static char *find_replacable_eval(const char *type, const char > > *eval_string, > > + int len) > > +{ > > + char *ptr; > > + > > + if (!eval_string) > > + return NULL; > > + > > + ptr = strchr(type, '['); > > + if (!ptr) > > + return NULL; > > + ptr++; > > + > > + if (!isalpha(*ptr) && *ptr != '_') > > + return NULL; > > + > > + if (strncmp(eval_string, ptr, len) != 0) > > + return NULL; > > + > > + return ptr; > > +} > > + > > static void update_event_fields(struct trace_event_call *call, > > struct trace_eval_map *map) > > { > > struct ftrace_event_field *field; > > + const char *eval_string = NULL; > > struct list_head *head; > > + bool need_sanitize; > > + int len = 0; > > char *ptr; > > char *str; > > - int len = strlen(map->eval_string); > > > > /* Dynamic events should never have field maps */ > > - if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC)) > > + if (call->flags & TRACE_EVENT_FL_DYNAMIC) > > return; > > > > + if (map) { > > + eval_string = map->eval_string; > > + len = strlen(map->eval_string); > > + } > > + > > head = trace_get_fields(call); > > list_for_each_entry(field, head, link) { > > - ptr = strchr(field->type, '['); > > - if (!ptr) > > - continue; > > - ptr++; > > > > - if (!isalpha(*ptr) && *ptr != '_') > > - continue; > > - > > - if (strncmp(map->eval_string, ptr, len) != 0) > > + /* Check the field has bad string or eval. */ > > + need_sanitize = need_sanitize_field_type(field->type); > > Is there a reason you can't call the sanitize first? > > Then you wouldn't need to do the strstr(ATTRIBUTE_STR) twice. OK, let me fix to call strstr() once. Thank you, > > -- Steve > > > > + ptr = find_replacable_eval(field->type, eval_string, len); > > + if (likely(!need_sanitize && !ptr)) > > continue; > > > > str = kstrdup(field->type, GFP_KERNEL); > > if (WARN_ON_ONCE(!str)) > > return; > > - ptr = str + (ptr - field->type); > > - ptr = eval_replace(ptr, map, len); > > - /* enum/sizeof string smaller than value */ > > - if (WARN_ON_ONCE(!ptr)) { > > - kfree(str); > > - continue; > > + > > + if (ptr) { > > + ptr = str + (ptr - field->type); > > + > > + ptr = eval_replace(ptr, map, len); > > + /* enum/sizeof string smaller than value */ > > + if (WARN_ON_ONCE(!ptr) && !need_sanitize) { > > + kfree(str); > > + continue; > > + } > > + } > > + if (need_sanitize) { > > + sanitize_field_type(str); > > + /* Update field type */ > > + if (field->filter_type == FILTER_OTHER) > > + field->filter_type = filter_assign_type(str); > > } > > > > /* > > @@ -3313,11 +3387,13 @@ static void update_event_fields(struct > > trace_event_call *call, > > } > > } > > > > -void trace_event_eval_update(struct trace_eval_map **map, int len) > > +/* Update all events for replacing eval and sanitizing */ > > +void trace_event_update_all(struct trace_eval_map **map, int len) > > { > > struct trace_event_call *call, *p; > > const char *last_system = NULL; > > bool first = false; > > + bool updated; > > int last_i; > > int i; > > > > @@ -3330,6 +3406,7 @@ void trace_event_eval_update(struct trace_eval_map > > **map, int len) > > last_system = call->class->system; > > } > > > > + updated = false; > > /* > > * Since calls are grouped by systems, the likelihood that the > > * next call in the iteration belongs to the same system as the > > @@ -3349,8 +3426,12 @@ void trace_event_eval_update(struct trace_eval_map > > **map, int len) > > } > > update_event_printk(call, map[i]); > > update_event_fields(call, map[i]); > > + updated = true; > > } > > } > > + /* If not updated yet, update field for sanitizing. */ > > + if (!updated) > > + update_event_fields(call, NULL); > > cond_resched(); > > } > > up_write(&trace_event_sem); > > -- Masami Hiramatsu (Google) <[email protected]>
