On 01.09.2021 14:36, Nikita Yushchenko wrote: > High order allocation detector monitors allocations of order greater > than zero, and generates an uevent if a configured number of allocations > happen within configured time. > > In response to this uevent, userspace can enable event tracing. If a > stream of high-order allocations continues, the trace could help to > detect the code path causing them. > > HOAD has a sysfs control interface, at /sys/kernel/mm/hoad/control: > - "enable ORDER COUNT MSECS" > Sets up monitoring allocations of order ORDER: if COUNT such > allocations are detected within MSECS, uevent is sent. Then further > uevents are suspended, to avoid userspace races. > - "disable ORDER" > Stops monitoring allocations of order ORDER. > - "resume [delay-msecs]" > Allow sending a new uevent, either immediately or after the given > delay. > > The uevent is generated with ACTION="change", SUBSYSTEM="hoad", ORDER > set to the order of the allocation that has caused the uevent. > > Also HOAD provides a tracepoint named "hoad", under kmem/ group, that > could be used for tracing. This tracepoint hits on every allocation of > order greater than or equal to the minimal order for which monitoring is enabled. > > https://jira.sw.ru/browse/PSBM-92088 > Signed-off-by: Nikita Yushchenko <[email protected]>
Reviewed-by: Kirill Tkhai <[email protected]> > --- > include/trace/events/kmem.h | 12 ++ > mm/page_alloc.c | 266 ++++++++++++++++++++++++++++++++++++ > 2 files changed, 278 insertions(+) > > diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h > index 9cb647609df3..b425c6856bfd 100644 > --- a/include/trace/events/kmem.h > +++ b/include/trace/events/kmem.h > @@ -305,6 +305,18 @@ TRACE_EVENT(mm_page_alloc_extfrag, > __entry->alloc_migratetype == __entry->fallback_migratetype) > ); > > +TRACE_EVENT(hoad, > + TP_PROTO(int order), > + TP_ARGS(order), > + TP_STRUCT__entry( > + __field(int, order) > + ), > + TP_fast_assign( > + __entry->order = order; > + ), > + TP_printk("order=%d", __entry->order) > +); > + > #endif /* _TRACE_KMEM_H */ > > /* This part must be outside protection */ > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index 1ae193b26a1d..959b1bfbafef 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -3533,6 +3533,270 @@ static __always_inline void warn_high_order(int > order, gfp_t gfp_mask) > } > } > > +struct hoad_order_info { > + unsigned long interval; > + int max_allocs; > + atomic_t counter; > + unsigned long since_jiffies; > + struct timer_list reset_counter_timer; > +}; > + > +static struct hoad_order_info *hoad_table[MAX_ORDER]; > +static DEFINE_MUTEX(hoad_mutex); > +static struct kobject *hoad_kobj; > +static int hoad_uevent_order; > +static unsigned long hoad_resume_jiffies; > +static int hoad_trace_min_order; > + > +#define MSEC_PER_MINUTE (60 * MSEC_PER_SEC) > +#define MSEC_PER_HOUR (60 * MSEC_PER_MINUTE) > +#define MSEC_PER_DAY (60 * MSEC_PER_HOUR) > + > +static void hoad_reset_counter(struct timer_list *timer) > +{ > + struct hoad_order_info *hoi = container_of(timer, > + struct hoad_order_info, reset_counter_timer); > + > + atomic_set(&hoi->counter, 0); > +} > + > +static void hoad_send_uevent(struct work_struct *work) > +{ > + char order_string[16]; > + char *envp[] = { order_string, NULL }; > + > + 
sprintf(order_string, "ORDER=%d", hoad_uevent_order); > + kobject_uevent_env(hoad_kobj, KOBJ_CHANGE, envp); > +} > +static DECLARE_WORK(hoad_send_uevent_work, hoad_send_uevent); > + > +static void hoad_resume(unsigned long unused) > +{ > + hoad_uevent_order = 0; > +} > +static DEFINE_TIMER(hoad_resume_timer, hoad_resume, 0, 0); > + > +static void hoad_notice_alloc(int order, gfp_t gfp) > +{ > + struct hoad_order_info *hoi; > + int count; > + bool hit = false; > + > + if (gfp & (__GFP_NORETRY | __GFP_ORDER_NOWARN)) > + return; > + > + if (order >= hoad_trace_min_order) > + trace_hoad(order); > + > + rcu_read_lock(); > + hoi = rcu_dereference(hoad_table[order]); > + if (hoi) { > + count = atomic_inc_return(&hoi->counter); > + if (count == 1) { > + hoi->since_jiffies = jiffies; > + mod_timer(&hoi->reset_counter_timer, > + hoi->since_jiffies + hoi->interval); > + } > + hit = (count == hoi->max_allocs); > + } > + rcu_read_unlock(); > + > + if (hit) { > + if (cmpxchg(&hoad_uevent_order, 0, order) == 0) > + schedule_work(&hoad_send_uevent_work); > + } > +} > + > +static void hoad_install_order_info(int order, struct hoad_order_info *hoi) > +{ > + struct hoad_order_info *oldhoi; > + int i; > + > + mutex_lock(&hoad_mutex); > + oldhoi = hoad_table[order]; > + rcu_assign_pointer(hoad_table[order], hoi); > + for (i = 1; i < MAX_ORDER; i++) { > + if (hoad_table[i]) > + break; > + } > + hoad_trace_min_order = i; > + mutex_unlock(&hoad_mutex); > + > + if (oldhoi) { > + synchronize_rcu(); > + del_timer_sync(&oldhoi->reset_counter_timer); > + kfree(oldhoi); > + } > +} > + > +static int hoad_enable_for_order(int order, int max_allocs, > + unsigned int interval_msecs) > +{ > + struct hoad_order_info *hoi; > + unsigned long interval; > + > + if (order < 1 || order >= MAX_ORDER) > + return -EINVAL; > + if (max_allocs < 1) > + return -EINVAL; > + interval = msecs_to_jiffies(interval_msecs); > + if (interval < 1) > + return -EINVAL; > + > + hoi = kzalloc(sizeof(*hoi), GFP_KERNEL); > + if 
(!hoi) > + return -ENOMEM; > + hoi->interval = interval; > + hoi->max_allocs = max_allocs; > + timer_setup(&hoi->reset_counter_timer, hoad_reset_counter, 0); > + > + hoad_install_order_info(order, hoi); > + return 0; > +} > + > +static int hoad_disable_for_order(int order) > +{ > + if (order < 1 || order >= MAX_ORDER) > + return -EINVAL; > + > + hoad_install_order_info(order, NULL); > + return 0; > +} > + > +static ssize_t hoad_control_show(struct kobject *kobj, > + struct kobj_attribute *attr, char *buf) > +{ > + char *p = buf, *endp = &p[PAGE_SIZE - 1]; > + int order; > + struct hoad_order_info *hoi; > + int counter; > + long d; > + unsigned int msecs; > + > + rcu_read_lock(); > + for (order = 1; order < MAX_ORDER; order++) { > + hoi = rcu_dereference(hoad_table[order]); > + if (hoi) { > + counter = atomic_read(&hoi->counter); > + msecs = counter ? > + jiffies_to_msecs(jiffies - hoi->since_jiffies) : > + 0; > + p += snprintf(p, endp - p, > + "order %u: %u/%u in %u/%u msecs\n", > + order, counter, hoi->max_allocs, > + msecs, jiffies_to_msecs(hoi->interval)); > + } > + } > + rcu_read_unlock(); > + if (hoad_uevent_order) { > + p += snprintf(p, endp - p, "event generation suspended"); > + d = (long)(hoad_resume_jiffies - jiffies); > + if (d > 0) { > + p += snprintf(p, endp - p, ", resume in "); > + msecs = jiffies_to_msecs(d); > + if (msecs >= 2 * MSEC_PER_HOUR) > + p += snprintf(p, endp - p, "%lu hours", > + (msecs + (MSEC_PER_HOUR / 2)) / > + MSEC_PER_HOUR); > + else if (msecs > 2 * MSEC_PER_MINUTE) > + p += snprintf(p, endp - p, "%lu minutes", > + (msecs + (MSEC_PER_MINUTE) / 2) / > + MSEC_PER_MINUTE); > + else > + p += snprintf(p, endp - p, "%lu seconds", > + (msecs + MSEC_PER_SEC - 1) / > + MSEC_PER_SEC); > + } > + p += snprintf(p, endp - p, "\n"); > + } > + > + return p - buf; > +} > + > +static ssize_t hoad_control_store(struct kobject *kobj, > + struct kobj_attribute *attr, const char *buf, size_t len) > +{ > + char *p, *q; > + int order, max_allocs, ret; > + 
unsigned int msecs; > + unsigned long d; > + char c; > + > + if (len == 0) > + return 0; > + p = kstrdup(buf, GFP_KERNEL); > + if (!p) > + return -ENOMEM; > + q = strim(p); > + if (*q == '\0') { > + ret = 0; > + goto out; > + } > + > + if (sscanf(q, "enable %u %u %u%c", > + &order, &max_allocs, &msecs, &c) == 3) > + ret = hoad_enable_for_order(order, max_allocs, msecs); > + else if (sscanf(q, "disable %u%c", &order, &c) == 1) > + ret = hoad_disable_for_order(order); > + else if (sscanf(q, "resume %u%c", &msecs, &c) == 1) { > + if (msecs > 5 * MSEC_PER_DAY) > + ret = -EINVAL; > + else { > +do_resume: > + d = msecs_to_jiffies(msecs); > + hoad_resume_jiffies = jiffies + d; > + mod_timer(&hoad_resume_timer, hoad_resume_jiffies); > + ret = 0; > + } > + } else if (!strcmp(q, "resume")) { > + msecs = 0; > + goto do_resume; > + } else { > + ret = -EINVAL; > + } > + > +out: > + kfree(p); > + return ret ? ret : len; > +} > + > +static struct kobj_attribute hoad_control_attr = { > + .attr.name = "control", > + .attr.mode = S_IRUSR | S_IWUSR, > + .show = hoad_control_show, > + .store = hoad_control_store, > +}; > + > +static int hoad_init(void) > +{ > + struct kset *kset; > + int ret; > + > + /* To be able to generate uevents, need a kobject with kset defined. > + * > + * To avoid extra depth inside sysfs, create a kset and use its > + * internal kobject, by setting its 'kset' field to itself. > + */ > + kset = kset_create_and_add("hoad", NULL, mm_kobj); > + if (!kset) > + return -ENOMEM; > + hoad_kobj = &kset->kobj; > + hoad_kobj->kset = kset; > + > + ret = sysfs_create_file(hoad_kobj, &hoad_control_attr.attr); > + if (ret) { > + hoad_kobj->kset = NULL; > + hoad_kobj = NULL; > + kset_put(kset); > + return ret; > + } > + > + hoad_trace_min_order = MAX_ORDER; > + hoad_resume_jiffies = jiffies; > + return 0; > +} > +late_initcall(hoad_init); > + > /* > * This is the 'heart' of the zoned buddy allocator. 
> */ > @@ -3557,6 +3821,8 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int > order, > !(current->flags & PF_MEMALLOC)); > > warn_high_order(order, gfp_mask); > + if (order > 0) > + hoad_notice_alloc(order, gfp_mask); > > if (should_fail_alloc_page(gfp_mask, order)) > return NULL; > _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
