Pretend that object allocations from marked SLAB caches are always performed by a PF_MEMALLOC process, so they bypass memory cgroup limits.
Added per-SLAB "pf_memalloc" attribute to disable/enable the logic on the fly. https://virtuozzo.atlassian.net/browse/PSBM-155867 Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com> --- include/linux/slab.h | 1 + mm/slub.c | 59 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 8722dc8864be..263f10a436f6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -24,6 +24,7 @@ #define SLAB_CONSISTENCY_CHECKS 0x00000100UL /* DEBUG: Perform (expensive) checks on alloc/free */ #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ #define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */ +#define SLAB_PF_MEMALLOC 0x00001000UL /* DEBUG: Ignore memcg limits */ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ #define SLAB_CACHE_DMA 0x00004000UL /* Use GFP_DMA memory */ #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ diff --git a/mm/slub.c b/mm/slub.c index 630d39a0d63f..ea3e20e73fe6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1296,6 +1296,9 @@ static int __init setup_slub_debug(char *str) case 'a': slub_debug |= SLAB_FAILSLAB; break; + case 'm': + slub_debug |= SLAB_PF_MEMALLOC; + break; default: printk(KERN_ERR "slub_debug option '%c' " "unknown. skipped\n", *str); @@ -2482,8 +2485,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) * Version of __slab_alloc to use when we know that interrupts are * already disabled (which is the case for bulk allocation). 
*/ -static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, - unsigned long addr, struct kmem_cache_cpu *c) +static void *___slab_alloc_limited(struct kmem_cache *s, gfp_t gfpflags, int node, + unsigned long addr, struct kmem_cache_cpu *c) { void *freelist; struct page *page; @@ -2584,6 +2587,41 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, return freelist; } +/* + * "unlimited" function honors SLAB_PF_MEMALLOC flag on the SLAB and if set, + * ignores the memcg limits during the SLAB object allocation. + */ +static void *___slab_alloc_unlimited(struct kmem_cache *s, gfp_t gfpflags, int node, + unsigned long addr, struct kmem_cache_cpu *c) +{ + /* + * A big crutch: we suspect some SLAB allocation failure leads + * to a memory corruption, so for now we want to make marked SLABs + * to always succeed allocation. + */ + if (s->flags & SLAB_PF_MEMALLOC) { + void *freelist; + unsigned long pflags = current->flags; + + current->flags |= PF_MEMALLOC; + + freelist = ___slab_alloc_limited(s, gfpflags, node, addr, c); + + tsk_restore_flags(current, pflags, PF_MEMALLOC); + return freelist; + } else + return ___slab_alloc_limited(s, gfpflags, node, addr, c); +} + +/* + * A wrapper to keep the original function name. + */ +static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + unsigned long addr, struct kmem_cache_cpu *c) +{ + return ___slab_alloc_unlimited(s, gfpflags, node, addr, c); +} + /* * Another one that disabled interrupt and compensates for possible * cpu changes by refetching the per cpu area pointer. 
@@ -5253,6 +5291,22 @@ static ssize_t poison_store(struct kmem_cache *s, } SLAB_ATTR(poison); +static ssize_t pf_memalloc_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", !!(s->flags & SLAB_PF_MEMALLOC)); +} + +static ssize_t pf_memalloc_store(struct kmem_cache *s, + const char *buf, size_t length) +{ + s->flags &= ~SLAB_PF_MEMALLOC; + if (buf[0] == '1') { + s->flags |= SLAB_PF_MEMALLOC; + } + return length; +} +SLAB_ATTR(pf_memalloc); + static ssize_t store_user_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER)); @@ -5475,6 +5529,7 @@ static struct attribute *slab_attrs[] = { &trace_attr.attr, &red_zone_attr.attr, &poison_attr.attr, + &pf_memalloc_attr.attr, &store_user_attr.attr, &validate_attr.attr, &alloc_calls_attr.attr, -- 2.24.3 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel