reassign 912411 src:linux 4.9.110-3+deb9u2~deb8u1
thanks
On Wed, Oct 31, 2018 at 05:21:39PM +0800, 段熊春 wrote:
> Package: linux-image-4.9.0-0.bpo.7-amd64
> Version: 4.9.110-3+deb9u2~deb8u1
>
> Package: systemd
> Version: 230-7~bpo8+2
>
> hi guys:
> We suspect that we may have found a memory leak in the cgroup memory
> subsystem, leaking about 1 GByte/hour in one special case.
> The bug can be reproduced 100% of the time on mainline kernel version 4.19.
> (We also tried Debian's latest 4.14 and 4.9 kernels, with the same result.)
>
> This is what we have observed (Debian 9 Stretch, with mainline kernel
> version 4.19, kconfig attached) and how to reproduce it:
> A system with cgroups enabled. A demo service simulates "ill" behavior:
> the program is broken and exits immediately after startup:
>
> service code
> #include "stdio.h"
> #include "stdlib.h"
> int main()
> {
> void * p = malloc(10240);
> return 1;
> }
> Compile the above code and put the binary as /usr/bin/test
> systemd service
> [Service]
> ExecStart=/usr/bin/test
> Restart=always
> RestartSec=2s
> MemoryLimit=1G
> StartLimitInterval=0
> [Install]
> WantedBy=default.target
> Enable and start the above service with the tool systemctl.
>
> Some additional information:
> With strace attached to systemd before starting the service: systemd will
> mkdir a directory under /sys/fs/cgroup/memory for that service
> (/usr/bin/test). After the service stops, rmdir removes the corresponding
> entry under /sys/fs/cgroup/memory.
> With kprobes hooked to cgroup_mkdir and cgroup_rmdir: the numbers of calls
> to cgroup_mkdir and cgroup_rmdir are equal.
> With kprobes hooked to (1) mem_cgroup_css_alloc (2) mem_cgroup_css_free
> (3) mem_cgroup_css_released (4) mem_cgroup_css_offline:
> the numbers of calls to mem_cgroup_css_alloc and mem_cgroup_css_offline are
> equal (call this number A);
> the numbers of calls to mem_cgroup_css_free and mem_cgroup_css_released
> are equal (call this number B);
> A > B.
> With a jprobe, we collected the addresses of some memcgs. Inspecting the live
> kernel with the crash tool, the flags of the refcnt member in memcg->css
> have changed to __PERCPU_REF_ATOMIC_DEAD. memcg->css->refcnt->count keeps
> the same value as memcg->memory->count. After 24 hours, we observed that the
> data structure was still in use, and both counts had the value 1.
> We wrote a kernel module to put a memcg whose counter is 1; nothing happened
> except that the struct was freed.
> We suspect the issue may be caused by an incorrect call to try_charge and
> cancel_charge. This is just a guess.
> Following is some inspection code we used as described above:
> kprobe code
> #include <linux/kernel.h <https://wiki.bytedance.net/pages/kernel.h>>
> #include <linux/module.h <https://wiki.bytedance.net/pages/module.h>>
> #include <linux/kprobes.h <https://wiki.bytedance.net/pages/kprobes.h>>
>
>
> static struct kprobe mmalloc = {
> .symbol_name = "mem_cgroup_css_alloc",
> };
>
> static struct kprobe mmrealse = {
> .symbol_name = "mem_cgroup_css_free",
> };
> static struct kprobe mmmkdir = {
> .symbol_name = "mem_cgroup_css_released",
> };
> static struct kprobe mmrmdir = {
> .symbol_name = "mem_cgroup_css_offline",
> };
> atomic_t alloc;
> atomic_t realse;
> atomic_t cmkdir;
> atomic_t crmdir;
>
>
> static int handler_alloc_pre(struct kprobe *p, struct pt_regs *regs)
> {
> atomic_inc(&alloc);
> printk(KERN_INFO "alloc release %d offline %d alloc %d free
> %d\n",atomic_read(&cmkdir),atomic_read(&crmdir),atomic_read(&alloc),atomic_read(&realse));
> return 0;
> }
> static int handler_realse_pre(struct kprobe *p,struct pt_regs *regs)
> {
> atomic_inc(&realse);
> printk(KERN_INFO "free release %d offline %d alloc %d free
> %d\n",atomic_read(&cmkdir),atomic_read(&crmdir),atomic_read(&alloc),atomic_read(&realse));
> return 0;
> }
> static int handler_mkdir_pre(struct kprobe *p,struct pt_regs *regs)
> {
> atomic_inc(&cmkdir);
> printk(KERN_INFO "release release %d offline %d alloc %d free
> %d\n",atomic_read(&cmkdir),atomic_read(&crmdir),atomic_read(&alloc),atomic_read(&realse));
> return 0;
> }
> static int handler_rmdir_pre(struct kprobe *p,struct pt_regs *regs)
> {
> atomic_inc(&crmdir);
> printk(KERN_INFO "offline release %d offline %d alloc %d free
> %d\n",atomic_read(&cmkdir),atomic_read(&crmdir),atomic_read(&alloc),atomic_read(&realse));
> return 0;
> }
>
>
> static void handler_post(struct kprobe *p, struct pt_regs *regs,
> unsigned long flags)
> {
> }
>
>
> static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
> {
> return 0;
> }
>
> static int __init kprobe_init(void)
> {
> int ret;
> mmalloc.pre_handler
> <https://wiki.bytedance.net/pages/mmalloc.pre_handler> = handler_alloc_pre;
> mmalloc.post_handler
> <https://wiki.bytedance.net/pages/mmalloc.post_handler> = handler_post;
> mmalloc.fault_handler
> <https://wiki.bytedance.net/pages/mmalloc.fault_handler> = handler_fault;
>
> mmrealse.pre_handler
> <https://wiki.bytedance.net/pages/mmrealse.pre_handler> = handler_realse_pre;
> mmrealse.post_handler
> <https://wiki.bytedance.net/pages/mmrealse.post_handler> = handler_post;
> mmrealse.fault_handler
> <https://wiki.bytedance.net/pages/mmrealse.fault_handler> = handler_fault;
>
> mmmkdir.pre_handler
> <https://wiki.bytedance.net/pages/mmmkdir.pre_handler> = handler_mkdir_pre;
> mmmkdir.post_handler
> <https://wiki.bytedance.net/pages/mmmkdir.post_handler> = handler_post;
> mmmkdir.fault_handler
> <https://wiki.bytedance.net/pages/mmmkdir.fault_handler> = handler_fault;
>
> mmrmdir.pre_handler
> <https://wiki.bytedance.net/pages/mmrmdir.pre_handler> = handler_rmdir_pre;
> mmrmdir.post_handler
> <https://wiki.bytedance.net/pages/mmrmdir.post_handler> = handler_post;
> mmrmdir.fault_handler
> <https://wiki.bytedance.net/pages/mmrmdir.fault_handler> = handler_fault;
>
> atomic_set(&alloc,0);
> atomic_set(&realse,0);
> atomic_set(&cmkdir,0);
> atomic_set(&crmdir,0);
>
> ret = register_kprobe(&mmalloc);
> if (ret < 0) {
> printk(KERN_INFO "register_kprobe failed, returned %d\n", ret);
> return ret;
> }
> ret = register_kprobe(&mmrealse);
> if (ret < 0) {
> printk(KERN_INFO "register_kprobe failed, returned %d\n", ret);
> return ret;
> }
> ret = register_kprobe(&mmmkdir);
> if (ret < 0) {
> printk(KERN_INFO "register_kprobe failed, returned %d\n", ret);
> return ret;
> }
> ret = register_kprobe(&mmrmdir);
> if (ret < 0) {
> printk(KERN_INFO "register_kprobe failed, returned %d\n", ret);
> return ret;
> }
> printk(KERN_INFO "Planted kprobe at %p\n", mmalloc.addr);
> <https://wiki.bytedance.net/pages/mmalloc.addr);>
> printk(KERN_INFO "Planted kprobe at %p\n", mmrealse.addr);
> <https://wiki.bytedance.net/pages/mmrealse.addr);>
> printk(KERN_INFO "Planted kprobe at %p\n", mmmkdir.addr);
> <https://wiki.bytedance.net/pages/mmmkdir.addr);>
> printk(KERN_INFO "Planted kprobe at %p\n", mmrmdir.addr);
> <https://wiki.bytedance.net/pages/mmrmdir.addr);>
> return 0;
> }
>
> static void __exit kprobe_exit(void)
> {
> unregister_kprobe(&mmalloc);
> unregister_kprobe(&mmrealse);
> unregister_kprobe(&mmmkdir);
> unregister_kprobe(&mmrmdir);
> printk(KERN_INFO "kprobe at %p unregistered\n", mmalloc.addr);
> <https://wiki.bytedance.net/pages/mmalloc.addr);>
> printk(KERN_INFO "kprobe at %p unregistered\n", mmrealse.addr);
> <https://wiki.bytedance.net/pages/mmrealse.addr);>
> printk(KERN_INFO "kprobe at %p unregistered\n", mmmkdir.addr);
> <https://wiki.bytedance.net/pages/mmmkdir.addr);>
> printk(KERN_INFO "kprobe at %p unregistered\n", mmrmdir.addr);
> <https://wiki.bytedance.net/pages/mmrmdir.addr);>
> }
>
> module_init(kprobe_init)
> module_exit(kprobe_exit)
> MODULE_LICENSE("GPL");
> jprobe code
> #include <linux/kernel.h <https://wiki.bytedance.net/pages/kernel.h>>
> #include <linux/module.h <https://wiki.bytedance.net/pages/module.h>>
> #include <linux/kprobes.h <https://wiki.bytedance.net/pages/kprobes.h>>
> #include <linux/cgroup-defs.h
> <https://wiki.bytedance.net/pages/cgroup-defs.h>>
>
> static void test(struct cgroup_subsys_state *css){
> printk(KERN_INFO"memcg address %p refcnt %p !\n",css,(void
> *)css->refcnt.percpu_count_ptr);
> <https://wiki.bytedance.net/pages/refcnt.percpu_count_ptr);>
> jprobe_return();
> return;
> }
> static struct jprobe my_jprobe = {
> .entry = test,
> .kp = {
> .symbol_name = "mem_cgroup_css_offline",
> },
> };
>
> static int __init jprobe_init(void)
> {
> int ret;
>
> ret = register_jprobe(&my_jprobe);
> if (ret < 0) {
> printk(KERN_INFO "register_jprobe failed, returned %d\n", ret);
> return -1;
> }
> printk(KERN_INFO "Planted jprobe at %p, handler addr %p\n",
> my_jprobe.kp.addr,
> <https://wiki.bytedance.net/pages/my_jprobe.kp.addr,> my_jprobe.entry);
> <https://wiki.bytedance.net/pages/my_jprobe.entry);>
> return 0;
> }
>
> static void __exit jprobe_exit(void)
> {
> unregister_jprobe(&my_jprobe);
> printk(KERN_INFO "jprobe at %p unregistered\n", my_jprobe.kp.addr);
> <https://wiki.bytedance.net/pages/my_jprobe.kp.addr);>
> }
>
> module_init(jprobe_init)
> module_exit(jprobe_exit)
> MODULE_LICENSE("GPL");
> release kmod
> #include <linux/module.h <https://wiki.bytedance.net/pages/module.h>>
> #include <linux/cgroup-defs.h
> <https://wiki.bytedance.net/pages/cgroup-defs.h>>
> #include <linux/memcontrol.h <https://wiki.bytedance.net/pages/memcontrol.h>>
> #include <linux/cgroup.h <https://wiki.bytedance.net/pages/cgroup.h>>
>
> int mymsr_init (void)
> {
> struct mem_cgroup *memcg_ptr=(void *)0xffff8c1986ff1000;
> struct cgroup_subsys_state * css_ptr = &memcg_ptr->css;
> css_put(css_ptr);
> return 0;
> }
>
> void mymsr_exit(void)
> {
> }
>
> MODULE_AUTHOR("xuyun.xy <https://wiki.bytedance.net/pages/xuyun.xy>");
> MODULE_LICENSE("GPL");
> module_init(mymsr_init);
> module_exit(mymsr_exit);
> crash information
> crash> struct mem_cgroup 0xffff8c1c43b86400
> struct mem_cgroup {
> css = {
> cgroup = 0xffff8c1c8a879000,
> ss = 0xffffffffac12aa40,
> refcnt = {
> count = {
> counter = 1
> },
> percpu_count_ptr = 67753193126051,
> release = 0xffffffffab112030,
> confirm_switch = 0x0,
> force_atomic = false,
> rcu = {
> next = 0xffff8c1c8a879038,
> func = 0xffffffffab37fe70
> }
> },
> sibling = {
> next = 0xffff8c1c8ab0f448,
> prev = 0xffff8c18584fac48
> },
> children = {
> next = 0xffff8c1c43b86458,
> prev = 0xffff8c1c43b86458
> },
> id = 34535,
> flags = 16,
> serial_nr = 314540,
> online_cnt = {
> counter = 0
> },
> callback_head = {
> next = 0x0,
> func = 0x0
> },
> destroy_work = {
> data = {
> counter = 960
> },
> entry = {
> next = 0xffff8c1c43b86498,
> prev = 0xffff8c1c43b86498
> },
> func = 0xffffffffab1141c0
> },
> parent = 0xffff8c1106f48800
> },
> id = {
> id = 0,
> ref = {
> counter = 0
> }
> },
> memory = {
> count = {
> counter = 1
> },
> limit = 262144,
> parent = 0xffff8c1106f488c0,
> watermark = 8045,
>
>
> crash> struct mem_cgroup 0xffff8c1986ff1000
> struct mem_cgroup {
> css = {
> cgroup = 0xffff8c196f533400,
> ss = 0xffffffffac12aa40,
> refcnt = {
> count = {
> counter = 1
> },
> percpu_count_ptr = 67756691197419,
> release = 0xffffffffab112030,
> confirm_switch = 0x0,
> force_atomic = false,
> rcu = {
> next = 0xffff8c196f533438,
> func = 0xffffffffab37fe70
> }
> },
> sibling = {
> next = 0xffff8c197a9fdc48,
> prev = 0xffff8c196ebbf048
> },
> children = {
> next = 0xffff8c1986ff1058,
> prev = 0xffff8c1986ff1058
> },
> id = 25717,
> flags = 16,
> serial_nr = 201081,
> online_cnt = {
> counter = 0
> },
> callback_head = {
> next = 0x0,
> func = 0x0
> },
> destroy_work = {
> data = {
> counter = 2432
> },
> entry = {
> next = 0xffff8c1986ff1098,
> prev = 0xffff8c1986ff1098
> },
> func = 0xffffffffab1141c0
> },
> parent = 0xffff8c1106f48800
> },
> id = {
> id = 0,
> ref = {
> counter = 0
> }
> },
> memory = {
> count = {
> counter = 1
> },
> limit = 262144,
> parent = 0xffff8c1106f488c0,
> watermark = 6067,
> failcnt = 0
> },
> swap = {
> count = {
> counter = 0
> },
> limit = 2251799813685247,
> parent = 0xffff8c1106f488e8,
> watermark = 0,
> failcnt = 0
> },
> memsw = {
> count = {
> counter = 1
> },
> limit = 2251799813685247,
> parent = 0xffff8c1106f48910,
> watermark = 6067,
> failcnt = 0
> },
> kmem = {
> count = {
> counter = 0
> },
> limit = 2251799813685247,
> parent = 0xffff8c1106f48938,
> watermark = 574,
> failcnt = 0
> },
> tcpmem = {
> count = {
> counter = 0
> },
> limit = 2251799813685247,
> parent = 0xffff8c1106f48960,
> watermark = 0,
> failcnt = 0
> },
> low = 0,
> high = 2251799813685247,
> high_work = {
> data = {
> counter = 68719476704
> },
> entry = {
> next = 0xffff8c1986ff11a0,
> prev = 0xffff8c1986ff11a0
> },
> func = 0xffffffffab217610
> },
> soft_limit = 2251799813685247,
> vmpressure = {
> scanned = 0,
> reclaimed = 0,
> tree_scanned = 0,
> tree_reclaimed = 0,
> sr_lock = {
> {
> rlock = {
> raw_lock = {
> val = {
> counter = 0
> }
> }
> }
> }
> },
> events = {
> next = 0xffff8c1986ff11e8,
> prev = 0xffff8c1986ff11e8
> },
> events_lock = {
> owner = {
> counter = 0
> },
> wait_lock = {
> {
> rlock = {
> raw_lock = {
> val = {
> counter = 0
> }
> }
> }
> }
> },
> osq = {
> tail = {
> counter = 0
> }
> },
> wait_list = {
> next = 0xffff8c1986ff1208,
> prev = 0xffff8c1986ff1208
> }
> },
> work = {
> data = {
> counter = 68719476704
> },
> entry = {
> next = 0xffff8c1986ff1220,
> prev = 0xffff8c1986ff1220
> },
> func = 0xffffffffab21e610
> }
> },
> use_hierarchy = true,
> oom_lock = false,
> under_oom = 0,
> swappiness = 0,
> oom_kill_disable = 0,
> events_file = {
> kn = 0x0
> },
> thresholds_lock = {
> owner = {
> counter = 0
> },
> wait_lock = {
> {
> rlock = {
> raw_lock = {
> val = {
> counter = 0
> }
> }
> }
> }
> },
> osq = {
> tail = {
> counter = 0
> }
> },
> wait_list = {
> next = 0xffff8c1986ff1260,
> prev = 0xffff8c1986ff1260
> }
> },
> thresholds = {
> primary = 0x0,
> spare = 0x0
> },
> memsw_thresholds = {
> primary = 0x0,
> spare = 0x0
> },
> oom_notify = {
> next = 0xffff8c1986ff1290,
> prev = 0xffff8c1986ff1290
> },
> move_charge_at_immigrate = 0,
> moving_account = {
> counter = 0
> },
> move_lock = {
> {
> rlock = {
> raw_lock = {
> val = {
> counter = 0
> }
> }
> }
> }
> },
> move_lock_task = 0x0,
> move_lock_flags = 0,
> stat = 0x3d9fd3426ea0,
> socket_pressure = 4333518749,
> tcpmem_active = false,
> tcpmem_pressure = 0,
> kmemcg_id = 8,
> kmem_state = KMEM_ALLOCATED,
> kmem_caches = {
> next = 0xffff8c1976f7cba0,
> prev = 0xffff8c0dac0401a0
> },
> last_scanned_node = 64,
> scan_nodes = {
> bits = {0}
> },
> numainfo_events = {
> counter = 53
> },
> numainfo_updating = {
> counter = 0
> },
> cgwb_list = {
> next = 0x0,
> prev = 0xffff8c1986ff1308
> },
> cgwb_domain = {
> lock = {
> {
> rlock = {
> raw_lock = {
> val = {
> counter = 0
> }
> }
> }
> }
> },
> completions = {
> events = {
> lock = {
> raw_lock = {
> val = {
> counter = 0
> }
> }
> },
> count = 1,
> list = {
> next = 0xffff8c196ebbf330,
> prev = 0xffff8c197a9fdf30
> },
> counters = 0x3d9fd5814ce8
> },
> period = 0,
> sequence = {
> sequence = 0
> }
> },
> period_timer = {
> entry = {
> next = 0x0,
> pprev = 0x0
> },
> expires = 0,
> function = 0xffffffffab1a3a10,
> data = 18446616639999775512,
> flags = 524326
> },
> period_time = 0,
> dirty_limit_tstamp = 4333518749,
> dirty_limit = 0
> },
> event_list = {
> next = 0xffff8c1986ff1398,
> prev = 0xffff8c1986ff1398
> },
> event_list_lock = {
> {
> rlock = {
> raw_lock = {
> val = {
> counter = 0
> }
> }
> }
> }
> },
> nodeinfo = 0xffff8c1986ff13b0
> }
>
>
> bytedance.net <http://bytedance.net/>
> 段熊春
> duanxiongc...@bytedance.com