On 04/27, Chao Yu wrote: > On 2018/4/27 10:04, Chao Yu wrote: > > On 2018/4/27 0:03, Jaegeuk Kim wrote: > >> On 04/24, Chao Yu wrote: > >>> Thread A Thread B Thread C > >>> - f2fs_remount > >>> - stop_gc_thread > >>> - f2fs_sbi_store > >>> - issue_discard_thread > >>> sbi->gc_thread = NULL; > >>> sbi->gc_thread->gc_wake = 1 > >>> access > >>> sbi->gc_thread->gc_urgent > >>> > >>> Previously, we allocate memory for sbi->gc_thread based on background > >>> gc thread mount option, the memory can be released if we turn off > >>> that mount option, but still there are several places access gc_thread > >>> pointer without considering race condition, result in NULL point > >>> dereference. > >> > >> Do we just need to check &sb->s_umount in f2fs_sbi_store() and > > > > Better, > > > >> issue_discard_thread? > > > > There is more cases can be called from different scenarios: > > - select_policy() > > - need_SSR() > > BTW, do you agree with introducing {init,destroy}_gc_context as the patch > does?
I don't think so. The issue only requires mutex() in sysfs/discard_thread. > > Thanks, > > > > > Thanks, > > > >> > >>> > >>> In order to fix this issue, keep gc_thread structure valid in sbi all > >>> the time instead of alloc/free it dynamically. > >>> > >>> In addition, add a rw semaphore to exclude rw operation in fields of > >>> gc_thread. > >>> > >>> Signed-off-by: Chao Yu <yuch...@huawei.com> > >>> --- > >>> v2: > >>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk. > >>> fs/f2fs/debug.c | 3 +-- > >>> fs/f2fs/f2fs.h | 9 ++++++- > >>> fs/f2fs/gc.c | 78 > >>> ++++++++++++++++++++++++++++++++++++------------------- > >>> fs/f2fs/gc.h | 1 + > >>> fs/f2fs/segment.c | 10 +++++-- > >>> fs/f2fs/super.c | 26 +++++++++++++------ > >>> fs/f2fs/sysfs.c | 26 ++++++++++++++----- > >>> 7 files changed, 107 insertions(+), 46 deletions(-) > >>> > >>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c > >>> index a66107b5cfff..0fbd674c66fb 100644 > >>> --- a/fs/f2fs/debug.c > >>> +++ b/fs/f2fs/debug.c > >>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) > >>> si->cache_mem = 0; > >>> > >>> /* build gc */ > >>> - if (sbi->gc_thread) > >>> - si->cache_mem += sizeof(struct f2fs_gc_kthread); > >>> + si->cache_mem += sizeof(struct f2fs_gc_kthread); > >>> > >>> /* build merge flush thread */ > >>> if (SM_I(sbi)->fcc_info) > >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > >>> index 8f3ad9662d13..75d3b4875429 100644 > >>> --- a/fs/f2fs/f2fs.h > >>> +++ b/fs/f2fs/f2fs.h > >>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct > >>> f2fs_sb_info *sbi) > >>> return (struct sit_info *)(SM_I(sbi)->sit_info); > >>> } > >>> > >>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi) > >>> +{ > >>> + return (struct f2fs_gc_kthread *)(sbi->gc_thread); > >>> +} > >>> + > >>> static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi) > >>> { > >>> return (struct free_segmap_info *)(SM_I(sbi)->free_info); > >>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t > >>> pos, size_t len); > >>> /* > >>> * gc.c > >>> */ > >>> +int init_gc_context(struct f2fs_sb_info *sbi); > >>> +void destroy_gc_context(struct f2fs_sb_info * sbi); > >>> int start_gc_thread(struct f2fs_sb_info *sbi); > >>> -void stop_gc_thread(struct f2fs_sb_info *sbi); > >>> +bool stop_gc_thread(struct f2fs_sb_info *sbi); > >>> block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode); > >>> int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, > >>> unsigned int segno); > >>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c > >>> index 70418b34c5f6..2febb84b2fd6 100644 > >>> --- a/fs/f2fs/gc.c > >>> +++ b/fs/f2fs/gc.c > >>> @@ -26,8 +26,8 @@ > >>> static int gc_thread_func(void *data) > >>> { > >>> struct f2fs_sb_info *sbi = data; > >>> - struct f2fs_gc_kthread *gc_th = sbi->gc_thread; > >>> - wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; > >>> + struct f2fs_gc_kthread *gc_th = GC_I(sbi); > >>> + wait_queue_head_t *wq = &gc_th->gc_wait_queue_head; > >>> unsigned int wait_ms; > >>> > >>> wait_ms = gc_th->min_sleep_time; > >>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data) > >>> return 0; > >>> } > >>> > >>> -int start_gc_thread(struct f2fs_sb_info *sbi) > >>> +int init_gc_context(struct f2fs_sb_info *sbi) > >>> { > >>> struct f2fs_gc_kthread *gc_th; > >>> - dev_t dev = sbi->sb->s_bdev->bd_dev; > >>> - int err = 0; > >>> > >>> gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL); > >>> - if (!gc_th) { > >>> - err = -ENOMEM; > >>> - goto out; > >>> - } > >>> + if (!gc_th) > >>> + return -ENOMEM; > >>> + > >>> + gc_th->f2fs_gc_task = NULL; > >>> > >>> gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; > >>> gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; > >>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi) > >>> gc_th->gc_urgent = 0; > >>> gc_th->gc_wake= 0; > >>> > >>> + init_rwsem(&gc_th->gc_rwsem); > >>> + > >>> sbi->gc_thread = gc_th; > >>> - init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); > >>> - sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, > >>> + > >>> + return 0; > >>> +} > >>> + > >>> +void destroy_gc_context(struct f2fs_sb_info *sbi) > >>> +{ > >>> + kfree(GC_I(sbi)); > >>> + sbi->gc_thread = NULL; > >>> +} > >>> + > >>> +int start_gc_thread(struct f2fs_sb_info *sbi) > >>> +{ > >>> + struct f2fs_gc_kthread *gc_th = GC_I(sbi); > >>> + dev_t dev = sbi->sb->s_bdev->bd_dev; > >>> + int err = 0; > >>> + > >>> + init_waitqueue_head(&gc_th->gc_wait_queue_head); > >>> + gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi, > >>> "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); > >>> if (IS_ERR(gc_th->f2fs_gc_task)) { > >>> err = PTR_ERR(gc_th->f2fs_gc_task); > >>> - kfree(gc_th); > >>> - sbi->gc_thread = NULL; > >>> + gc_th->f2fs_gc_task = NULL; > >>> } > >>> -out: > >>> + > >>> return err; > >>> } > >>> > >>> -void stop_gc_thread(struct f2fs_sb_info *sbi) > >>> +bool stop_gc_thread(struct f2fs_sb_info *sbi) > >>> { > >>> - struct f2fs_gc_kthread *gc_th = sbi->gc_thread; > >>> - if (!gc_th) > >>> - return; > >>> - kthread_stop(gc_th->f2fs_gc_task); > >>> - kfree(gc_th); > >>> - sbi->gc_thread = NULL; > >>> + struct f2fs_gc_kthread *gc_th = GC_I(sbi); > >>> + bool stopped = false; > >>> + > >>> + down_write(&gc_th->gc_rwsem); > >>> + if (gc_th->f2fs_gc_task) { > >>> + kthread_stop(gc_th->f2fs_gc_task); > >>> + gc_th->f2fs_gc_task = NULL; > >>> + stopped = true; > >>> + } > >>> + up_write(&gc_th->gc_rwsem); > >>> + > >>> + return stopped; > >>> } > >>> > >>> static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) > >>> { > >>> int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; > >>> > >>> - if (!gc_th) > >>> - return gc_mode; > >>> + down_read(&gc_th->gc_rwsem); > >>> + if (!gc_th->f2fs_gc_task) > >>> + goto out; > >>> > >>> if (gc_th->gc_idle) { > >>> if (gc_th->gc_idle == 1) > >>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread > >>> *gc_th, int gc_type) > >>> } > >>> if (gc_th->gc_urgent) > >>> gc_mode = GC_GREEDY; > >>> +out: > >>> + up_read(&gc_th->gc_rwsem); > >>> return gc_mode; > >>> } > >>> > >>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, > >>> int gc_type, > >>> p->max_search = dirty_i->nr_dirty[type]; > >>> p->ofs_unit = 1; > >>> } else { > >>> - p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); > >>> + p->gc_mode = select_gc_type(GC_I(sbi), gc_type); > >>> p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; > >>> p->max_search = dirty_i->nr_dirty[DIRTY]; > >>> p->ofs_unit = sbi->segs_per_sec; > >>> } > >>> > >>> /* we need to check every dirty segments in the FG_GC case */ > >>> - if (gc_type != FG_GC && > >>> - (sbi->gc_thread && !sbi->gc_thread->gc_urgent) && > >>> + down_read(&GC_I(sbi)->gc_rwsem); > >>> + if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task && > >>> + !GC_I(sbi)->gc_urgent && > >>> p->max_search > sbi->max_victim_search) > >>> p->max_search = sbi->max_victim_search; > >>> + up_read(&GC_I(sbi)->gc_rwsem); > >>> > >>> /* let's select beginning hot/small space first in no_heap mode*/ > >>> if (test_opt(sbi, NOHEAP) && > >>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h > >>> index b0045d4c8d1e..9a5b273328c2 100644 > >>> --- a/fs/f2fs/gc.h > >>> +++ b/fs/f2fs/gc.h > >>> @@ -26,6 +26,7 @@ > >>> #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ > >>> > >>> struct f2fs_gc_kthread { > >>> + struct rw_semaphore gc_rwsem; /* semaphore for f2fs_gc_task */ > >>> struct task_struct *f2fs_gc_task; > >>> wait_queue_head_t gc_wait_queue_head; > >>> > >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > >>> index 5960768d26df..f787eea1b2f6 100644 > >>> --- a/fs/f2fs/segment.c > >>> +++ b/fs/f2fs/segment.c > >>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi) > >>> > >>> if (test_opt(sbi, LFS)) > >>> return false; > >>> - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) > >>> + down_read(&GC_I(sbi)->gc_rwsem); > >>> + if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) { > >>> + down_read(&GC_I(sbi)->gc_rwsem); > >>> return true; > >>> + } > >>> + up_read(&GC_I(sbi)->gc_rwsem); > >>> > >>> return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + > >>> SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); > >>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data) > >>> if (dcc->discard_wake) > >>> dcc->discard_wake = 0; > >>> > >>> - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) > >>> + down_read(&GC_I(sbi)->gc_rwsem); > >>> + if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) > >>> init_discard_policy(&dpolicy, DPOLICY_FORCE, 1); > >>> + up_read(&GC_I(sbi)->gc_rwsem); > >>> > >>> sb_start_intwrite(sbi->sb); > >>> > >>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > >>> index dedb8e50b440..199f29dce86d 100644 > >>> --- a/fs/f2fs/super.c > >>> +++ b/fs/f2fs/super.c > >>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb) > >>> kfree(sbi->raw_super); > >>> > >>> destroy_device_list(sbi); > >>> + destroy_gc_context(sbi); > >>> mempool_destroy(sbi->write_io_dummy); > >>> #ifdef CONFIG_QUOTA > >>> for (i = 0; i < MAXQUOTAS; i++) > >>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, > >>> int *flags, char *data) > >>> * option. Also sync the filesystem. > >>> */ > >>> if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) { > >>> - if (sbi->gc_thread) { > >>> - stop_gc_thread(sbi); > >>> - need_restart_gc = true; > >>> + need_restart_gc = stop_gc_thread(sbi); > >>> + } else { > >>> + down_write(&GC_I(sbi)->gc_rwsem); > >>> + if (!GC_I(sbi)->f2fs_gc_task) { > >>> + err = start_gc_thread(sbi); > >>> + if (err) { > >>> + up_write(&GC_I(sbi)->gc_rwsem); > >>> + goto restore_opts; > >>> + } > >>> + need_stop_gc = true; > >>> } > >>> - } else if (!sbi->gc_thread) { > >>> - err = start_gc_thread(sbi); > >>> - if (err) > >>> - goto restore_opts; > >>> - need_stop_gc = true; > >>> + up_write(&GC_I(sbi)->gc_rwsem); > >>> } > >>> > >>> if (*flags & SB_RDONLY || > >>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, > >>> void *data, int silent) > >>> goto free_meta_inode; > >>> } > >>> > >>> + err = init_gc_context(sbi); > >>> + if (err) > >>> + goto free_checkpoint; > >>> + > >>> /* Initialize device list */ > >>> err = f2fs_scan_devices(sbi); > >>> if (err) { > >>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, > >>> void *data, int silent) > >>> destroy_segment_manager(sbi); > >>> free_devices: > >>> destroy_device_list(sbi); > >>> + destroy_gc_context(sbi); > >>> +free_checkpoint: > >>> kfree(sbi->ckpt); > >>> free_meta_inode: > >>> make_bad_inode(sbi->meta_inode); > >>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c > >>> index 2c53de9251be..b8d09935e43f 100644 > >>> --- a/fs/f2fs/sysfs.c > >>> +++ b/fs/f2fs/sysfs.c > >>> @@ -46,7 +46,7 @@ struct f2fs_attr { > >>> static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int > >>> struct_type) > >>> { > >>> if (struct_type == GC_THREAD) > >>> - return (unsigned char *)sbi->gc_thread; > >>> + return (unsigned char *)GC_I(sbi); > >>> else if (struct_type == SM_INFO) > >>> return (unsigned char *)SM_I(sbi); > >>> else if (struct_type == DCC_INFO) > >>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, > >>> if (!strcmp(a->attr.name, "trim_sections")) > >>> return -EINVAL; > >>> > >>> - *ui = t; > >>> - > >>> - if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) > >>> + if (!strcmp(a->attr.name, "iostat_enable") && t == 0) > >>> f2fs_reset_iostat(sbi); > >>> - if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) { > >>> - sbi->gc_thread->gc_wake = 1; > >>> - wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); > >>> + > >>> + if (a->struct_type == GC_THREAD) { > >>> + down_write(&GC_I(sbi)->gc_rwsem); > >>> + if (!GC_I(sbi)->f2fs_gc_task) { > >>> + up_write(&GC_I(sbi)->gc_rwsem); > >>> + return -EINVAL; > >>> + } > >>> + } > >>> + > >>> + if (!strcmp(a->attr.name, "gc_urgent") && t == 1) { > >>> + GC_I(sbi)->gc_wake = 1; > >>> + wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head); > >>> wake_up_discard_thread(sbi, true); > >>> } > >>> > >>> + *ui = t; > >>> + > >>> + if (a->struct_type == GC_THREAD) > >>> + up_write(&GC_I(sbi)->gc_rwsem); > >>> + > >>> return count; > >>> } > >>> > >>> -- > >>> 2.15.0.55.gc2ece9dc4de6 > >> > >> . > >> > > > > > > . > >