Hi Jaegeuk, On 2017/10/4 4:16, Jaegeuk Kim wrote: > On 09/19, Chao Yu wrote: >> From: Chao Yu <yuch...@huawei.com> >> >> Fstrim intends to trim invalid blocks of filesystem only with specified >> range and granularity, but actually, it will issue all previous cached >> discard commands which may be out-of-range and be with unmatched >> granularity, it's unneeded. >> >> In order to fix above issues, this patch introduces new helps to support >> to issue and wait discard in range and adds a new fstrim_list for tracking >> in-flight discard from ->fstrim. >> >> Signed-off-by: Chao Yu <yuch...@huawei.com> >> --- >> fs/f2fs/f2fs.h | 1 + >> fs/f2fs/segment.c | 125 >> +++++++++++++++++++++++++++++++++++++++++++++--------- >> 2 files changed, 106 insertions(+), 20 deletions(-) >> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h >> index 89b4927dcd79..cb13c7df6971 100644 >> --- a/fs/f2fs/f2fs.h >> +++ b/fs/f2fs/f2fs.h >> @@ -249,6 +249,7 @@ struct discard_cmd_control { >> struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ >> unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */ >> struct list_head wait_list; /* store on-flushing entries */ >> + struct list_head fstrim_list; /* in-flight discard from >> fstrim */ >> wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ >> unsigned int discard_wake; /* to wake up discard thread */ >> struct mutex cmd_lock; >> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >> index dedf0209d820..088936c61905 100644 >> --- a/fs/f2fs/segment.c >> +++ b/fs/f2fs/segment.c >> @@ -845,9 +845,11 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi, >> >> /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ >> static void __submit_discard_cmd(struct f2fs_sb_info *sbi, >> - struct discard_cmd *dc) >> + struct discard_cmd *dc, bool fstrim) >> { >> struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; >> + struct list_head *wait_list = fstrim ? &(dcc->fstrim_list) : >> + &(dcc->wait_list); >> struct bio *bio = NULL; >> >> if (dc->state != D_PREP) >> @@ -869,7 +871,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info >> *sbi, >> bio->bi_end_io = f2fs_submit_discard_endio; >> bio->bi_opf |= REQ_SYNC; >> submit_bio(bio); >> - list_move_tail(&dc->list, &dcc->wait_list); >> + list_move_tail(&dc->list, wait_list); >> __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); >> >> f2fs_update_iostat(sbi, FS_DISCARD, 1); >> @@ -1054,6 +1056,68 @@ static int __queue_discard_cmd(struct f2fs_sb_info >> *sbi, >> return 0; >> } >> >> +static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, >> + unsigned int start, unsigned int end, >> + unsigned int granularity) >> +{ >> + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; >> + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; >> + struct rb_node **insert_p = NULL, *insert_parent = NULL; >> + struct discard_cmd *dc; >> + struct blk_plug plug; >> + int issued; >> + >> +next: >> + issued = 0; >> + >> + mutex_lock(&dcc->cmd_lock); >> + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); >> + >> + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, >> + NULL, start, >> + (struct rb_entry **)&prev_dc, >> + (struct rb_entry **)&next_dc, >> + &insert_p, &insert_parent, true); >> + if (!dc) >> + dc = next_dc; >> + >> + blk_start_plug(&plug); >> + >> + while (dc && dc->lstart <= end) { >> + struct rb_node *node; >> + >> + if (dc->len < granularity) >> + goto skip; >> + >> + if (dc->state != D_PREP) { >> + list_move_tail(&dc->list, &dcc->fstrim_list); >> + goto skip; >> + } >> + >> + __submit_discard_cmd(sbi, dc, true); >> + >> + if (++issued >= DISCARD_ISSUE_RATE) { >> + start = dc->lstart + dc->len; >> + >> + blk_finish_plug(&plug); >> + mutex_unlock(&dcc->cmd_lock); >> + >> + schedule(); >> + >> + goto next; >> + } >> +skip: >> + node = rb_next(&dc->rb_node); >> + dc = rb_entry_safe(node, struct discard_cmd, rb_node); >> + >> + if (fatal_signal_pending(current)) >> + break; >> + } >> + >> + blk_finish_plug(&plug); >> + mutex_unlock(&dcc->cmd_lock); >> +} >> + >> static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) >> { >> struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; >> @@ -1076,22 +1140,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info >> *sbi, bool issue_cond) >> >> /* Hurry up to finish fstrim */ >> if (dcc->pend_list_tag[i] & P_TRIM) { >> - __submit_discard_cmd(sbi, dc); >> + __submit_discard_cmd(sbi, dc, false); >> issued++; >> - >> - if (fatal_signal_pending(current)) >> - break; >> continue; >> } >> >> if (!issue_cond) { >> - __submit_discard_cmd(sbi, dc); >> + __submit_discard_cmd(sbi, dc, false); >> issued++; >> continue; >> } >> >> if (is_idle(sbi)) { >> - __submit_discard_cmd(sbi, dc); >> + __submit_discard_cmd(sbi, dc, false); >> issued++; >> } else { >> io_interrupted = true; >> @@ -1145,10 +1206,14 @@ static void __wait_one_discard_bio(struct >> f2fs_sb_info *sbi, >> mutex_unlock(&dcc->cmd_lock); >> } >> >> -static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) >> +static void __wait_discard_cmd_range(struct f2fs_sb_info *sbi, bool >> wait_cond, >> + block_t start, block_t end, >> + unsigned int granularity, >> + bool fstrim) >> { >> struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; >> - struct list_head *wait_list = &(dcc->wait_list); >> + struct list_head *wait_list = fstrim ? &(dcc->fstrim_list) : >> + &(dcc->wait_list); >> struct discard_cmd *dc, *tmp; >> bool need_wait; >> >> @@ -1157,6 +1222,10 @@ static void __wait_discard_cmd(struct f2fs_sb_info >> *sbi, bool wait_cond) >> >> mutex_lock(&dcc->cmd_lock); >> list_for_each_entry_safe(dc, tmp, wait_list, list) { >> + if (dc->lstart + dc->len <= start || end <= dc->lstart) >> + continue; >> + if (dc->len < granularity) >> + continue; >> if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { >> wait_for_completion_io(&dc->wait); >> __remove_discard_cmd(sbi, dc); > > So, we need to add this on top of the patch that fixes the bug reported > by Juhyung, right? > > if (fstrim) > need_wait = true;
+ __wait_discard_cmd_range(sbi, false, start_block, end_block, + cpc.trim_minlen, true); We can control wait_cond through the parameter in __wait_discard_cmd_range, let me fix that wrong parameter passed from f2fs_trim_fs. :) Thanks, > > Thanks, > >> @@ -1174,6 +1243,11 @@ static void __wait_discard_cmd(struct f2fs_sb_info >> *sbi, bool wait_cond) >> } >> } >> >> +static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) >> +{ >> + __wait_discard_cmd_range(sbi, wait_cond, 0, UINT_MAX, 1, false); >> +} >> + >> /* This should be covered by global mutex, &sit_i->sentry_lock */ >> void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) >> { >> @@ -1209,12 +1283,12 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) >> } >> } >> >> -/* This comes from f2fs_put_super and f2fs_trim_fs */ >> +/* This comes from f2fs_put_super */ >> void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) >> { >> __issue_discard_cmd(sbi, false); >> __drop_discard_cmd(sbi); >> - __wait_discard_cmd(sbi, false); >> + __wait_all_discard_cmd(sbi, false); >> } >> >> static void mark_discard_range_all(struct f2fs_sb_info *sbi) >> @@ -1258,7 +1332,7 @@ static int issue_discard_thread(void *data) >> >> issued = __issue_discard_cmd(sbi, true); >> if (issued) { >> - __wait_discard_cmd(sbi, true); >> + __wait_all_discard_cmd(sbi, true); >> wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; >> } else { >> wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; >> @@ -1569,6 +1643,7 @@ static int create_discard_cmd_control(struct >> f2fs_sb_info *sbi) >> dcc->pend_list_tag[i] |= P_ACTIVE; >> } >> INIT_LIST_HEAD(&dcc->wait_list); >> + INIT_LIST_HEAD(&dcc->fstrim_list); >> mutex_init(&dcc->cmd_lock); >> atomic_set(&dcc->issued_discard, 0); >> atomic_set(&dcc->issing_discard, 0); >> @@ -2196,7 +2271,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct >> fstrim_range *range) >> { >> __u64 start = F2FS_BYTES_TO_BLK(range->start); >> __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; >> - unsigned int start_segno, end_segno; >> + unsigned int start_segno, end_segno, cur_segno; >> + block_t start_block, end_block; >> struct cp_control cpc; >> int err = 0; >> >> @@ -2217,12 +2293,17 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct >> fstrim_range *range) >> start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); >> end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : >> GET_SEGNO(sbi, end); >> + >> + start_block = START_BLOCK(sbi, start_segno); >> + end_block = START_BLOCK(sbi, end_segno + 1); >> + >> cpc.reason = CP_DISCARD; >> cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); >> >> /* do checkpoint to issue discard commands safely */ >> - for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { >> - cpc.trim_start = start_segno; >> + for (cur_segno = start_segno; cur_segno <= end_segno; >> + cur_segno = cpc.trim_end + 1) { >> + cpc.trim_start = cur_segno; >> >> if (sbi->discard_blks == 0) >> break; >> @@ -2230,7 +2311,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct >> fstrim_range *range) >> cpc.trim_end = end_segno; >> else >> cpc.trim_end = min_t(unsigned int, >> - rounddown(start_segno + >> + rounddown(cur_segno + >> BATCHED_TRIM_SEGMENTS(sbi), >> sbi->segs_per_sec) - 1, end_segno); >> >> @@ -2242,9 +2323,13 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct >> fstrim_range *range) >> >> schedule(); >> } >> - /* It's time to issue all the filed discards */ >> - mark_discard_range_all(sbi); >> - f2fs_wait_discard_bios(sbi); >> + >> + start_block = START_BLOCK(sbi, start_segno); >> + end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1); >> + >> + __issue_discard_cmd_range(sbi, start_block, end_block, cpc.trim_minlen); >> + __wait_discard_cmd_range(sbi, false, start_block, end_block, >> + cpc.trim_minlen, true); >> out: >> range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); >> return err; >> -- >> 2.14.1.145.gb3622a4ee