4.4-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Jaegeuk Kim <jaeg...@kernel.org>

commit a51311938e14c17f5a94d30baac9d7bec71f5858 upstream.

There was a subtle bug in nat cache management which causes wrong nid allocation
or wrong block addresses when try_to_free_nats is triggered heavily.
This patch enlarges the previous coverage of nat_tree_lock to avoid the data race.

Signed-off-by: Jaegeuk Kim <jaeg...@kernel.org>
Signed-off-by: Ben Hutchings <ben.hutchi...@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>
---
 fs/f2fs/node.c |   29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -261,13 +261,11 @@ static void cache_nat_entry(struct f2fs_
 {
        struct nat_entry *e;
 
-       down_write(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
        if (!e) {
                e = grab_nat_entry(nm_i, nid);
                node_info_from_raw_nat(&e->ni, ne);
        }
-       up_write(&nm_i->nat_tree_lock);
 }
 
 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -379,6 +377,8 @@ void get_node_info(struct f2fs_sb_info *
 
        memset(&ne, 0, sizeof(struct f2fs_nat_entry));
 
+       down_write(&nm_i->nat_tree_lock);
+
        /* Check current segment summary */
        mutex_lock(&curseg->curseg_mutex);
        i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
@@ -399,6 +399,7 @@ void get_node_info(struct f2fs_sb_info *
 cache:
        /* cache nat entry */
        cache_nat_entry(NM_I(sbi), nid, &ne);
+       up_write(&nm_i->nat_tree_lock);
 }
 
 /*
@@ -1440,13 +1441,10 @@ static int add_free_nid(struct f2fs_sb_i
 
        if (build) {
                /* do not add allocated nids */
-               down_read(&nm_i->nat_tree_lock);
                ne = __lookup_nat_cache(nm_i, nid);
-               if (ne &&
-                       (!get_nat_flag(ne, IS_CHECKPOINTED) ||
+               if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
                                nat_get_blkaddr(ne) != NULL_ADDR))
                        allocated = true;
-               up_read(&nm_i->nat_tree_lock);
                if (allocated)
                        return 0;
        }
@@ -1532,6 +1530,8 @@ static void build_free_nids(struct f2fs_
        ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
                                                        META_NAT, true);
 
+       down_read(&nm_i->nat_tree_lock);
+
        while (1) {
                struct page *page = get_current_nat_page(sbi, nid);
 
@@ -1560,6 +1560,7 @@ static void build_free_nids(struct f2fs_
                        remove_free_nid(nm_i, nid);
        }
        mutex_unlock(&curseg->curseg_mutex);
+       up_read(&nm_i->nat_tree_lock);
 
        ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
                                        nm_i->ra_nid_pages, META_NAT, false);
@@ -1842,14 +1843,12 @@ static void remove_nats_in_journal(struc
 
                raw_ne = nat_in_journal(sum, i);
 
-               down_write(&nm_i->nat_tree_lock);
                ne = __lookup_nat_cache(nm_i, nid);
                if (!ne) {
                        ne = grab_nat_entry(nm_i, nid);
                        node_info_from_raw_nat(&ne->ni, &raw_ne);
                }
                __set_nat_cache_dirty(nm_i, ne);
-               up_write(&nm_i->nat_tree_lock);
        }
        update_nats_in_cursum(sum, -i);
        mutex_unlock(&curseg->curseg_mutex);
@@ -1883,7 +1882,6 @@ static void __flush_nat_entry_set(struct
        struct f2fs_nat_block *nat_blk;
        struct nat_entry *ne, *cur;
        struct page *page = NULL;
-       struct f2fs_nm_info *nm_i = NM_I(sbi);
 
        /*
         * there are two steps to flush nat entries:
@@ -1920,12 +1918,8 @@ static void __flush_nat_entry_set(struct
                        raw_ne = &nat_blk->entries[nid - start_nid];
                }
                raw_nat_from_node_info(raw_ne, &ne->ni);
-
-               down_write(&NM_I(sbi)->nat_tree_lock);
                nat_reset_flag(ne);
                __clear_nat_cache_dirty(NM_I(sbi), ne);
-               up_write(&NM_I(sbi)->nat_tree_lock);
-
                if (nat_get_blkaddr(ne) == NULL_ADDR)
                        add_free_nid(sbi, nid, false);
        }
@@ -1937,9 +1931,7 @@ static void __flush_nat_entry_set(struct
 
        f2fs_bug_on(sbi, set->entry_cnt);
 
-       down_write(&nm_i->nat_tree_lock);
        radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
-       up_write(&nm_i->nat_tree_lock);
        kmem_cache_free(nat_entry_set_slab, set);
 }
 
@@ -1959,6 +1951,9 @@ void flush_nat_entries(struct f2fs_sb_in
 
        if (!nm_i->dirty_nat_cnt)
                return;
+
+       down_write(&nm_i->nat_tree_lock);
+
        /*
         * if there are no enough space in journal to store dirty nat
         * entries, remove all entries from journal and merge them
@@ -1967,7 +1962,6 @@ void flush_nat_entries(struct f2fs_sb_in
        if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
                remove_nats_in_journal(sbi);
 
-       down_write(&nm_i->nat_tree_lock);
        while ((found = __gang_lookup_nat_set(nm_i,
                                        set_idx, SETVEC_SIZE, setvec))) {
                unsigned idx;
@@ -1976,12 +1970,13 @@ void flush_nat_entries(struct f2fs_sb_in
                        __adjust_nat_entry_set(setvec[idx], &sets,
                                                        MAX_NAT_JENTRIES(sum));
        }
-       up_write(&nm_i->nat_tree_lock);
 
        /* flush dirty nats in nat entry set */
        list_for_each_entry_safe(set, tmp, &sets, set_list)
                __flush_nat_entry_set(sbi, set);
 
+       up_write(&nm_i->nat_tree_lock);
+
        f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
 


Reply via email to