Once a pcluster is fully decompressed and there are no attached cached
folios, its corresponding `struct z_erofs_pcluster` will be freed. This
will significantly reduce the frequency of calls to erofs_shrink_scan()
and the memory allocated for `struct z_erofs_pcluster`.

The tables below show approximately a 96% reduction in the calls to
erofs_shrink_scan() and in the memory allocated for `struct
z_erofs_pcluster` after applying this patch. The results were obtained
by performing a test to copy a 4.1GB partition on ARM64 Android devices
running the 6.6 kernel with an 8-core CPU and 12GB of memory.

1. The reduction in calls to erofs_shrink_scan():
+-----------------+-----------+----------+---------+
|                 | w/o patch | w/ patch |  diff   |
+-----------------+-----------+----------+---------+
| Average (times) |   11390   |   390    | -96.57% |
+-----------------+-----------+----------+---------+

2. The reduction in memory released by erofs_shrink_scan():
+-----------------+-----------+----------+---------+
|                 | w/o patch | w/ patch |  diff   |
+-----------------+-----------+----------+---------+
| Average (Byte)  | 133612656 | 4434552  | -96.68% |
+-----------------+-----------+----------+---------+

Signed-off-by: Chunhai Guo <guochun...@vivo.com>
---
v3 -> v4:
 - modify the patch as Gao Xiang suggested in v3.

v2 -> v3:
 - rename erofs_prepare_to_release_pcluster() to 
__erofs_try_to_release_pcluster()
 - use trylock in z_erofs_put_pcluster() instead of 
erofs_try_to_release_pcluster()

v1: 
https://lore.kernel.org/linux-erofs/588351c0-93f9-4a04-a923-15aae8b71...@linux.alibaba.com/
changes since v1:
 - rebase this patch on the "sunset z_erofs_workgroup" series
 - remove check on pcl->partial and get rid of `be->try_free`
 - update test results based on the 6.6 kernel
---
 fs/erofs/zdata.c | 54 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 6b73a2307460..d2338bd99811 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -885,14 +885,11 @@ static void z_erofs_rcu_callback(struct rcu_head *head)
                        struct z_erofs_pcluster, rcu));
 }
 
-static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
+static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
                                          struct z_erofs_pcluster *pcl)
 {
-       int free = false;
-
-       spin_lock(&pcl->lockref.lock);
        if (pcl->lockref.count)
-               goto out;
+               return false;
 
        /*
         * Note that all cached folios should be detached before deleted from
@@ -900,7 +897,7 @@ static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
         * orphan old pcluster when the new one is available in the tree.
         */
        if (erofs_try_to_free_all_cached_folios(sbi, pcl))
-               goto out;
+               return false;
 
        /*
         * It's impossible to fail after the pcluster is freezed, but in order
@@ -909,8 +906,16 @@ static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
        DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->index) != pcl);
 
        lockref_mark_dead(&pcl->lockref);
-       free = true;
-out:
+       return true;
+}
+
+static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
+                                         struct z_erofs_pcluster *pcl)
+{
+       bool free;
+
+       spin_lock(&pcl->lockref.lock);
+       free = __erofs_try_to_release_pcluster(sbi, pcl);
        spin_unlock(&pcl->lockref.lock);
        if (free) {
                atomic_long_dec(&erofs_global_shrink_cnt);
@@ -942,16 +947,25 @@ unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi,
        return freed;
 }
 
-static void z_erofs_put_pcluster(struct z_erofs_pcluster *pcl)
+static void z_erofs_put_pcluster(struct erofs_sb_info *sbi,
+               struct z_erofs_pcluster *pcl, bool try_free)
 {
+       bool free = false;
+
        if (lockref_put_or_lock(&pcl->lockref))
                return;
 
        DBG_BUGON(__lockref_is_dead(&pcl->lockref));
-       if (pcl->lockref.count == 1)
-               atomic_long_inc(&erofs_global_shrink_cnt);
-       --pcl->lockref.count;
+       if (--pcl->lockref.count == 0) {
+               if (try_free && xa_trylock(&sbi->managed_pslots)) {
+                       free = __erofs_try_to_release_pcluster(sbi, pcl);
+                       xa_unlock(&sbi->managed_pslots);
+               }
+               atomic_long_add(!free, &erofs_global_shrink_cnt);
+       }
        spin_unlock(&pcl->lockref.lock);
+       if (free)
+               call_rcu(&pcl->rcu, z_erofs_rcu_callback);
 }
 
 static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
@@ -972,7 +986,7 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
         * any longer if the pcluster isn't hosted by ourselves.
         */
        if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
-               z_erofs_put_pcluster(pcl);
+               z_erofs_put_pcluster(EROFS_I_SB(fe->inode), pcl, false);
 
        fe->pcl = NULL;
 }
@@ -1274,6 +1288,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
        int i, j, jtop, err2;
        struct page *page;
        bool overlapped;
+       bool try_free = true;
 
        mutex_lock(&pcl->lock);
        be->nr_pages = PAGE_ALIGN(pcl->length + pcl->pageofs_out) >> PAGE_SHIFT;
@@ -1332,8 +1347,10 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
                for (i = 0; i < pclusterpages; ++i) {
                        page = be->compressed_pages[i];
                        if (!page ||
-                           erofs_folio_is_managed(sbi, page_folio(page)))
+                           erofs_folio_is_managed(sbi, page_folio(page))) {
+                               try_free = false;
                                continue;
+                       }
                        (void)z_erofs_put_shortlivedpage(be->pagepool, page);
                        WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
                }
@@ -1379,6 +1396,11 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
        /* pcluster lock MUST be taken before the following line */
        WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);
        mutex_unlock(&pcl->lock);
+
+       if (z_erofs_is_inline_pcluster(pcl))
+               z_erofs_free_pcluster(pcl);
+       else
+               z_erofs_put_pcluster(sbi, pcl, try_free);
        return err;
 }
 
@@ -1401,10 +1423,6 @@ static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
                owned = READ_ONCE(be.pcl->next);
 
                err = z_erofs_decompress_pcluster(&be, err) ?: err;
-               if (z_erofs_is_inline_pcluster(be.pcl))
-                       z_erofs_free_pcluster(be.pcl);
-               else
-                       z_erofs_put_pcluster(be.pcl);
        }
        return err;
 }
-- 
2.34.1

Reply via email to