[PATCH v2] f2fs: handle newly created page when revoking inmem pages

2018-01-10 Thread Daeho Jeong
When committing inmem pages is successful, we revoke already committed
blocks in __revoke_inmem_pages() and finally replace the committed
ones with the old blocks using f2fs_replace_block(). However, if
the committed block was a newly created one, the address of the old
block is NEW_ADDR and __f2fs_replace_block() cannot handle NEW_ADDR
as new_blkaddr properly, so a kernel panic occurs.

Signed-off-by: Daeho Jeong 
Tested-by: Shu Tan 
Reviewed-by: Chao Yu 
---
 fs/f2fs/segment.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c117e09..0673d08 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -248,7 +248,11 @@ static int __revoke_inmem_pages(struct inode *inode,
goto next;
}
get_node_info(sbi, dn.nid, &ni);
-   f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
+   if (cur->old_addr == NEW_ADDR) {
+   invalidate_blocks(sbi, dn.data_blkaddr);
+   f2fs_update_data_blkaddr(&dn, NEW_ADDR);
+   } else
+   f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
cur->old_addr, ni.version, true, true);
f2fs_put_dnode(&dn);
}
-- 
1.9.1



[PATCH] f2fs: prevent newly created inode from being dirtied incorrectly

2018-01-10 Thread Daeho Jeong
Now, we invoke f2fs_mark_inode_dirty_sync() to make an inode dirty in
advance of creating a new node page for the inode. By this, some inodes
whose node page is not created yet can be linked into the global dirty
list.

If the checkpoint is executed at this moment, the inode will be written
back by writeback_single_inode() and finally update_inode_page() will
fail to detach the inode from the global dirty list because the inode
doesn't have a node page.

The problem is that the inode's state in VFS layer will become clean
after execution of writeback_single_inode() and it's still linked in
the global dirty list of f2fs and this will cause a kernel panic.

So, we will prevent the newly created inode from being dirtied while
the FI_NEW_INODE flag of the inode is set. We will make it dirty
right after the flag is cleared.

Signed-off-by: Daeho Jeong 
Signed-off-by: Youngjin Gil 
Tested-by: Hobin Woo 
---
 fs/f2fs/f2fs.h  | 1 +
 fs/f2fs/inode.c | 3 +++
 fs/f2fs/namei.c | 4 ++--
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f4e094e..546c7d6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2116,6 +2116,7 @@ static inline void __mark_inode_dirty_flag(struct inode 
*inode,
case FI_INLINE_XATTR:
case FI_INLINE_DATA:
case FI_INLINE_DENTRY:
+   case FI_NEW_INODE:
if (set)
return;
case FI_DATA_EXIST:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index b4c4f2b..67dfa16 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -22,6 +22,9 @@
 
 void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
 {
+   if (is_inode_flag_set(inode, FI_NEW_INODE))
+   return;
+
if (f2fs_inode_dirtied(inode, sync))
return;
 
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 28bdf88..bedf225 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -74,12 +74,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, 
umode_t mode)
if (err)
goto fail_drop;
 
+   set_inode_flag(inode, FI_NEW_INODE);
+
/* If the directory encrypted, then we should encrypt the inode. */
if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
 
-   set_inode_flag(inode, FI_NEW_INODE);
-
if (f2fs_sb_has_extra_attr(sbi->sb)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
-- 
1.9.1



[PATCH] f2fs: prevent newly created inode from being dirtied incorrectly

2018-01-11 Thread Daeho Jeong
Now, we invoke f2fs_mark_inode_dirty_sync() to make an inode dirty in
advance of creating a new node page for the inode. By this, some inodes
whose node page is not created yet can be linked into the global dirty
list.

If the checkpoint is executed at this moment, the inode will be written
back by writeback_single_inode() and finally update_inode_page() will
fail to detach the inode from the global dirty list because the inode
doesn't have a node page.

The problem is that the inode's state in VFS layer will become clean
after execution of writeback_single_inode() and it's still linked in
the global dirty list of f2fs and this will cause a kernel panic.

So, we will prevent the newly created inode from being dirtied while
the FI_NEW_INODE flag of the inode is set. We will make it dirty
right after the flag is cleared.

Signed-off-by: Daeho Jeong 
Signed-off-by: Youngjin Gil 
Tested-by: Hobin Woo 
Reviewed-by: Chao Yu 
---
 fs/f2fs/f2fs.h  | 1 +
 fs/f2fs/inode.c | 3 +++
 fs/f2fs/namei.c | 4 ++--
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f4e094e..546c7d6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2116,6 +2116,7 @@ static inline void __mark_inode_dirty_flag(struct inode 
*inode,
case FI_INLINE_XATTR:
case FI_INLINE_DATA:
case FI_INLINE_DENTRY:
+   case FI_NEW_INODE:
if (set)
return;
case FI_DATA_EXIST:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index b4c4f2b..67dfa16 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -22,6 +22,9 @@
 
 void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
 {
+   if (is_inode_flag_set(inode, FI_NEW_INODE))
+   return;
+
if (f2fs_inode_dirtied(inode, sync))
return;
 
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 28bdf88..bedf225 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -74,12 +74,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, 
umode_t mode)
if (err)
goto fail_drop;
 
+   set_inode_flag(inode, FI_NEW_INODE);
+
/* If the directory encrypted, then we should encrypt the inode. */
if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
 
-   set_inode_flag(inode, FI_NEW_INODE);
-
if (f2fs_sb_has_extra_attr(sbi->sb)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
-- 
1.9.1



[PATCH] f2fs: handle newly created page when revoking inmem pages

2018-01-09 Thread Daeho Jeong
When committing inmem pages is successful, we revoke already committed
blocks in __revoke_inmem_pages() and finally replace the committed
ones with the old blocks using f2fs_replace_block(). However, if
the committed block was a newly created one, the address of the old
block is NEW_ADDR and __f2fs_replace_block() cannot handle NEW_ADDR
as new_blkaddr properly, so a kernel panic occurs.

Signed-off-by: Daeho Jeong 
Tested-by: Shu Tan 
---
 fs/f2fs/segment.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c117e09..463f420 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -248,7 +248,11 @@ static int __revoke_inmem_pages(struct inode *inode,
goto next;
}
get_node_info(sbi, dn.nid, &ni);
-   f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
+   if (cur->old_addr == NEW_ADDR) {
+   invalidate_blocks(sbi, dn.data_blkaddr);
+   f2fs_update_data_blkaddr(&dn, NULL_ADDR);
+   } else
+   f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
cur->old_addr, ni.version, true, true);
f2fs_put_dnode(&dn);
}
-- 
1.9.1



Re: [PATCH] f2fs: handle newly created page when revoking inmem pages

2018-01-09 Thread Daeho Jeong
Hi Chao,

> Original intention here is to recover status to the timing before
> committing atomic write. As at that timing blkaddr in dnode should be
> cur->old_addr(NEW_ADDR), so we need to change to call:
 
> f2fs_update_data_blkaddr(&dn, NEW_ADDR);

Ok, I'll change NULL_ADDR to NEW_ADDR.

Thanks,
 
> Otherwise, metadata will become inconsistent, because blkaddr value is
> NULL_ADDR means that current block is not preallocated, but
> total_valid_block_count has already been updated. Right?
 
> Thanks,
 

 
 


[PATCH v2] f2fs: handle newly created page when revoking inmem pages

2018-01-09 Thread Daeho Jeong
When committing inmem pages is successful, we revoke already committed
blocks in __revoke_inmem_pages() and finally replace the committed
ones with the old blocks using f2fs_replace_block(). However, if
the committed block was a newly created one, the address of the old
block is NEW_ADDR and __f2fs_replace_block() cannot handle NEW_ADDR
as new_blkaddr properly, so a kernel panic occurs.

Signed-off-by: Daeho Jeong 
Tested-by: Shu Tan 
---
 fs/f2fs/segment.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c117e09..0673d08 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -248,7 +248,11 @@ static int __revoke_inmem_pages(struct inode *inode,
goto next;
}
get_node_info(sbi, dn.nid, &ni);
-   f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
+   if (cur->old_addr == NEW_ADDR) {
+   invalidate_blocks(sbi, dn.data_blkaddr);
+   f2fs_update_data_blkaddr(&dn, NEW_ADDR);
+   } else
+   f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
cur->old_addr, ni.version, true, true);
f2fs_put_dnode(&dn);
}
-- 
1.9.1



[PATCH] f2fs: fix checkpoint mount option wrong combination

2021-01-31 Thread Daeho Jeong
From: Daeho Jeong 

As checkpoint=merge comes in, mount option setting related to
checkpoint had been mixed up. Fixed it.

Signed-off-by: Daeho Jeong 
---
 fs/f2fs/super.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 56696f6cfa86..8231c888c772 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -930,20 +930,25 @@ static int parse_options(struct super_block *sb, char 
*options, bool is_remount)
return -EINVAL;
F2FS_OPTION(sbi).unusable_cap_perc = arg;
set_opt(sbi, DISABLE_CHECKPOINT);
+   clear_opt(sbi, MERGE_CHECKPOINT);
break;
case Opt_checkpoint_disable_cap:
if (args->from && match_int(args, &arg))
return -EINVAL;
F2FS_OPTION(sbi).unusable_cap = arg;
set_opt(sbi, DISABLE_CHECKPOINT);
+   clear_opt(sbi, MERGE_CHECKPOINT);
break;
case Opt_checkpoint_disable:
set_opt(sbi, DISABLE_CHECKPOINT);
+   clear_opt(sbi, MERGE_CHECKPOINT);
break;
case Opt_checkpoint_enable:
clear_opt(sbi, DISABLE_CHECKPOINT);
+   clear_opt(sbi, MERGE_CHECKPOINT);
break;
case Opt_checkpoint_merge:
+   clear_opt(sbi, DISABLE_CHECKPOINT);
set_opt(sbi, MERGE_CHECKPOINT);
break;
 #ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -1142,12 +1147,6 @@ static int parse_options(struct super_block *sb, char 
*options, bool is_remount)
return -EINVAL;
}
 
-   if (test_opt(sbi, DISABLE_CHECKPOINT) &&
-   test_opt(sbi, MERGE_CHECKPOINT)) {
-   f2fs_err(sbi, "checkpoint=merge cannot be used with 
checkpoint=disable\n");
-   return -EINVAL;
-   }
-
/* Not pass down write hints if the number of active logs is lesser
 * than NR_CURSEG_PERSIST_TYPE.
 */
-- 
2.30.0.365.g02bc693789-goog



[PATCH] f2fs: prevent setting ioprio of thread not in merge mode

2021-01-31 Thread Daeho Jeong
From: Daeho Jeong 

Changing the ioprio of the checkpoint thread causes a crash when
checkpoint=merge is not enabled. Fixed it to prevent setting the ioprio
of the thread when checkpoint=merge is not enabled.

Signed-off-by: Daeho Jeong 
---
 fs/f2fs/sysfs.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 100608bcd517..e38a7f6921dd 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -357,8 +357,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return -EINVAL;
 
cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data);
-   ret = set_task_ioprio(cprc->f2fs_issue_ckpt,
-   cprc->ckpt_thread_ioprio);
+   if (test_opt(sbi, MERGE_CHECKPOINT)) {
+   ret = set_task_ioprio(cprc->f2fs_issue_ckpt,
+   cprc->ckpt_thread_ioprio);
+   if (ret)
+   return ret;
+   }
 
return count;
}
-- 
2.30.0.365.g02bc693789-goog



Re: [f2fs-dev] [PATCH] f2fs: fix checkpoint mount option wrong combination

2021-02-01 Thread Daeho Jeong
Actually, I think we need to select one among them: disable, enable
and merge. I realized my previous understanding about that was wrong.
In the case of "checkpoint=merge,checkpoint=enable", the last option
will override the ones before it.
This is how the other mount options like fsync_mode, whint_mode, etc. behave.
So, the answer will be "checkpoint=enable". What do you think?



2021년 2월 1일 (월) 오후 9:40, Chao Yu 님이 작성:
>
> On 2021/2/1 8:06, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > As checkpoint=merge comes in, mount option setting related to
> > checkpoint had been mixed up. Fixed it.
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> >   fs/f2fs/super.c | 11 +--
> >   1 file changed, 5 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index 56696f6cfa86..8231c888c772 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -930,20 +930,25 @@ static int parse_options(struct super_block *sb, char 
> > *options, bool is_remount)
> >   return -EINVAL;
> >   F2FS_OPTION(sbi).unusable_cap_perc = arg;
> >   set_opt(sbi, DISABLE_CHECKPOINT);
> > + clear_opt(sbi, MERGE_CHECKPOINT);
> >   break;
> >   case Opt_checkpoint_disable_cap:
> >   if (args->from && match_int(args, &arg))
> >   return -EINVAL;
> >   F2FS_OPTION(sbi).unusable_cap = arg;
> >   set_opt(sbi, DISABLE_CHECKPOINT);
> > + clear_opt(sbi, MERGE_CHECKPOINT);
> >   break;
> >   case Opt_checkpoint_disable:
> >   set_opt(sbi, DISABLE_CHECKPOINT);
> > + clear_opt(sbi, MERGE_CHECKPOINT);
> >   break;
> >   case Opt_checkpoint_enable:
> >   clear_opt(sbi, DISABLE_CHECKPOINT);
> > + clear_opt(sbi, MERGE_CHECKPOINT);
>
> What if: -o checkpoint=merge,checkpoint=enable
>
> Can you please explain the rule of merge/disable/enable combination and their
> result? e.g.
> checkpoint=merge,checkpoint=enable
> checkpoint=enable,checkpoint=merge
> checkpoint=merge,checkpoint=disable
> checkpoint=disable,checkpoint=merge
>
> If the rule/result is clear, it should be documented.
>
> Thanks,
>
>
> >   break;
> >   case Opt_checkpoint_merge:
> > + clear_opt(sbi, DISABLE_CHECKPOINT);
> >   set_opt(sbi, MERGE_CHECKPOINT);
> >   break;
> >   #ifdef CONFIG_F2FS_FS_COMPRESSION
> > @@ -1142,12 +1147,6 @@ static int parse_options(struct super_block *sb, 
> > char *options, bool is_remount)
> >   return -EINVAL;
> >   }
> >
> > - if (test_opt(sbi, DISABLE_CHECKPOINT) &&
> > - test_opt(sbi, MERGE_CHECKPOINT)) {
> > - f2fs_err(sbi, "checkpoint=merge cannot be used with 
> > checkpoint=disable\n");
> > - return -EINVAL;
> > - }
> > -
> >   /* Not pass down write hints if the number of active logs is lesser
> >* than NR_CURSEG_PERSIST_TYPE.
> >*/
> >


Re: [f2fs-dev] [PATCH] f2fs: fix checkpoint mount option wrong combination

2021-02-01 Thread Daeho Jeong
The rightmost one is the final option. And checkpoint=merge means
checkpoint is enabled with a checkpoint thread.

mount checkpoint=disable,checkpoint=merge => checkpoint=merge
remount checkpoint=enable,checkpoint=merge => checkpoint=merge
remount checkpoint=merge,checkpoint=disable => checkpoint=disable
remount checkpoint=merge,checkpoint=enable => checkpoint=enable

Like

mount fsync_mode=posix, fsync_mode=strict, fsync_mode=nobarrier =>
fsync_mode=nobarrier

2021년 2월 2일 (화) 오전 5:11, Jaegeuk Kim 님이 작성:
>
> On 02/01, Daeho Jeong wrote:
> > Actually, I think we need to select one among them, disable, enable
> > and merge. I realized my previous understanding about that was wrong.
> > In that case of "checkpoint=merge,checkpoint=enable", the last option
> > will override the ones before that.
> > This is how the other mount options like fsync_mode, whint_mode and etc.
> > So, the answer will be "checkpoint=enable". What do you think?
>
> We need to clarify a bit more. :)
>
> mount checkpoint=disable,checkpoint=merge
> remount checkpoint=enable,checkpoint=merge
>
> Then, is it going to enable checkpoint with a thread?
>
> >
> >
> >
> > 2021년 2월 1일 (월) 오후 9:40, Chao Yu 님이 작성:
> > >
> > > On 2021/2/1 8:06, Daeho Jeong wrote:
> > > > From: Daeho Jeong 
> > > >
> > > > As checkpoint=merge comes in, mount option setting related to
> > > > checkpoint had been mixed up. Fixed it.
> > > >
> > > > Signed-off-by: Daeho Jeong 
> > > > ---
> > > >   fs/f2fs/super.c | 11 +--
> > > >   1 file changed, 5 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > > index 56696f6cfa86..8231c888c772 100644
> > > > --- a/fs/f2fs/super.c
> > > > +++ b/fs/f2fs/super.c
> > > > @@ -930,20 +930,25 @@ static int parse_options(struct super_block *sb, 
> > > > char *options, bool is_remount)
> > > >   return -EINVAL;
> > > >   F2FS_OPTION(sbi).unusable_cap_perc = arg;
> > > >   set_opt(sbi, DISABLE_CHECKPOINT);
> > > > + clear_opt(sbi, MERGE_CHECKPOINT);
> > > >   break;
> > > >   case Opt_checkpoint_disable_cap:
> > > >   if (args->from && match_int(args, &arg))
> > > >   return -EINVAL;
> > > >   F2FS_OPTION(sbi).unusable_cap = arg;
> > > >   set_opt(sbi, DISABLE_CHECKPOINT);
> > > > + clear_opt(sbi, MERGE_CHECKPOINT);
> > > >   break;
> > > >   case Opt_checkpoint_disable:
> > > >   set_opt(sbi, DISABLE_CHECKPOINT);
> > > > + clear_opt(sbi, MERGE_CHECKPOINT);
> > > >   break;
> > > >   case Opt_checkpoint_enable:
> > > >   clear_opt(sbi, DISABLE_CHECKPOINT);
> > > > + clear_opt(sbi, MERGE_CHECKPOINT);
> > >
> > > What if: -o checkpoint=merge,checkpoint=enable
> > >
> > > Can you please explain the rule of merge/disable/enable combination and 
> > > their
> > > result? e.g.
> > > checkpoint=merge,checkpoint=enable
> > > checkpoint=enable,checkpoint=merge
> > > checkpoint=merge,checkpoint=disable
> > > checkpoint=disable,checkpoint=merge
> > >
> > > If the rule/result is clear, it should be documented.
> > >
> > > Thanks,
> > >
> > >
> > > >   break;
> > > >   case Opt_checkpoint_merge:
> > > > + clear_opt(sbi, DISABLE_CHECKPOINT);
> > > >   set_opt(sbi, MERGE_CHECKPOINT);
> > > >   break;
> > > >   #ifdef CONFIG_F2FS_FS_COMPRESSION
> > > > @@ -1142,12 +1147,6 @@ static int parse_options(struct super_block *sb, 
> > > > char *options, bool is_remount)
> > > >   return -EINVAL;
> > > >   }
> > > >
> > > > - if (test_opt(sbi, DISABLE_CHECKPOINT) &&
> > > > - test_opt(sbi, MERGE_CHECKPOINT)) {
> > > > - f2fs_err(sbi, "checkpoint=merge cannot be used with 
> > > > checkpoint=disable\n");
> > > > - return -EINVAL;
> > > > - }
> > > > -
> > > >   /* Not pass down write hints if the number of active logs is 
> > > > lesser
> > > >* than NR_CURSEG_PERSIST_TYPE.
> > > >*/
> > > >
> >
> >
> > ___
> > Linux-f2fs-devel mailing list
> > linux-f2fs-de...@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: fix checkpoint mount option wrong combination

2021-02-01 Thread Daeho Jeong
To avoid confusion, I am going to separate the "merge" option from
"checkpoint=".
I am adding another "ckpt_merge" option. :)

2021년 2월 2일 (화) 오전 8:33, Daeho Jeong 님이 작성:
>
> The rightmost one is the final option. And checkpoint=merge means
> checkpoint is enabled with a checkpoint thread.
>
> mount checkpoint=disable,checkpoint=merge => checkpoint=merge
> remount checkpoint=enable,checkpoint=merge => checkpoint=merge
> remount checkpoint=merge,checkpoint=disable => checkpoint=disable
> remount checkpoint=merge,checkpoint=enable => checkpoint=enable
>
> Like
>
> mount fsync_mode=posix, fsync_mode=strict, fsync_mode=nobarrier =>
> fsync_mode=nobarrier
>
> 2021년 2월 2일 (화) 오전 5:11, Jaegeuk Kim 님이 작성:
> >
> > On 02/01, Daeho Jeong wrote:
> > > Actually, I think we need to select one among them, disable, enable
> > > and merge. I realized my previous understanding about that was wrong.
> > > In that case of "checkpoint=merge,checkpoint=enable", the last option
> > > will override the ones before that.
> > > This is how the other mount options like fsync_mode, whint_mode and etc.
> > > So, the answer will be "checkpoint=enable". What do you think?
> >
> > We need to clarify a bit more. :)
> >
> > mount checkpoint=disable,checkpoint=merge
> > remount checkpoint=enable,checkpoint=merge
> >
> > Then, is it going to enable checkpoint with a thread?
> >
> > >
> > >
> > >
> > > 2021년 2월 1일 (월) 오후 9:40, Chao Yu 님이 작성:
> > > >
> > > > On 2021/2/1 8:06, Daeho Jeong wrote:
> > > > > From: Daeho Jeong 
> > > > >
> > > > > As checkpoint=merge comes in, mount option setting related to
> > > > > checkpoint had been mixed up. Fixed it.
> > > > >
> > > > > Signed-off-by: Daeho Jeong 
> > > > > ---
> > > > >   fs/f2fs/super.c | 11 +--
> > > > >   1 file changed, 5 insertions(+), 6 deletions(-)
> > > > >
> > > > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > > > index 56696f6cfa86..8231c888c772 100644
> > > > > --- a/fs/f2fs/super.c
> > > > > +++ b/fs/f2fs/super.c
> > > > > @@ -930,20 +930,25 @@ static int parse_options(struct super_block 
> > > > > *sb, char *options, bool is_remount)
> > > > >   return -EINVAL;
> > > > >   F2FS_OPTION(sbi).unusable_cap_perc = arg;
> > > > >   set_opt(sbi, DISABLE_CHECKPOINT);
> > > > > + clear_opt(sbi, MERGE_CHECKPOINT);
> > > > >   break;
> > > > >   case Opt_checkpoint_disable_cap:
> > > > >   if (args->from && match_int(args, &arg))
> > > > >   return -EINVAL;
> > > > >   F2FS_OPTION(sbi).unusable_cap = arg;
> > > > >   set_opt(sbi, DISABLE_CHECKPOINT);
> > > > > + clear_opt(sbi, MERGE_CHECKPOINT);
> > > > >   break;
> > > > >   case Opt_checkpoint_disable:
> > > > >   set_opt(sbi, DISABLE_CHECKPOINT);
> > > > > + clear_opt(sbi, MERGE_CHECKPOINT);
> > > > >   break;
> > > > >   case Opt_checkpoint_enable:
> > > > >   clear_opt(sbi, DISABLE_CHECKPOINT);
> > > > > + clear_opt(sbi, MERGE_CHECKPOINT);
> > > >
> > > > What if: -o checkpoint=merge,checkpoint=enable
> > > >
> > > > Can you please explain the rule of merge/disable/enable combination and 
> > > > their
> > > > result? e.g.
> > > > checkpoint=merge,checkpoint=enable
> > > > checkpoint=enable,checkpoint=merge
> > > > checkpoint=merge,checkpoint=disable
> > > > checkpoint=disable,checkpoint=merge
> > > >
> > > > If the rule/result is clear, it should be documented.
> > > >
> > > > Thanks,
> > > >
> > > >
> > > > >   break;
> > > > >   case Opt_checkpoint_merge:
> > > > > + clear_opt(sbi, DISABLE_CHECKPOINT);
> > > > >   set_opt(sbi, MERGE_CHECKPOINT);
> > > > >   break;
> > > > >   #ifdef CONFIG_F2FS_FS_COMPRESSION
> > > > > @@ -1142,12 +1147,6 @@ static int parse_options(struct super_block 
> > > > > *sb, char *options, bool is_remount)
> > > > >   return -EINVAL;
> > > > >   }
> > > > >
> > > > > - if (test_opt(sbi, DISABLE_CHECKPOINT) &&
> > > > > - test_opt(sbi, MERGE_CHECKPOINT)) {
> > > > > - f2fs_err(sbi, "checkpoint=merge cannot be used with 
> > > > > checkpoint=disable\n");
> > > > > - return -EINVAL;
> > > > > - }
> > > > > -
> > > > >   /* Not pass down write hints if the number of active logs is 
> > > > > lesser
> > > > >* than NR_CURSEG_PERSIST_TYPE.
> > > > >*/
> > > > >
> > >
> > >
> > > ___
> > > Linux-f2fs-devel mailing list
> > > linux-f2fs-de...@lists.sourceforge.net
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[PATCH v2] f2fs: rename checkpoint=merge mount option to checkpoint_merge

2021-02-01 Thread Daeho Jeong
From: Daeho Jeong 

As checkpoint=merge comes in, mount option setting related to checkpoint
had been mixed up and it became hard to understand. So, I separated
this option from "checkpoint=" and made another mount option
"checkpoint_merge" for this.

Signed-off-by: Daeho Jeong 
---
v2: renamed "checkpoint=merge" to "checkpoint_merge"
---
 Documentation/filesystems/f2fs.rst |  6 +++---
 fs/f2fs/super.c| 26 ++
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index d0ead45dc706..475994ed8b15 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -247,9 +247,9 @@ checkpoint=%s[:%u[%]]Set to "disable" to turn off 
checkpointing. Set to "enabl
 hide up to all remaining free space. The actual space 
that
 would be unusable can be viewed at 
/sys/fs/f2fs//unusable
 This space is reclaimed once checkpoint=enable.
-Here is another option "merge", which creates a kernel 
daemon
-and makes it to merge concurrent checkpoint requests 
as much
-as possible to eliminate redundant checkpoint issues. 
Plus,
+checkpoint_mergeWhen checkpoint is enabled, this can be used to create 
a kernel
+daemon and make it to merge concurrent checkpoint 
requests as
+much as possible to eliminate redundant checkpoint 
issues. Plus,
 we can eliminate the sluggish issue caused by slow 
checkpoint
 operation when the checkpoint is done in a process 
context in
 a cgroup having low i/o budget and cpu shares. To make 
this
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 56696f6cfa86..d8603e6c4916 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -145,6 +145,7 @@ enum {
Opt_checkpoint_disable_cap_perc,
Opt_checkpoint_enable,
Opt_checkpoint_merge,
+   Opt_nocheckpoint_merge,
Opt_compress_algorithm,
Opt_compress_log_size,
Opt_compress_extension,
@@ -215,7 +216,8 @@ static match_table_t f2fs_tokens = {
{Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
{Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
{Opt_checkpoint_enable, "checkpoint=enable"},
-   {Opt_checkpoint_merge, "checkpoint=merge"},
+   {Opt_checkpoint_merge, "checkpoint_merge"},
+   {Opt_nocheckpoint_merge, "nocheckpoint_merge"},
{Opt_compress_algorithm, "compress_algorithm=%s"},
{Opt_compress_log_size, "compress_log_size=%u"},
{Opt_compress_extension, "compress_extension=%s"},
@@ -946,6 +948,9 @@ static int parse_options(struct super_block *sb, char 
*options, bool is_remount)
case Opt_checkpoint_merge:
set_opt(sbi, MERGE_CHECKPOINT);
break;
+   case Opt_nocheckpoint_merge:
+   clear_opt(sbi, MERGE_CHECKPOINT);
+   break;
 #ifdef CONFIG_F2FS_FS_COMPRESSION
case Opt_compress_algorithm:
if (!f2fs_sb_has_compression(sbi)) {
@@ -1142,12 +1147,6 @@ static int parse_options(struct super_block *sb, char 
*options, bool is_remount)
return -EINVAL;
}
 
-   if (test_opt(sbi, DISABLE_CHECKPOINT) &&
-   test_opt(sbi, MERGE_CHECKPOINT)) {
-   f2fs_err(sbi, "checkpoint=merge cannot be used with 
checkpoint=disable\n");
-   return -EINVAL;
-   }
-
/* Not pass down write hints if the number of active logs is lesser
 * than NR_CURSEG_PERSIST_TYPE.
 */
@@ -1782,7 +1781,7 @@ static int f2fs_show_options(struct seq_file *seq, struct 
dentry *root)
seq_printf(seq, ",checkpoint=disable:%u",
F2FS_OPTION(sbi).unusable_cap);
if (test_opt(sbi, MERGE_CHECKPOINT))
-   seq_puts(seq, ",checkpoint=merge");
+   seq_puts(seq, ",checkpoint_merge");
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1827,6 +1826,7 @@ static void default_options(struct f2fs_sb_info *sbi)
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
set_opt(sbi, DISCARD);
+   clear_opt(sbi, MERGE_CHECKPOINT);
if (f2fs_sb_has_blkzoned(sbi))
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
else
@@ -2066,9 +2066,8 @@ stat

Re: [f2fs-dev] [PATCH v2] f2fs: rename checkpoint=merge mount option to checkpoint_merge

2021-02-02 Thread Daeho Jeong
I followed the same approach as "flush_merge", because it doesn't have
"noflush_merge".
Do you think we need that for both, "noflush_merge" and "nocheckpoint_merge"?

I thought we needed to give this some time before turning it on by
default. It might be a little radical. :)

What do you think?

2021년 2월 2일 (화) 오후 4:40, Chao Yu 님이 작성:
>
> On 2021/2/2 13:18, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > As checkpoint=merge comes in, mount option setting related to checkpoint
> > had been mixed up and it became hard to understand. So, I separated
> > this option from "checkpoint=" and made another mount option
> > "checkpoint_merge" for this.
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> > v2: renamed "checkpoint=merge" to "checkpoint_merge"
> > ---
> >   Documentation/filesystems/f2fs.rst |  6 +++---
> >   fs/f2fs/super.c| 26 ++
> >   2 files changed, 17 insertions(+), 15 deletions(-)
> >
> > diff --git a/Documentation/filesystems/f2fs.rst 
> > b/Documentation/filesystems/f2fs.rst
> > index d0ead45dc706..475994ed8b15 100644
> > --- a/Documentation/filesystems/f2fs.rst
> > +++ b/Documentation/filesystems/f2fs.rst
> > @@ -247,9 +247,9 @@ checkpoint=%s[:%u[%]]  Set to "disable" to turn off 
> > checkpointing. Set to "enabl
> >hide up to all remaining free space. The actual 
> > space that
> >would be unusable can be viewed at 
> > /sys/fs/f2fs//unusable
> >This space is reclaimed once checkpoint=enable.
> > -  Here is another option "merge", which creates a 
> > kernel daemon
> > -  and makes it to merge concurrent checkpoint requests 
> > as much
> > -  as possible to eliminate redundant checkpoint 
> > issues. Plus,
> > +checkpoint_merge  When checkpoint is enabled, this can be used to 
> > create a kernel
> > +  daemon and make it to merge concurrent checkpoint 
> > requests as
> > +  much as possible to eliminate redundant checkpoint 
> > issues. Plus,
> >we can eliminate the sluggish issue caused by slow 
> > checkpoint
> >operation when the checkpoint is done in a process 
> > context in
> >a cgroup having low i/o budget and cpu shares. To 
> > make this
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index 56696f6cfa86..d8603e6c4916 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -145,6 +145,7 @@ enum {
> >   Opt_checkpoint_disable_cap_perc,
> >   Opt_checkpoint_enable,
> >   Opt_checkpoint_merge,
> > + Opt_nocheckpoint_merge,
> >   Opt_compress_algorithm,
> >   Opt_compress_log_size,
> >   Opt_compress_extension,
> > @@ -215,7 +216,8 @@ static match_table_t f2fs_tokens = {
> >   {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
> >   {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
> >   {Opt_checkpoint_enable, "checkpoint=enable"},
> > - {Opt_checkpoint_merge, "checkpoint=merge"},
> > + {Opt_checkpoint_merge, "checkpoint_merge"},
> > + {Opt_nocheckpoint_merge, "nocheckpoint_merge"},
> >   {Opt_compress_algorithm, "compress_algorithm=%s"},
> >   {Opt_compress_log_size, "compress_log_size=%u"},
> >   {Opt_compress_extension, "compress_extension=%s"},
> > @@ -946,6 +948,9 @@ static int parse_options(struct super_block *sb, char 
> > *options, bool is_remount)
> >   case Opt_checkpoint_merge:
> >   set_opt(sbi, MERGE_CHECKPOINT);
> >   break;
> > + case Opt_nocheckpoint_merge:
> > + clear_opt(sbi, MERGE_CHECKPOINT);
> > + break;
> >   #ifdef CONFIG_F2FS_FS_COMPRESSION
> >   case Opt_compress_algorithm:
> >   if (!f2fs_sb_has_compression(sbi)) {
> > @@ -1142,12 +1147,6 @@ static int parse_options(struct super_block *sb, 
> > char *options, bool is_remount)
> >   return -EINVAL;
> >   }
> >
> > - if (test_opt(sbi, DISABLE_CHECKPOINT) &&
> > - test_opt(sbi, MERGE_CHECKPOINT)) {
> > - f2fs_err(sbi, &quo

Re: [f2fs-dev] [PATCH v2] f2fs: rename checkpoint=merge mount option to checkpoint_merge

2021-02-02 Thread Daeho Jeong
If I understand it correctly, the only thing I have to do now is
remove "nocheckpoint_merge".
Am I correct? :)

2021년 2월 2일 (화) 오후 5:30, Chao Yu 님이 작성:
>
> On 2021/2/2 16:02, Daeho Jeong wrote:
> > I chose the same step with "flush_merge", because it doesn't have
> > "noflush_merge".
>
> Oh, "noxxx" option was added only when we set the option by default in
> default_options(), when user want to disable the default option, it
> needs to use "noxxx" option, and then we will show this "noxxx" option
> string to user via show_options() to indicate that "noxxx" option is
> working now.
>
> Anyway I think we should fix to show "noflush_merge" option because we
> have set flush_merge by default.
>
> > Do you think we need that for both, "noflush_merge" and 
> > "nocheckpoint_merge"?
>
> For "nocheckpoint_merge", we can introduce this option only when we want
> to set "checkpoint_merge" by default.
>
> Here is the example from noinline_data:
>
> Commit 75342797988 ("f2fs: enable inline data by default")
>
> Thanks,
>
> >
> > I thought we needed to give some time to make this be turned on by
> > default. It might be a little radical. :)
> >
> > What do you think?
> >
> > 2021년 2월 2일 (화) 오후 4:40, Chao Yu 님이 작성:
> >>
> >> On 2021/2/2 13:18, Daeho Jeong wrote:
> >>> From: Daeho Jeong 
> >>>
> >>> As checkpoint=merge comes in, mount option setting related to checkpoint
> >>> had been mixed up and it became hard to understand. So, I separated
> >>> this option from "checkpoint=" and made another mount option
> >>> "checkpoint_merge" for this.
> >>>
> >>> Signed-off-by: Daeho Jeong 
> >>> ---
> >>> v2: renamed "checkpoint=merge" to "checkpoint_merge"
> >>> ---
> >>>Documentation/filesystems/f2fs.rst |  6 +++---
> >>>fs/f2fs/super.c| 26 ++
> >>>2 files changed, 17 insertions(+), 15 deletions(-)
> >>>
> >>> diff --git a/Documentation/filesystems/f2fs.rst 
> >>> b/Documentation/filesystems/f2fs.rst
> >>> index d0ead45dc706..475994ed8b15 100644
> >>> --- a/Documentation/filesystems/f2fs.rst
> >>> +++ b/Documentation/filesystems/f2fs.rst
> >>> @@ -247,9 +247,9 @@ checkpoint=%s[:%u[%]]  Set to "disable" to turn 
> >>> off checkpointing. Set to "enabl
> >>> hide up to all remaining free space. The actual 
> >>> space that
> >>> would be unusable can be viewed at 
> >>> /sys/fs/f2fs//unusable
> >>> This space is reclaimed once checkpoint=enable.
> >>> -  Here is another option "merge", which creates a 
> >>> kernel daemon
> >>> -  and makes it to merge concurrent checkpoint 
> >>> requests as much
> >>> -  as possible to eliminate redundant checkpoint 
> >>> issues. Plus,
> >>> +checkpoint_merge  When checkpoint is enabled, this can be used to 
> >>> create a kernel
> >>> +  daemon and make it to merge concurrent checkpoint 
> >>> requests as
> >>> +  much as possible to eliminate redundant checkpoint 
> >>> issues. Plus,
> >>> we can eliminate the sluggish issue caused by 
> >>> slow checkpoint
> >>> operation when the checkpoint is done in a 
> >>> process context in
> >>> a cgroup having low i/o budget and cpu shares. To 
> >>> make this
> >>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>> index 56696f6cfa86..d8603e6c4916 100644
> >>> --- a/fs/f2fs/super.c
> >>> +++ b/fs/f2fs/super.c
> >>> @@ -145,6 +145,7 @@ enum {
> >>>Opt_checkpoint_disable_cap_perc,
> >>>Opt_checkpoint_enable,
> >>>Opt_checkpoint_merge,
> >>> + Opt_nocheckpoint_merge,
> >>>Opt_compress_algorithm,
> >>>Opt_compress_log_size,
> >>>Opt_compress_extension,
> >>> @@ -215,7 +216,8 @@ static match_table_t f2fs_tokens = {
> >>>{Opt_checkpoint_disabl

[PATCH v3] f2fs: rename checkpoint=merge mount option to checkpoint_merge

2021-02-02 Thread Daeho Jeong
From: Daeho Jeong 

As checkpoint=merge comes in, mount option setting related to checkpoint
had been mixed up and it became hard to understand. So, I separated
this option from "checkpoint=" and made another mount option
"checkpoint_merge" for this.

Signed-off-by: Daeho Jeong 
---
v2: renamed "checkpoint=merge" to "checkpoint_merge"
v3: removed "nocheckpoint_merge" option
---
 Documentation/filesystems/f2fs.rst |  6 +++---
 fs/f2fs/super.c| 21 +
 2 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index d0ead45dc706..475994ed8b15 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -247,9 +247,9 @@ checkpoint=%s[:%u[%]]Set to "disable" to turn off 
checkpointing. Set to "enabl
 hide up to all remaining free space. The actual space 
that
 would be unusable can be viewed at 
/sys/fs/f2fs//unusable
 This space is reclaimed once checkpoint=enable.
-Here is another option "merge", which creates a kernel 
daemon
-and makes it to merge concurrent checkpoint requests 
as much
-as possible to eliminate redundant checkpoint issues. 
Plus,
+checkpoint_mergeWhen checkpoint is enabled, this can be used to create 
a kernel
+daemon and make it to merge concurrent checkpoint 
requests as
+much as possible to eliminate redundant checkpoint 
issues. Plus,
 we can eliminate the sluggish issue caused by slow 
checkpoint
 operation when the checkpoint is done in a process 
context in
 a cgroup having low i/o budget and cpu shares. To make 
this
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 56696f6cfa86..b60dcef7f9d0 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -215,7 +215,7 @@ static match_table_t f2fs_tokens = {
{Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
{Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
{Opt_checkpoint_enable, "checkpoint=enable"},
-   {Opt_checkpoint_merge, "checkpoint=merge"},
+   {Opt_checkpoint_merge, "checkpoint_merge"},
{Opt_compress_algorithm, "compress_algorithm=%s"},
{Opt_compress_log_size, "compress_log_size=%u"},
{Opt_compress_extension, "compress_extension=%s"},
@@ -1142,12 +1142,6 @@ static int parse_options(struct super_block *sb, char 
*options, bool is_remount)
return -EINVAL;
}
 
-   if (test_opt(sbi, DISABLE_CHECKPOINT) &&
-   test_opt(sbi, MERGE_CHECKPOINT)) {
-   f2fs_err(sbi, "checkpoint=merge cannot be used with 
checkpoint=disable\n");
-   return -EINVAL;
-   }
-
/* Not pass down write hints if the number of active logs is lesser
 * than NR_CURSEG_PERSIST_TYPE.
 */
@@ -1782,7 +1776,7 @@ static int f2fs_show_options(struct seq_file *seq, struct 
dentry *root)
seq_printf(seq, ",checkpoint=disable:%u",
F2FS_OPTION(sbi).unusable_cap);
if (test_opt(sbi, MERGE_CHECKPOINT))
-   seq_puts(seq, ",checkpoint=merge");
+   seq_puts(seq, ",checkpoint_merge");
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1827,6 +1821,7 @@ static void default_options(struct f2fs_sb_info *sbi)
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
set_opt(sbi, DISCARD);
+   clear_opt(sbi, MERGE_CHECKPOINT);
if (f2fs_sb_has_blkzoned(sbi))
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
else
@@ -2066,9 +2061,8 @@ static int f2fs_remount(struct super_block *sb, int 
*flags, char *data)
}
}
 
-   if (!test_opt(sbi, MERGE_CHECKPOINT)) {
-   f2fs_stop_ckpt_thread(sbi);
-   } else {
+   if (!test_opt(sbi, DISABLE_CHECKPOINT) &&
+   test_opt(sbi, MERGE_CHECKPOINT)) {
err = f2fs_start_ckpt_thread(sbi);
if (err) {
f2fs_err(sbi,
@@ -2076,6 +2070,8 @@ static int f2fs_remount(struct super_block *sb, int 
*flags, char *data)
err);
goto restore_gc;
}
+   } else {
+   f2fs_stop_ckpt_thread(sbi);
}
 
/*
@@ -3831,7 +3827,8 @@ static int f2fs_fill_super(struct s

Re: [f2fs-dev] [PATCH v3] f2fs: rename checkpoint=merge mount option to checkpoint_merge

2021-02-02 Thread Daeho Jeong
When we remount it without the "checkpoint_merge" option, shouldn't we
need to clear "MERGE_CHECKPOINT" again?
This is actually what I intended, but I was wrong. Actually, I found this.

When we remount the filesystem, the previous mount option is passed
through the "data" argument in the below.
f2fs_remount(struct super_block *sb, int *flags, char *data)

If we don't provide the "nocheckpoint_merge" option, how can we turn
off the "checkpoint_merge" option which is turned on in the previous
mount?

2021년 2월 2일 (화) 오후 6:28, Chao Yu 님이 작성:
>
> On 2021/2/2 17:23, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > As checkpoint=merge comes in, mount option setting related to checkpoint
> > had been mixed up and it became hard to understand. So, I separated
> > this option from "checkpoint=" and made another mount option
> > "checkpoint_merge" for this.
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> > v2: renamed "checkpoint=merge" to "checkpoint_merge"
> > v3: removed "nocheckpoint_merge" option
> > ---
> >   Documentation/filesystems/f2fs.rst |  6 +++---
> >   fs/f2fs/super.c| 21 +
> >   2 files changed, 12 insertions(+), 15 deletions(-)
> >
> > diff --git a/Documentation/filesystems/f2fs.rst 
> > b/Documentation/filesystems/f2fs.rst
> > index d0ead45dc706..475994ed8b15 100644
> > --- a/Documentation/filesystems/f2fs.rst
> > +++ b/Documentation/filesystems/f2fs.rst
> > @@ -247,9 +247,9 @@ checkpoint=%s[:%u[%]]  Set to "disable" to turn off 
> > checkpointing. Set to "enabl
> >hide up to all remaining free space. The actual 
> > space that
> >would be unusable can be viewed at 
> > /sys/fs/f2fs//unusable
> >This space is reclaimed once checkpoint=enable.
> > -  Here is another option "merge", which creates a 
> > kernel daemon
> > -  and makes it to merge concurrent checkpoint requests 
> > as much
> > -  as possible to eliminate redundant checkpoint 
> > issues. Plus,
> > +checkpoint_merge  When checkpoint is enabled, this can be used to 
> > create a kernel
> > +  daemon and make it to merge concurrent checkpoint 
> > requests as
> > +  much as possible to eliminate redundant checkpoint 
> > issues. Plus,
> >we can eliminate the sluggish issue caused by slow 
> > checkpoint
> >operation when the checkpoint is done in a process 
> > context in
> >a cgroup having low i/o budget and cpu shares. To 
> > make this
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index 56696f6cfa86..b60dcef7f9d0 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -215,7 +215,7 @@ static match_table_t f2fs_tokens = {
> >   {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
> >   {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
> >   {Opt_checkpoint_enable, "checkpoint=enable"},
> > - {Opt_checkpoint_merge, "checkpoint=merge"},
> > + {Opt_checkpoint_merge, "checkpoint_merge"},
> >   {Opt_compress_algorithm, "compress_algorithm=%s"},
> >   {Opt_compress_log_size, "compress_log_size=%u"},
> >   {Opt_compress_extension, "compress_extension=%s"},
> > @@ -1142,12 +1142,6 @@ static int parse_options(struct super_block *sb, 
> > char *options, bool is_remount)
> >   return -EINVAL;
> >   }
> >
> > - if (test_opt(sbi, DISABLE_CHECKPOINT) &&
> > - test_opt(sbi, MERGE_CHECKPOINT)) {
> > - f2fs_err(sbi, "checkpoint=merge cannot be used with 
> > checkpoint=disable\n");
> > - return -EINVAL;
> > - }
> > -
> >   /* Not pass down write hints if the number of active logs is lesser
> >* than NR_CURSEG_PERSIST_TYPE.
> >*/
> > @@ -1782,7 +1776,7 @@ static int f2fs_show_options(struct seq_file *seq, 
> > struct dentry *root)
> >   seq_printf(seq, ",checkpoint=disable:%u",
> >   F2FS_OPTION(sbi).unusable_cap);
> >   if (test_opt(sbi, MERGE_CHECKPOINT))
> > - seq_puts(seq, ",checkpoint=merge");
&

Re: [f2fs-dev] [PATCH v3] f2fs: rename checkpoint=merge mount option to checkpoint_merge

2021-02-02 Thread Daeho Jeong
Thanks for the explanation.

I am going to remove the line clearing "MERGE_CHECKPOINT".
But, when we go with the below remount command, I think the
"nocheckpoint_merge" option will work well to disable only just
"checkpoint_merge" from the previous option.
"mount -o remount,nocheckpoint_merge  /dir"

It would be more convenient for users. What do you think?

2021년 2월 2일 (화) 오후 6:55, Chao Yu 님이 작성:
>
> On 2021/2/2 17:44, Daeho Jeong wrote:
> > When we remount it without the "checkpoint_merge" option, shouldn't we
> > need to clear "MERGE_CHECKPOINT" again?
> > This is actually what I intended, but I was wrong. Actually, I found this.
> >
> > When we remount the filesystem, the previous mount option is passed
> > through the "data" argument in the below.
> > f2fs_remount(struct super_block *sb, int *flags, char *data)
> >
> > If we don't provide the "nocheckpoint_merge" option, how can we turn
> > off the "checkpoint_merge" option which is turned on in the previous
> > mount?
>
> We can use "mount -o remount /dev/xxx /mnt" to disable checkpoint_merge,
> since that command won't pass old mount options to remount?
>
> Quoted from man mount:
>
>mount -o remount,rw /dev/foo /dir
>
>After  this  call  all  old  mount options are replaced and 
> arbitrary stuff from fstab (or mtab) is ignored, except the loop= option 
> which is internally generated and maintained by the
>mount command.
>
>mount -o remount,rw  /dir
>
>After this call mount reads fstab and merges these options 
> with the options from the command line (-o). If no mountpoint found in fstab 
> than remount with unspecified source is allowed.
>
> Thanks,
>
> >
> > 2021년 2월 2일 (화) 오후 6:28, Chao Yu 님이 작성:
> >>
> >> On 2021/2/2 17:23, Daeho Jeong wrote:
> >>> From: Daeho Jeong 
> >>>
> >>> As checkpoint=merge comes in, mount option setting related to checkpoint
> >>> had been mixed up and it became hard to understand. So, I separated
> >>> this option from "checkpoint=" and made another mount option
> >>> "checkpoint_merge" for this.
> >>>
> >>> Signed-off-by: Daeho Jeong 
> >>> ---
> >>> v2: renamed "checkpoint=merge" to "checkpoint_merge"
> >>> v3: removed "nocheckpoint_merge" option
> >>> ---
> >>>Documentation/filesystems/f2fs.rst |  6 +++---
> >>>fs/f2fs/super.c| 21 +
> >>>2 files changed, 12 insertions(+), 15 deletions(-)
> >>>
> >>> diff --git a/Documentation/filesystems/f2fs.rst 
> >>> b/Documentation/filesystems/f2fs.rst
> >>> index d0ead45dc706..475994ed8b15 100644
> >>> --- a/Documentation/filesystems/f2fs.rst
> >>> +++ b/Documentation/filesystems/f2fs.rst
> >>> @@ -247,9 +247,9 @@ checkpoint=%s[:%u[%]]  Set to "disable" to turn 
> >>> off checkpointing. Set to "enabl
> >>> hide up to all remaining free space. The actual 
> >>> space that
> >>> would be unusable can be viewed at 
> >>> /sys/fs/f2fs//unusable
> >>> This space is reclaimed once checkpoint=enable.
> >>> -  Here is another option "merge", which creates a 
> >>> kernel daemon
> >>> -  and makes it to merge concurrent checkpoint 
> >>> requests as much
> >>> -  as possible to eliminate redundant checkpoint 
> >>> issues. Plus,
> >>> +checkpoint_merge  When checkpoint is enabled, this can be used to 
> >>> create a kernel
> >>> +  daemon and make it to merge concurrent checkpoint 
> >>> requests as
> >>> +  much as possible to eliminate redundant checkpoint 
> >>> issues. Plus,
> >>> we can eliminate the sluggish issue caused by 
> >>> slow checkpoint
> >>> operation when the checkpoint is done in a 
> >>> process context in
> >>> a cgroup having low i/o budget and cpu shares. To 
> >>> make this
> >>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>> index 56696f6cfa86

[PATCH v4] f2fs: rename checkpoint=merge mount option to checkpoint_merge

2021-02-02 Thread Daeho Jeong
From: Daeho Jeong 

As checkpoint=merge comes in, mount option setting related to checkpoint
had been mixed up and it became hard to understand. So, I separated
this option from "checkpoint=" and made another mount option
"checkpoint_merge" for this.

Signed-off-by: Daeho Jeong 
---
v2: renamed "checkpoint=merge" to "checkpoint_merge"
v3: removed "nocheckpoint_merge" option
v4: re-added "nocheckpoint_merge" option to make it possible to disable
just only "checkpoint_merge" when remount
---
 Documentation/filesystems/f2fs.rst |  6 +++---
 fs/f2fs/super.c| 25 +
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index d0ead45dc706..475994ed8b15 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -247,9 +247,9 @@ checkpoint=%s[:%u[%]]Set to "disable" to turn off 
checkpointing. Set to "enabl
 hide up to all remaining free space. The actual space 
that
 would be unusable can be viewed at 
/sys/fs/f2fs//unusable
 This space is reclaimed once checkpoint=enable.
-Here is another option "merge", which creates a kernel 
daemon
-and makes it to merge concurrent checkpoint requests 
as much
-as possible to eliminate redundant checkpoint issues. 
Plus,
+checkpoint_mergeWhen checkpoint is enabled, this can be used to create 
a kernel
+daemon and make it to merge concurrent checkpoint 
requests as
+much as possible to eliminate redundant checkpoint 
issues. Plus,
 we can eliminate the sluggish issue caused by slow 
checkpoint
 operation when the checkpoint is done in a process 
context in
 a cgroup having low i/o budget and cpu shares. To make 
this
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 56696f6cfa86..f1791b9c1eac 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -145,6 +145,7 @@ enum {
Opt_checkpoint_disable_cap_perc,
Opt_checkpoint_enable,
Opt_checkpoint_merge,
+   Opt_nocheckpoint_merge,
Opt_compress_algorithm,
Opt_compress_log_size,
Opt_compress_extension,
@@ -215,7 +216,8 @@ static match_table_t f2fs_tokens = {
{Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
{Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
{Opt_checkpoint_enable, "checkpoint=enable"},
-   {Opt_checkpoint_merge, "checkpoint=merge"},
+   {Opt_checkpoint_merge, "checkpoint_merge"},
+   {Opt_nocheckpoint_merge, "nocheckpoint_merge"},
{Opt_compress_algorithm, "compress_algorithm=%s"},
{Opt_compress_log_size, "compress_log_size=%u"},
{Opt_compress_extension, "compress_extension=%s"},
@@ -946,6 +948,9 @@ static int parse_options(struct super_block *sb, char 
*options, bool is_remount)
case Opt_checkpoint_merge:
set_opt(sbi, MERGE_CHECKPOINT);
break;
+   case Opt_nocheckpoint_merge:
+   clear_opt(sbi, MERGE_CHECKPOINT);
+   break;
 #ifdef CONFIG_F2FS_FS_COMPRESSION
case Opt_compress_algorithm:
if (!f2fs_sb_has_compression(sbi)) {
@@ -1142,12 +1147,6 @@ static int parse_options(struct super_block *sb, char 
*options, bool is_remount)
return -EINVAL;
}
 
-   if (test_opt(sbi, DISABLE_CHECKPOINT) &&
-   test_opt(sbi, MERGE_CHECKPOINT)) {
-   f2fs_err(sbi, "checkpoint=merge cannot be used with 
checkpoint=disable\n");
-   return -EINVAL;
-   }
-
/* Not pass down write hints if the number of active logs is lesser
 * than NR_CURSEG_PERSIST_TYPE.
 */
@@ -1782,7 +1781,7 @@ static int f2fs_show_options(struct seq_file *seq, struct 
dentry *root)
seq_printf(seq, ",checkpoint=disable:%u",
F2FS_OPTION(sbi).unusable_cap);
if (test_opt(sbi, MERGE_CHECKPOINT))
-   seq_puts(seq, ",checkpoint=merge");
+   seq_puts(seq, ",checkpoint_merge");
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -2066,9 +2065,8 @@ static int f2fs_remount(struct super_block *sb, int 
*flags, char *data)
}
}
 
-   if (!test_opt(sbi, MERGE_CHECKP

[PATCH v5] f2fs: rename checkpoint=merge mount option to checkpoint_merge

2021-02-02 Thread Daeho Jeong
From: Daeho Jeong 

As checkpoint=merge comes in, mount option setting related to checkpoint
had been mixed up and it became hard to understand. So, I separated
this option from "checkpoint=" and made another mount option
"checkpoint_merge" for this.

Signed-off-by: Daeho Jeong 
---
v2: renamed "checkpoint=merge" to "checkpoint_merge"
v3: removed "nocheckpoint_merge" option
v4: re-added "nocheckpoint_merge" option to make it possible to disable
just only "checkpoint_merge" when remount
v5: added the description about "nocheckpoint_merge" in rst and added it
in show_options
---
 Documentation/filesystems/f2fs.rst |  7 ---
 fs/f2fs/super.c| 27 +++
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index d0ead45dc706..f75ec244762f 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -247,9 +247,9 @@ checkpoint=%s[:%u[%]]Set to "disable" to turn off 
checkpointing. Set to "enabl
 hide up to all remaining free space. The actual space 
that
 would be unusable can be viewed at 
/sys/fs/f2fs//unusable
 This space is reclaimed once checkpoint=enable.
-Here is another option "merge", which creates a kernel 
daemon
-and makes it to merge concurrent checkpoint requests 
as much
-as possible to eliminate redundant checkpoint issues. 
Plus,
+checkpoint_mergeWhen checkpoint is enabled, this can be used to create 
a kernel
+daemon and make it to merge concurrent checkpoint 
requests as
+much as possible to eliminate redundant checkpoint 
issues. Plus,
 we can eliminate the sluggish issue caused by slow 
checkpoint
 operation when the checkpoint is done in a process 
context in
 a cgroup having low i/o budget and cpu shares. To make 
this
@@ -257,6 +257,7 @@ checkpoint=%s[:%u[%]]Set to "disable" to turn off 
checkpointing. Set to "enabl
 to "3", to give one higher priority than other kernel 
threads.
 This is the same way to give a I/O priority to the jbd2
 journaling thread of ext4 filesystem.
+nocheckpoint_merge  Disable checkpoint merge feature.
 compress_algorithm=%s   Control compress algorithm, currently f2fs supports 
"lzo",
 "lz4", "zstd" and "lzo-rle" algorithm.
 compress_algorithm=%s:%d Control compress algorithm and its compress level, 
now, only
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 56696f6cfa86..1000d21120ca 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -145,6 +145,7 @@ enum {
Opt_checkpoint_disable_cap_perc,
Opt_checkpoint_enable,
Opt_checkpoint_merge,
+   Opt_nocheckpoint_merge,
Opt_compress_algorithm,
Opt_compress_log_size,
Opt_compress_extension,
@@ -215,7 +216,8 @@ static match_table_t f2fs_tokens = {
{Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
{Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
{Opt_checkpoint_enable, "checkpoint=enable"},
-   {Opt_checkpoint_merge, "checkpoint=merge"},
+   {Opt_checkpoint_merge, "checkpoint_merge"},
+   {Opt_nocheckpoint_merge, "nocheckpoint_merge"},
{Opt_compress_algorithm, "compress_algorithm=%s"},
{Opt_compress_log_size, "compress_log_size=%u"},
{Opt_compress_extension, "compress_extension=%s"},
@@ -946,6 +948,9 @@ static int parse_options(struct super_block *sb, char 
*options, bool is_remount)
case Opt_checkpoint_merge:
set_opt(sbi, MERGE_CHECKPOINT);
break;
+   case Opt_nocheckpoint_merge:
+   clear_opt(sbi, MERGE_CHECKPOINT);
+   break;
 #ifdef CONFIG_F2FS_FS_COMPRESSION
case Opt_compress_algorithm:
if (!f2fs_sb_has_compression(sbi)) {
@@ -1142,12 +1147,6 @@ static int parse_options(struct super_block *sb, char 
*options, bool is_remount)
return -EINVAL;
}
 
-   if (test_opt(sbi, DISABLE_CHECKPOINT) &&
-   test_opt(sbi, MERGE_CHECKPOINT)) {
-   f2fs_err(sbi, "checkpoint=merge cannot be used with 
checkpoint=disable\n");
-   return -EINVAL;
-   }
-
/* Not pass down write hints if the number of active logs is lesser
 * than

[PATCH] f2fs: protect new segment allocation in expand_inode_data

2020-05-31 Thread Daeho Jeong
From: Daeho Jeong 

Found a new segment allocation without f2fs_lock_op() in
expand_inode_data(). So, when we do fallocate() for a pinned file
and trigger checkpoint very frequently and simultaneously. F2FS gets
stuck in the below code of do_checkpoint() forever.

  f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
  /* Wait for all dirty meta pages to be submitted for IO */
<= if fallocate() here,
  f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META); <= it'll wait forever.

Signed-off-by: Daeho Jeong 
Reviewed-by: Chao Yu 
---
 fs/f2fs/file.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f7de2a1da528..14ace885baa9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1660,7 +1660,11 @@ static int expand_inode_data(struct inode *inode, loff_t 
offset,
 
down_write(&sbi->pin_sem);
map.m_seg_type = CURSEG_COLD_DATA_PINNED;
+
+   f2fs_lock_op(sbi);
f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA);
+   f2fs_unlock_op(sbi);
+
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
up_write(&sbi->pin_sem);
 
-- 
2.27.0.rc0.183.gde8f92d652-goog



[PATCH] f2fs: make file immutable even if releasing zero compression block

2020-07-29 Thread Daeho Jeong
From: Daeho Jeong 

When we use F2FS_IOC_RELEASE_COMPRESS_BLOCKS ioctl, if we can't find
any compressed blocks in the file even with large file size, the
ioctl just ends up without changing the file's status as immutable.
It makes the user, who expects that the file is immutable when it
returns successfully, confused.

Signed-off-by: Daeho Jeong 
---
 fs/f2fs/file.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index cc7f5670390f..8a422400e824 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3535,14 +3535,14 @@ static int f2fs_release_compress_blocks(struct file 
*filp, unsigned long arg)
if (ret)
goto out;
 
-   if (!F2FS_I(inode)->i_compr_blocks)
-   goto out;
-
F2FS_I(inode)->i_flags |= F2FS_IMMUTABLE_FL;
f2fs_set_inode_flags(inode);
inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode, true);
 
+   if (!F2FS_I(inode)->i_compr_blocks)
+   goto out;
+
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
 
-- 
2.28.0.rc0.142.g3c755180ce-goog



[PATCH] f2fs: change virtual mapping way for compression pages

2020-08-10 Thread Daeho Jeong
From: Daeho Jeong 

By profiling f2fs compression works, I've found vmap() callings are
bottlenecks of f2fs decompression path. Changing these with
vm_map_ram(), we can enhance f2fs decompression speed pretty much.

[Verification]
dd if=/dev/zero of=dummy bs=1m count=1000
echo 3 > /proc/sys/vm/drop_caches
dd if=dummy of=/dev/zero bs=512k

- w/o compression -
1048576000 bytes (0.9 G) copied, 1.999384 s, 500 M/s
1048576000 bytes (0.9 G) copied, 2.035988 s, 491 M/s
1048576000 bytes (0.9 G) copied, 2.039457 s, 490 M/s

- before patch -
1048576000 bytes (0.9 G) copied, 9.146217 s, 109 M/s
1048576000 bytes (0.9 G) copied, 9.997542 s, 100 M/s
1048576000 bytes (0.9 G) copied, 10.109727 s, 99 M/s

- after patch -
1048576000 bytes (0.9 G) copied, 2.253441 s, 444 M/s
1048576000 bytes (0.9 G) copied, 2.739764 s, 365 M/s
1048576000 bytes (0.9 G) copied, 2.185649 s, 458 M/s

Signed-off-by: Daeho Jeong 
---
 fs/f2fs/compress.c | 42 --
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 6e7db450006c..46b7e359f313 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -554,6 +554,8 @@ static void f2fs_compress_free_page(struct page *page)
mempool_free(page, compress_page_pool);
 }
 
+#define MAX_VMAP_RETRIES   3
+
 static int f2fs_compress_pages(struct compress_ctx *cc)
 {
struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
@@ -590,13 +592,23 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
}
}
 
-   cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   cc->rbuf = vm_map_ram(cc->rpages, cc->cluster_size, -1);
+   if (cc->rbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!cc->rbuf) {
ret = -ENOMEM;
goto out_free_cpages;
}
 
-   cc->cbuf = vmap(cc->cpages, cc->nr_cpages, VM_MAP, PAGE_KERNEL);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   cc->cbuf = vm_map_ram(cc->cpages, cc->nr_cpages, -1);
+   if (cc->cbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!cc->cbuf) {
ret = -ENOMEM;
goto out_vunmap_rbuf;
@@ -624,8 +636,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
memset(&cc->cbuf->cdata[cc->clen], 0,
   (nr_cpages * PAGE_SIZE) - (cc->clen + COMPRESS_HEADER_SIZE));
 
-   vunmap(cc->cbuf);
-   vunmap(cc->rbuf);
+   vm_unmap_ram(cc->cbuf, cc->nr_cpages);
+   vm_unmap_ram(cc->rbuf, cc->cluster_size);
 
for (i = nr_cpages; i < cc->nr_cpages; i++) {
f2fs_compress_free_page(cc->cpages[i]);
@@ -642,9 +654,9 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
return 0;
 
 out_vunmap_cbuf:
-   vunmap(cc->cbuf);
+   vm_unmap_ram(cc->cbuf, cc->nr_cpages);
 out_vunmap_rbuf:
-   vunmap(cc->rbuf);
+   vm_unmap_ram(cc->rbuf, cc->cluster_size);
 out_free_cpages:
for (i = 0; i < cc->nr_cpages; i++) {
if (cc->cpages[i])
@@ -715,13 +727,23 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)
goto out_free_dic;
}
 
-   dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   dic->rbuf = vm_map_ram(dic->tpages, dic->cluster_size, -1);
+   if (dic->rbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!dic->rbuf) {
ret = -ENOMEM;
goto destroy_decompress_ctx;
}
 
-   dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   dic->cbuf = vm_map_ram(dic->cpages, dic->nr_cpages, -1);
+   if (dic->cbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!dic->cbuf) {
ret = -ENOMEM;
goto out_vunmap_rbuf;
@@ -738,9 +760,9 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)
ret = cops->decompress_pages(dic);
 
 out_vunmap_cbuf:
-   vunmap(dic->cbuf);
+   vm_unmap_ram(dic->cbuf, dic->nr_cpages);
 out_vunmap_rbuf:
-   vunmap(dic->rbuf);
+   vm_unmap_ram(dic->rbuf, dic->cluster_size);
 destroy_decompress_ctx:
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
-- 
2.28.0.236.gb10cc79966-goog



Re: [f2fs-dev] [PATCH] f2fs: change virtual mapping way for compression pages

2020-08-11 Thread Daeho Jeong
Actually, as you can see, I use the whole zero data blocks in the test file.
It can maximize the effect of changing virtual mapping.
When I use normal files which can be compressed about 70% from the
original file,
The vm_map_ram() version is about 2x faster than vmap() version.

2020년 8월 11일 (화) 오후 4:55, Chao Yu 님이 작성:
>
> On 2020/8/11 15:15, Gao Xiang wrote:
> > On Tue, Aug 11, 2020 at 12:37:53PM +0900, Daeho Jeong wrote:
> >> From: Daeho Jeong 
> >>
> >> By profiling f2fs compression works, I've found vmap() callings are
> >> bottlenecks of f2fs decompression path. Changing these with
> >> vm_map_ram(), we can enhance f2fs decompression speed pretty much.
> >>
> >> [Verification]
> >> dd if=/dev/zero of=dummy bs=1m count=1000
> >> echo 3 > /proc/sys/vm/drop_caches
> >> dd if=dummy of=/dev/zero bs=512k
> >>
> >> - w/o compression -
> >> 1048576000 bytes (0.9 G) copied, 1.999384 s, 500 M/s
> >> 1048576000 bytes (0.9 G) copied, 2.035988 s, 491 M/s
> >> 1048576000 bytes (0.9 G) copied, 2.039457 s, 490 M/s
> >>
> >> - before patch -
> >> 1048576000 bytes (0.9 G) copied, 9.146217 s, 109 M/s
> >> 1048576000 bytes (0.9 G) copied, 9.997542 s, 100 M/s
> >> 1048576000 bytes (0.9 G) copied, 10.109727 s, 99 M/s
> >>
> >> - after patch -
> >> 1048576000 bytes (0.9 G) copied, 2.253441 s, 444 M/s
> >> 1048576000 bytes (0.9 G) copied, 2.739764 s, 365 M/s
> >> 1048576000 bytes (0.9 G) copied, 2.185649 s, 458 M/s
> >
> > Indeed, vmap() approach has some impact on the whole
> > workflow. But I don't think the gap is such significant,
> > maybe it relates to unlocked cpufreq (and big little
> > core difference if it's on some arm64 board).
>
> Agreed,
>
> I guess there should be other reason causing the large performance
> gap, scheduling, frequency, or something else.
>
> >
> >
> >
> > ___
> > Linux-f2fs-devel mailing list
> > linux-f2fs-de...@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > .
> >


Re: [f2fs-dev] [PATCH] f2fs: change virtual mapping way for compression pages

2020-08-11 Thread Daeho Jeong
Plus, when we use vmap(), vmap() normally executes in a short time
like vm_map_ram().
But, sometimes, it has a very long delay.

2020년 8월 11일 (화) 오후 6:28, Daeho Jeong 님이 작성:
>
> Actually, as you can see, I use the whole zero data blocks in the test file.
> It can maximize the effect of changing virtual mapping.
> When I use normal files which can be compressed about 70% from the
> original file,
> The vm_map_ram() version is about 2x faster than vmap() version.
>
> 2020년 8월 11일 (화) 오후 4:55, Chao Yu 님이 작성:
> >
> > On 2020/8/11 15:15, Gao Xiang wrote:
> > > On Tue, Aug 11, 2020 at 12:37:53PM +0900, Daeho Jeong wrote:
> > >> From: Daeho Jeong 
> > >>
> > >> By profiling f2fs compression works, I've found vmap() callings are
> > >> bottlenecks of f2fs decompression path. Changing these with
> > >> vm_map_ram(), we can enhance f2fs decompression speed pretty much.
> > >>
> > >> [Verification]
> > >> dd if=/dev/zero of=dummy bs=1m count=1000
> > >> echo 3 > /proc/sys/vm/drop_caches
> > >> dd if=dummy of=/dev/zero bs=512k
> > >>
> > >> - w/o compression -
> > >> 1048576000 bytes (0.9 G) copied, 1.999384 s, 500 M/s
> > >> 1048576000 bytes (0.9 G) copied, 2.035988 s, 491 M/s
> > >> 1048576000 bytes (0.9 G) copied, 2.039457 s, 490 M/s
> > >>
> > >> - before patch -
> > >> 1048576000 bytes (0.9 G) copied, 9.146217 s, 109 M/s
> > >> 1048576000 bytes (0.9 G) copied, 9.997542 s, 100 M/s
> > >> 1048576000 bytes (0.9 G) copied, 10.109727 s, 99 M/s
> > >>
> > >> - after patch -
> > >> 1048576000 bytes (0.9 G) copied, 2.253441 s, 444 M/s
> > >> 1048576000 bytes (0.9 G) copied, 2.739764 s, 365 M/s
> > >> 1048576000 bytes (0.9 G) copied, 2.185649 s, 458 M/s
> > >
> > > Indeed, vmap() approach has some impact on the whole
> > > workflow. But I don't think the gap is such significant,
> > > maybe it relates to unlocked cpufreq (and big little
> > > core difference if it's on some arm64 board).
> >
> > Agreed,
> >
> > I guess there should be other reason causing the large performance
> > gap, scheduling, frequency, or something else.
> >
> > >
> > >
> > >
> > > ___
> > > Linux-f2fs-devel mailing list
> > > linux-f2fs-de...@lists.sourceforge.net
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > > .
> > >


Re: [f2fs-dev] [PATCH] f2fs: change virtual mapping way for compression pages

2020-08-11 Thread Daeho Jeong
Sure, I'll update the test condition as you said in the commit message.
FYI, the test is done with 16kb chunk and Pixel 3 (arm64) device.

Thanks,

2020년 8월 11일 (화) 오후 7:18, Gao Xiang 님이 작성:
>
> On Tue, Aug 11, 2020 at 06:33:26PM +0900, Daeho Jeong wrote:
> > Plus, when we use vmap(), vmap() normally executes in a short time
> > like vm_map_ram().
> > But, sometimes, it has a very long delay.
> >
> > 2020년 8월 11일 (화) 오후 6:28, Daeho Jeong 님이 
> > 작성:
> > >
> > > Actually, as you can see, I use the whole zero data blocks in the test 
> > > file.
> > > It can maximize the effect of changing virtual mapping.
> > > When I use normal files which can be compressed about 70% from the
> > > original file,
> > > The vm_map_ram() version is about 2x faster than vmap() version.
>
> What f2fs does is much similar to btrfs compression. Even if these
> blocks are all zeroed. In principle, the maximum compression ratio
> is determined (cluster sized blocks into one compressed block, e.g
> 16k cluster into one compressed block).
>
> So it'd be better to describe your configured cluster size (16k or
> 128k) and your hardware information in the commit message as well.
>
> Actually, I also tried with this patch as well on my x86 laptop just
> now with FIO (I didn't use zeroed block though), and I didn't notice
> much difference with turbo boost off and maxfreq.
>
> I'm not arguing this commit, just a note about this commit message.
> > > > >> 1048576000 bytes (0.9 G) copied, 9.146217 s, 109 M/s
> > > > >> 1048576000 bytes (0.9 G) copied, 9.997542 s, 100 M/s
> > > > >> 1048576000 bytes (0.9 G) copied, 10.109727 s, 99 M/s
>
> IMHO, the above number is much like decompressing in the arm64 little cores.
>
> Thanks,
> Gao Xiang
>
>
> > >
> > > 2020년 8월 11일 (화) 오후 4:55, Chao Yu 님이 
> > > 작성:
> > > >
> > > > On 2020/8/11 15:15, Gao Xiang wrote:
> > > > > On Tue, Aug 11, 2020 at 12:37:53PM +0900, Daeho Jeong wrote:
> > > > >> From: Daeho Jeong 
> > > > >>
> > > > >> By profiling f2fs compression works, I've found vmap() callings are
> > > > >> bottlenecks of f2fs decompression path. Changing these with
> > > > >> vm_map_ram(), we can enhance f2fs decompression speed pretty much.
> > > > >>
> > > > >> [Verification]
> > > > >> dd if=/dev/zero of=dummy bs=1m count=1000
> > > > >> echo 3 > /proc/sys/vm/drop_caches
> > > > >> dd if=dummy of=/dev/zero bs=512k
> > > > >>
> > > > >> - w/o compression -
> > > > >> 1048576000 bytes (0.9 G) copied, 1.999384 s, 500 M/s
> > > > >> 1048576000 bytes (0.9 G) copied, 2.035988 s, 491 M/s
> > > > >> 1048576000 bytes (0.9 G) copied, 2.039457 s, 490 M/s
> > > > >>
> > > > >> - before patch -
> > > > >> 1048576000 bytes (0.9 G) copied, 9.146217 s, 109 M/s
> > > > >> 1048576000 bytes (0.9 G) copied, 9.997542 s, 100 M/s
> > > > >> 1048576000 bytes (0.9 G) copied, 10.109727 s, 99 M/s
> > > > >>
> > > > >> - after patch -
> > > > >> 1048576000 bytes (0.9 G) copied, 2.253441 s, 444 M/s
> > > > >> 1048576000 bytes (0.9 G) copied, 2.739764 s, 365 M/s
> > > > >> 1048576000 bytes (0.9 G) copied, 2.185649 s, 458 M/s
> > > > >
> > > > > Indeed, vmap() approach has some impact on the whole
> > > > > workflow. But I don't think the gap is such significant,
> > > > > maybe it relates to unlocked cpufreq (and big little
> > > > > core difference if it's on some arm64 board).
> > > >
> > > > Agreed,
> > > >
> > > > I guess there should be other reason causing the large performance
> > > > gap, scheduling, frequency, or something else.
> > > >
> > > > >
> > > > >
> > > > >
> > > > > ___
> > > > > Linux-f2fs-devel mailing list
> > > > > linux-f2fs-de...@lists.sourceforge.net
> > > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > > > > .
> > > > >
> >
>


Re: [f2fs-dev] [PATCH] f2fs: change virtual mapping way for compression pages

2020-08-11 Thread Daeho Jeong
Plus, differently from your testbed, in my pixel device, there seems
to be much more contention in vmap() operation.
If it's not there, I agree that there might not be a big difference
between vmap() and vm_map_ram().

2020년 8월 11일 (화) 오후 8:29, Gao Xiang 님이 작성:
>
> On Tue, Aug 11, 2020 at 08:21:23PM +0900, Daeho Jeong wrote:
> > Sure, I'll update the test condition as you said in the commit message.
> > FYI, the test is done with 16kb chunk and Pixel 3 (arm64) device.
>
> Yeah, anyway, it'd better to lock the freq and offline the little
> cores in your test as well (it'd make more sense). e.g. if 16k cluster
> is applied, even all data is zeroed, the count of vmap/vm_map_ram
> isn't hugeous (and as you said, "sometimes, it has a very long delay",
> it's much like another scheduling concern as well).
>
> Anyway, I'm not against your commit but the commit message is a bit
> of unclear. At least, if you think that is really the case, I'm ok
> with that.
>
> Thanks,
> Gao Xiang
>
> >
> > Thanks,
> >
> > 2020년 8월 11일 (화) 오후 7:18, Gao Xiang 
> > 님이 작성:
> > >
> > > On Tue, Aug 11, 2020 at 06:33:26PM +0900, Daeho Jeong wrote:
> > > > Plus, when we use vmap(), vmap() normally executes in a short time
> > > > like vm_map_ram().
> > > > But, sometimes, it has a very long delay.
> > > >
> > > > 2020년 8월 11일 (화) 오후 6:28, Daeho 
> > > > Jeong 님이 작성:
> > > > >
> > > > > Actually, as you can see, I use the whole zero data blocks in the 
> > > > > test file.
> > > > > It can maximize the effect of changing virtual mapping.
> > > > > When I use normal files which can be compressed about 70% from the
> > > > > original file,
> > > > > The vm_map_ram() version is about 2x faster than vmap() version.
> > >
> > > What f2fs does is much similar to btrfs compression. Even if these
> > > blocks are all zeroed. In principle, the maximum compression ratio
> > > is determined (cluster sized blocks into one compressed block, e.g
> > > 16k cluster into one compressed block).
> > >
> > > So it'd be better to describe your configured cluster size (16k or
> > > 128k) and your hardware information in the commit message as well.
> > >
> > > Actually, I also tried with this patch as well on my x86 laptop just
> > > now with FIO (I didn't use zeroed block though), and I didn't notice
> > > much difference with turbo boost off and maxfreq.
> > >
> > > I'm not arguing this commit, just a note about this commit message.
> > > > > > >> 1048576000 bytes (0.9 G) copied, 9.146217 s, 109 M/s
> > > > > > >> 1048576000 bytes (0.9 G) copied, 9.997542 s, 100 M/s
> > > > > > >> 1048576000 bytes (0.9 G) copied, 10.109727 s, 99 M/s
> > >
> > > IMHO, the above number is much like decompressing in the arm64 little 
> > > cores.
> > >
> > > Thanks,
> > > Gao Xiang
> > >
> > >
> > > > >
> > > > > 2020년 8월 11일 (화) 오후 4:55, Chao 
> > > > > Yu 님이 작성:
> > > > > >
> > > > > > On 2020/8/11 15:15, Gao Xiang wrote:
> > > > > > > On Tue, Aug 11, 2020 at 12:37:53PM +0900, Daeho Jeong wrote:
> > > > > > >> From: Daeho Jeong 
> > > > > > >>
> > > > > > >> By profiling f2fs compression works, I've found vmap() callings 
> > > > > > >> are
> > > > > > >> bottlenecks of f2fs decompression path. Changing these with
> > > > > > >> vm_map_ram(), we can enhance f2fs decompression speed pretty 
> > > > > > >> much.
> > > > > > >>
> > > > > > >> [Verification]
> > > > > > >> dd if=/dev/zero of=dummy bs=1m count=1000
> > > > > > >> echo 3 > /proc/sys/vm/drop_caches
> > > > > > >> dd if=dummy of=/dev/zero bs=512k
> > > > > > >>
> > > > > > >> - w/o compression -
> > > > > > >> 1048576000 bytes (0.9 G) copied, 1.999384 s, 500 M/s
> > > > > > >> 1048576000 bytes (0.9 G) copied, 2.035988 s, 491 M/s
> > > > > > >> 1048576

[PATCH v2] f2fs: change virtual mapping way for compression pages

2020-08-11 Thread Daeho Jeong
From: Daeho Jeong 

By profiling f2fs compression works, I've found vmap() callings have
unexpected hikes in the execution time in our test environment and
those are bottlenecks of f2fs decompression path. Changing these with
vm_map_ram(), we can enhance f2fs decompression speed pretty much.

[Verification]
Android Pixel 3(ARM64, 6GB RAM, 128GB UFS)
dd if=/dev/zero of=dummy bs=1m count=1000
echo 3 > /proc/sys/vm/drop_caches
dd if=dummy of=/dev/zero bs=512k

- w/o compression -
1048576000 bytes (0.9 G) copied, 1.999384 s, 500 M/s
1048576000 bytes (0.9 G) copied, 2.035988 s, 491 M/s
1048576000 bytes (0.9 G) copied, 2.039457 s, 490 M/s

- before patch -
1048576000 bytes (0.9 G) copied, 9.146217 s, 109 M/s
1048576000 bytes (0.9 G) copied, 9.997542 s, 100 M/s
1048576000 bytes (0.9 G) copied, 10.109727 s, 99 M/s

- after patch -
1048576000 bytes (0.9 G) copied, 2.253441 s, 444 M/s
1048576000 bytes (0.9 G) copied, 2.739764 s, 365 M/s
1048576000 bytes (0.9 G) copied, 2.185649 s, 458 M/s

Signed-off-by: Daeho Jeong 
---
Changes in v2:
 - Added test environment description.
---
 fs/f2fs/compress.c | 42 --
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 6e7db450006c..46b7e359f313 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -554,6 +554,8 @@ static void f2fs_compress_free_page(struct page *page)
mempool_free(page, compress_page_pool);
 }
 
+#define MAX_VMAP_RETRIES   3
+
 static int f2fs_compress_pages(struct compress_ctx *cc)
 {
struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
@@ -590,13 +592,23 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
}
}
 
-   cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   cc->rbuf = vm_map_ram(cc->rpages, cc->cluster_size, -1);
+   if (cc->rbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!cc->rbuf) {
ret = -ENOMEM;
goto out_free_cpages;
}
 
-   cc->cbuf = vmap(cc->cpages, cc->nr_cpages, VM_MAP, PAGE_KERNEL);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   cc->cbuf = vm_map_ram(cc->cpages, cc->nr_cpages, -1);
+   if (cc->cbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!cc->cbuf) {
ret = -ENOMEM;
goto out_vunmap_rbuf;
@@ -624,8 +636,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
memset(&cc->cbuf->cdata[cc->clen], 0,
   (nr_cpages * PAGE_SIZE) - (cc->clen + COMPRESS_HEADER_SIZE));
 
-   vunmap(cc->cbuf);
-   vunmap(cc->rbuf);
+   vm_unmap_ram(cc->cbuf, cc->nr_cpages);
+   vm_unmap_ram(cc->rbuf, cc->cluster_size);
 
for (i = nr_cpages; i < cc->nr_cpages; i++) {
f2fs_compress_free_page(cc->cpages[i]);
@@ -642,9 +654,9 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
return 0;
 
 out_vunmap_cbuf:
-   vunmap(cc->cbuf);
+   vm_unmap_ram(cc->cbuf, cc->nr_cpages);
 out_vunmap_rbuf:
-   vunmap(cc->rbuf);
+   vm_unmap_ram(cc->rbuf, cc->cluster_size);
 out_free_cpages:
for (i = 0; i < cc->nr_cpages; i++) {
if (cc->cpages[i])
@@ -715,13 +727,23 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)
goto out_free_dic;
}
 
-   dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   dic->rbuf = vm_map_ram(dic->tpages, dic->cluster_size, -1);
+   if (dic->rbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!dic->rbuf) {
ret = -ENOMEM;
goto destroy_decompress_ctx;
}
 
-   dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   dic->cbuf = vm_map_ram(dic->cpages, dic->nr_cpages, -1);
+   if (dic->cbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!dic->cbuf) {
ret = -ENOMEM;
goto out_vunmap_rbuf;
@@ -738,9 +760,9 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)
ret = cops->decompress_pages(dic);
 
 out_vunmap_cbuf:
-   vunmap(dic->cbuf);
+   vm_unmap_ram(dic->cbuf, dic->nr_cpages);
 out_vunmap_rbuf:
-   vunmap(dic->rbuf);
+   vm_unmap_ram(dic->rbuf, dic->cluster_size);
 destroy_decompress_ctx:
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
-- 
2.28.0.236.gb10cc79966-goog



[PATCH v3] f2fs: change virtual mapping way for compression pages

2020-08-11 Thread Daeho Jeong
From: Daeho Jeong 

By profiling f2fs compression works, I've found vmap() callings have
unexpected hikes in the execution time in our test environment and
those are bottlenecks of f2fs decompression path. Changing these with
vm_map_ram(), we can enhance f2fs decompression speed pretty much.

[Verification]
Android Pixel 3(ARM64, 6GB RAM, 128GB UFS)
Turned on only 0-3 little cores (at 1.785GHz)

dd if=/dev/zero of=dummy bs=1m count=1000
echo 3 > /proc/sys/vm/drop_caches
dd if=dummy of=/dev/zero bs=512k

- w/o compression -
1048576000 bytes (0.9 G) copied, 2.082554 s, 480 M/s
1048576000 bytes (0.9 G) copied, 2.081634 s, 480 M/s
1048576000 bytes (0.9 G) copied, 2.090861 s, 478 M/s

- before patch -
1048576000 bytes (0.9 G) copied, 7.407527 s, 135 M/s
1048576000 bytes (0.9 G) copied, 7.283734 s, 137 M/s
1048576000 bytes (0.9 G) copied, 7.291508 s, 137 M/s

- after patch -
1048576000 bytes (0.9 G) copied, 1.998959 s, 500 M/s
1048576000 bytes (0.9 G) copied, 1.987554 s, 503 M/s
1048576000 bytes (0.9 G) copied, 1.986380 s, 503 M/s

Signed-off-by: Daeho Jeong 
---
Changes in v2:
 - Added test environment description.
Changes in v3:
 - Re-tested using only ARM little cores and maximizing cpu clock.
---
 fs/f2fs/compress.c | 42 --
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 6e7db450006c..46b7e359f313 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -554,6 +554,8 @@ static void f2fs_compress_free_page(struct page *page)
mempool_free(page, compress_page_pool);
 }
 
+#define MAX_VMAP_RETRIES   3
+
 static int f2fs_compress_pages(struct compress_ctx *cc)
 {
struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
@@ -590,13 +592,23 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
}
}
 
-   cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   cc->rbuf = vm_map_ram(cc->rpages, cc->cluster_size, -1);
+   if (cc->rbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!cc->rbuf) {
ret = -ENOMEM;
goto out_free_cpages;
}
 
-   cc->cbuf = vmap(cc->cpages, cc->nr_cpages, VM_MAP, PAGE_KERNEL);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   cc->cbuf = vm_map_ram(cc->cpages, cc->nr_cpages, -1);
+   if (cc->cbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!cc->cbuf) {
ret = -ENOMEM;
goto out_vunmap_rbuf;
@@ -624,8 +636,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
memset(&cc->cbuf->cdata[cc->clen], 0,
   (nr_cpages * PAGE_SIZE) - (cc->clen + COMPRESS_HEADER_SIZE));
 
-   vunmap(cc->cbuf);
-   vunmap(cc->rbuf);
+   vm_unmap_ram(cc->cbuf, cc->nr_cpages);
+   vm_unmap_ram(cc->rbuf, cc->cluster_size);
 
for (i = nr_cpages; i < cc->nr_cpages; i++) {
f2fs_compress_free_page(cc->cpages[i]);
@@ -642,9 +654,9 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
return 0;
 
 out_vunmap_cbuf:
-   vunmap(cc->cbuf);
+   vm_unmap_ram(cc->cbuf, cc->nr_cpages);
 out_vunmap_rbuf:
-   vunmap(cc->rbuf);
+   vm_unmap_ram(cc->rbuf, cc->cluster_size);
 out_free_cpages:
for (i = 0; i < cc->nr_cpages; i++) {
if (cc->cpages[i])
@@ -715,13 +727,23 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)
goto out_free_dic;
}
 
-   dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   dic->rbuf = vm_map_ram(dic->tpages, dic->cluster_size, -1);
+   if (dic->rbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!dic->rbuf) {
ret = -ENOMEM;
goto destroy_decompress_ctx;
}
 
-   dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO);
+   for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+   dic->cbuf = vm_map_ram(dic->cpages, dic->nr_cpages, -1);
+   if (dic->cbuf)
+   break;
+   vm_unmap_aliases();
+   }
if (!dic->cbuf) {
ret = -ENOMEM;
goto out_vunmap_rbuf;
@@ -738,9 +760,9 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)
ret = cops->decompress_pages(dic);
 
 out_vunmap_cbuf:
-   vunmap(dic->cbuf);
+   vm_unmap_ram(dic->cbuf, dic->nr_cpages);
 out_vunmap_rbuf:
-   vunmap(dic->rbuf);
+   vm_unmap_ram(dic->rbuf, dic->cluster_size);
 destroy_decompress_ctx:
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
-- 
2.28.0.236.gb10cc79966-goog



[PATCH 2/2] f2fs: add ckpt_thread_ioprio sysfs node

2021-01-10 Thread Daeho Jeong
From: Daeho Jeong 

Added "ckpt_thread_ioprio" sysfs node to give a way to change checkpoint
merge daemon's io priority. Its default value is "be,3", which means
"BE" I/O class and I/O priority "3". We can select the class between "rt"
and "be", and set the I/O priority within valid range of it.
"," delimiter is necessary in between I/O class and priority number.

Signed-off-by: Daeho Jeong 
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  8 
 fs/f2fs/checkpoint.c|  3 +-
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/sysfs.c | 51 +
 4 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3dfee94e0618..0c48b2e7dfd4 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -377,3 +377,11 @@ Description:   This gives a control to limit the bio 
size in f2fs.
Default is zero, which will follow underlying block layer limit,
whereas, if it has a certain bytes value, f2fs won't submit a
bio larger than that size.
+What:  /sys/fs/f2fs//ckpt_thread_ioprio
+Date:  January 2021
+Contact:   "Daeho Jeong" 
+Description:   Give a way to change checkpoint merge daemon's io priority.
+   Its default value is "be,3", which means "BE" I/O class and
+   I/O priority "3". We can select the class between "rt" and "be",
+   and set the I/O priority within valid range of it. "," delimiter
+   is necessary in between I/O class and priority number.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 11288f435dbe..37a393f97d5d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1839,6 +1839,7 @@ int f2fs_create_ckpt_req_control(struct f2fs_sb_info *sbi)
atomic_set(&cprc->issued_ckpt, 0);
atomic_set(&cprc->total_ckpt, 0);
atomic_set(&cprc->queued_ckpt, 0);
+   cprc->ckpt_thread_ioprio = DEFAULT_CHECKPOINT_IOPRIO;
init_waitqueue_head(&cprc->ckpt_wait_queue);
init_llist_head(&cprc->issue_list);
sbi->cprc_info = cprc;
@@ -1859,7 +1860,7 @@ int f2fs_create_ckpt_req_control(struct f2fs_sb_info *sbi)
return err;
}
 
-   set_task_ioprio(cprc->f2fs_issue_ckpt, DEFAULT_CHECKPOINT_IOPRIO);
+   set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio);
 
return 0;
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4de5285df17d..957bf4c42d40 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -278,6 +278,7 @@ struct ckpt_req {
 
 struct ckpt_req_control {
struct task_struct *f2fs_issue_ckpt;/* checkpoint task */
+   int ckpt_thread_ioprio; /* checkpoint merge thread 
ioprio */
wait_queue_head_t ckpt_wait_queue;  /* waiting queue for wake-up */
atomic_t issued_ckpt;   /* # of actually issued ckpts */
atomic_t total_ckpt;/* # of total ckpts */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 30bae57428d1..295ebd84986b 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "f2fs.h"
 #include "segment.h"
@@ -34,6 +35,7 @@ enum {
FAULT_INFO_TYPE,/* struct f2fs_fault_info */
 #endif
RESERVED_BLOCKS,/* struct f2fs_sb_info */
+   CPRC_INFO,  /* struct ckpt_req_control */
 };
 
 struct f2fs_attr {
@@ -70,6 +72,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, 
int struct_type)
else if (struct_type == STAT_INFO)
return (unsigned char *)F2FS_STAT(sbi);
 #endif
+   else if (struct_type == CPRC_INFO)
+   return (unsigned char *)sbi->cprc_info;
return NULL;
 }
 
@@ -255,6 +259,23 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
return len;
}
 
+   if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) {
+   struct ckpt_req_control *cprc = sbi->cprc_info;
+   int len = 0;
+   int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio);
+   int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio);
+
+   if (class == IOPRIO_CLASS_RT)
+   len += scnprintf(buf + len, PAGE_SIZE - len, "rt,");
+   else if (class == IOPRIO_CLASS_BE)
+   len += scnprintf(buf + len, PAGE_SIZE - len, "be,");
+   else
+   return -EINVAL;
+
+   len += scnprintf(buf + len, PAGE_SIZE - len, "%d\n", data);
+  

[PATCH 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-10 Thread Daeho Jeong
From: Daeho Jeong 

We've added a new mount option "checkpoint=merge", which creates a
kernel daemon and makes it to merge concurrent checkpoint requests as
much as possible to eliminate redundant checkpoint issues. Plus, we
can eliminate the sluggish issue caused by slow checkpoint operation
when the checkpoint is done in a process context in a cgroup having
low i/o budget and cpu shares, and the verification result below
explains this.
The basic idea has come from https://opensource.samsung.com.

[Verification]
Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
Create two I/O cgroups (fg w/ weight 100, bg w/ weight 20)

In "fg" cgroup,
- thread A => trigger 1000 checkpoint operations
  "for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
   done"
- thread B => generating async. I/O
  "fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
   --filename=test_img --name=test"

In "bg" cgroup,
- thread C => trigger repeated checkpoint operations
  "echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
   fsync test_dir2; done"

We've measured thread A's execution time.

[ w/o patch ]
Elapsed Time: Avg. 68 seconds
[ w/  patch ]
Elapsed Time: Avg. 48 seconds

Signed-off-by: Daeho Jeong 
Signed-off-by: Sungjong Seo 
---
 Documentation/filesystems/f2fs.rst |   6 +
 fs/f2fs/checkpoint.c   | 176 +
 fs/f2fs/debug.c|   6 +
 fs/f2fs/f2fs.h |  24 
 fs/f2fs/super.c|  53 -
 5 files changed, 261 insertions(+), 4 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index dae15c96e659..bccc021bf31a 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -247,6 +247,12 @@ checkpoint=%s[:%u[%]]   Set to "disable" to turn off 
checkpointing. Set to "enabl
 hide up to all remaining free space. The actual space 
that
 would be unusable can be viewed at 
/sys/fs/f2fs//unusable
 This space is reclaimed once checkpoint=enable.
+Here is another option "merge", which creates a kernel 
daemon
+and makes it to merge concurrent checkpoint requests 
as much
+as possible to eliminate redundant checkpoint issues. 
Plus,
+we can eliminate the sluggish issue caused by slow 
checkpoint
+operation when the checkpoint is done in a process 
context in
+a cgroup having low i/o budget and cpu shares.
 compress_algorithm=%s   Control compress algorithm, currently f2fs supports 
"lzo",
 "lz4", "zstd" and "lzo-rle" algorithm.
 compress_log_size=%uSupport configuring compress cluster size, the size 
will
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 897edb7c951a..11288f435dbe 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "f2fs.h"
 #include "node.h"
@@ -20,6 +21,8 @@
 #include "trace.h"
 #include 
 
+#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
+
 static struct kmem_cache *ino_entry_slab;
 struct kmem_cache *f2fs_inode_entry_slab;
 
@@ -1707,3 +1710,176 @@ void f2fs_destroy_checkpoint_caches(void)
kmem_cache_destroy(ino_entry_slab);
kmem_cache_destroy(f2fs_inode_entry_slab);
 }
+
+static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
+{
+   struct cp_control cpc = { .reason = CP_SYNC, };
+   int err;
+
+   down_write(&sbi->gc_lock);
+   err = f2fs_write_checkpoint(sbi, &cpc);
+   up_write(&sbi->gc_lock);
+
+   return err;
+}
+
+static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi)
+{
+   struct ckpt_req_control *cprc = sbi->cprc_info;
+   struct ckpt_req *req, *next;
+   struct llist_node *dispatch_list;
+   int ret;
+
+   dispatch_list = llist_del_all(&cprc->issue_list);
+   if (!dispatch_list)
+   return;
+   dispatch_list = llist_reverse_order(dispatch_list);
+
+   ret = __write_checkpoint_sync(sbi);
+   atomic_inc(&cprc->issued_ckpt);
+
+   llist_for_each_entry_safe(req, next, dispatch_list, llnode) {
+   atomic_dec(&cprc->queued_ckpt);
+   atomic_inc(&cprc->total_ckpt);
+   req->complete_time = jiffies;
+   req->ret = ret;
+   complete(&req->wait);
+   }
+}
+
+static int issue_checkpoint_thread(void *data)
+{
+   struct f2fs_sb_info *sbi = data;
+   struct ckpt_req_control *cprc = sbi->cprc_info;
+   wait_

Re: [f2fs-dev] [PATCH 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-11 Thread Daeho Jeong
Got it~ :)

2021년 1월 11일 (월) 오후 6:50, Jaegeuk Kim 님이 작성:
>
> On 01/11, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > We've added a new mount option "checkpoint=merge", which creates a
> > kernel daemon and makes it to merge concurrent checkpoint requests as
> > much as possible to eliminate redundant checkpoint issues. Plus, we
> > can eliminate the sluggish issue caused by slow checkpoint operation
> > when the checkpoint is done in a process context in a cgroup having
> > low i/o budget and cpu shares, and The below verification result
> > explains this.
> > The basic idea has come from https://opensource.samsung.com.
> >
> > [Verification]
> > Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
> > Create two I/O cgroups (fg w/ weight 100, bg w/ wight 20)
> >
> > In "fg" cgroup,
> > - thread A => trigger 1000 checkpoint operations
> >   "for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
> >done"
> > - thread B => gererating async. I/O
> >   "fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
> >--filename=test_img --name=test"
> >
> > In "bg" cgroup,
> > - thread C => trigger repeated checkpoint operations
> >   "echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
> >fsync test_dir2; done"
> >
> > We've measured thread A's execution time.
> >
> > [ w/o patch ]
> > Elapsed Time: Avg. 68 seconds
> > [ w/  patch ]
> > Elapsed Time: Avg. 48 seconds
> >
> > Signed-off-by: Daeho Jeong 
> > Signed-off-by: Sungjong Seo 
> > ---
> >  Documentation/filesystems/f2fs.rst |   6 +
> >  fs/f2fs/checkpoint.c   | 176 +
> >  fs/f2fs/debug.c|   6 +
> >  fs/f2fs/f2fs.h |  24 
> >  fs/f2fs/super.c|  53 -
> >  5 files changed, 261 insertions(+), 4 deletions(-)
> >
> > diff --git a/Documentation/filesystems/f2fs.rst 
> > b/Documentation/filesystems/f2fs.rst
> > index dae15c96e659..bccc021bf31a 100644
> > --- a/Documentation/filesystems/f2fs.rst
> > +++ b/Documentation/filesystems/f2fs.rst
> > @@ -247,6 +247,12 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off 
> > checkpointing. Set to "enabl
> >hide up to all remaining free space. The actual 
> > space that
> >would be unusable can be viewed at 
> > /sys/fs/f2fs//unusable
> >This space is reclaimed once checkpoint=enable.
> > +  Here is another option "merge", which creates a 
> > kernel daemon
> > +  and makes it to merge concurrent checkpoint requests 
> > as much
> > +  as possible to eliminate redundant checkpoint 
> > issues. Plus,
> > +  we can eliminate the sluggish issue caused by slow 
> > checkpoint
> > +  operation when the checkpoint is done in a process 
> > context in
> > +  a cgroup having low i/o budget and cpu shares.
> >  compress_algorithm=%s Control compress algorithm, currently f2fs 
> > supports "lzo",
> >"lz4", "zstd" and "lzo-rle" algorithm.
> >  compress_log_size=%u  Support configuring compress cluster size, the size 
> > will
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 897edb7c951a..11288f435dbe 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -13,6 +13,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >
> >  #include "f2fs.h"
> >  #include "node.h"
> > @@ -20,6 +21,8 @@
> >  #include "trace.h"
> >  #include 
> >
> > +#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
> > +
> >  static struct kmem_cache *ino_entry_slab;
> >  struct kmem_cache *f2fs_inode_entry_slab;
> >
> > @@ -1707,3 +1710,176 @@ void f2fs_destroy_checkpoint_caches(void)
> >   kmem_cache_destroy(ino_entry_slab);
> >   kmem_cache_destroy(f2fs_inode_entry_slab);
> >  }
> > +
> > +static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
> > +{
> > + struct cp_control cpc = { .reason = CP_SYNC, };
> > + int err;
> > +
> > + down_write(&sbi->gc_l

Re: [f2fs-dev] [PATCH 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-11 Thread Daeho Jeong
2021년 1월 11일 (월) 오후 6:34, Chao Yu 님이 작성:
>
> On 2021/1/11 13:15, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > We've added a new mount option "checkpoint=merge", which creates a
> > kernel daemon and makes it to merge concurrent checkpoint requests as
> > much as possible to eliminate redundant checkpoint issues. Plus, we
> > can eliminate the sluggish issue caused by slow checkpoint operation
> > when the checkpoint is done in a process context in a cgroup having
> > low i/o budget and cpu shares, and The below verification result
> > explains this.
> > The basic idea has come from https://opensource.samsung.com.
>
> Nice proposal, we have the same idea for foreground GC, I guess we
> can upstream this later as well:
>
> https://consumer.huawei.com/en/opensource/
>
> >
> > [Verification]
> > Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
> > Create two I/O cgroups (fg w/ weight 100, bg w/ wight 20)
> >
> > In "fg" cgroup,
> > - thread A => trigger 1000 checkpoint operations
> >"for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
> > done"
> > - thread B => gererating async. I/O
> >"fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
> > --filename=test_img --name=test"
> >
> > In "bg" cgroup,
> > - thread C => trigger repeated checkpoint operations
> >"echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
> > fsync test_dir2; done"
> >
> > We've measured thread A's execution time.
> >
> > [ w/o patch ]
> > Elapsed Time: Avg. 68 seconds
> > [ w/  patch ]
> > Elapsed Time: Avg. 48 seconds
> >
> > Signed-off-by: Daeho Jeong 
> > Signed-off-by: Sungjong Seo 
> > ---
> >   Documentation/filesystems/f2fs.rst |   6 +
> >   fs/f2fs/checkpoint.c   | 176 +
> >   fs/f2fs/debug.c|   6 +
> >   fs/f2fs/f2fs.h |  24 
> >   fs/f2fs/super.c|  53 -
> >   5 files changed, 261 insertions(+), 4 deletions(-)
> >
> > diff --git a/Documentation/filesystems/f2fs.rst 
> > b/Documentation/filesystems/f2fs.rst
> > index dae15c96e659..bccc021bf31a 100644
> > --- a/Documentation/filesystems/f2fs.rst
> > +++ b/Documentation/filesystems/f2fs.rst
> > @@ -247,6 +247,12 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off 
> > checkpointing. Set to "enabl
> >hide up to all remaining free space. The actual 
> > space that
> >would be unusable can be viewed at 
> > /sys/fs/f2fs//unusable
> >This space is reclaimed once checkpoint=enable.
> > +  Here is another option "merge", which creates a 
> > kernel daemon
> > +  and makes it to merge concurrent checkpoint requests 
> > as much
> > +  as possible to eliminate redundant checkpoint 
> > issues. Plus,
> > +  we can eliminate the sluggish issue caused by slow 
> > checkpoint
> > +  operation when the checkpoint is done in a process 
> > context in
> > +  a cgroup having low i/o budget and cpu shares.
> >   compress_algorithm=%sControl compress algorithm, currently f2fs 
> > supports "lzo",
> >"lz4", "zstd" and "lzo-rle" algorithm.
> >   compress_log_size=%u Support configuring compress cluster size, 
> > the size will
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 897edb7c951a..11288f435dbe 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -13,6 +13,7 @@
> >   #include 
> >   #include 
> >   #include 
> > +#include 
> >
> >   #include "f2fs.h"
> >   #include "node.h"
> > @@ -20,6 +21,8 @@
> >   #include "trace.h"
> >   #include 
> >
> > +#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
> > +
> >   static struct kmem_cache *ino_entry_slab;
> >   struct kmem_cache *f2fs_inode_entry_slab;
> >
> > @@ -1707,3 +1710,176 @@ void f2fs_destroy_checkpoint_caches(void)
> >   kmem_cache_destroy(ino_entry_slab);
> >   kmem_cache_destroy(f2fs_inode_entry_slab);
> >   }
> > +
> > +stati

Re: [f2fs-dev] [PATCH v3] f2fs: fix race of pending_pages in decompression

2020-12-07 Thread Daeho Jeong
Chao, Jaegeuk,

Thanks. I'll update it as your comments. :)

Eric,

Decompression and verity can be executed in different thread contexts
in different timing, so we need separate counts for each.

We already use STEP_VERITY for non-compression case, so I think using
this flag in here looks more making sense.

Thanks,

2020년 12월 8일 (화) 오전 5:31, Eric Biggers 님이 작성:
>
> On Sat, Dec 05, 2020 at 01:26:26PM +0900, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > I found out f2fs_free_dic() is invoked in a wrong timing, but
> > f2fs_verify_bio() still needed the dic info and it triggered the
> > below kernel panic. It has been caused by the race condition of
> > pending_pages value between decompression and verity logic, when
> > the same compression cluster had been split in different bios.
> > By split bios, f2fs_verify_bio() ended up with decreasing
> > pending_pages value before it is reset to nr_cpages by
> > f2fs_decompress_pages() and caused the kernel panic.
> >
> > [ 4416.564763] Unable to handle kernel NULL pointer dereference
> >at virtual address 
> > ...
> > [ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
> > [ 4416.908515] pc : fsverity_verify_page+0x20/0x78
> > [ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
> > [ 4416.913722] sp : ffc019533cd0
> > [ 4416.913723] x29: ffc019533cd0 x28: 0402
> > [ 4416.913724] x27: 0001 x26: 0100
> > [ 4416.913726] x25: 0001 x24: 0004
> > [ 4416.913727] x23: 1000 x22: 
> > [ 4416.913728] x21:  x20: 2076f9c0
> > [ 4416.913729] x19: 2076f9c0 x18: ff8a32380c30
> > [ 4416.913731] x17: ffc01f966d97 x16: 0298
> > [ 4416.913732] x15:  x14: 
> > [ 4416.913733] x13: f074faec89ff x12: 
> > [ 4416.913734] x11: 1000 x10: 1000
> > [ 4416.929176] x9 : 20d1f5c7 x8 : 
> > [ 4416.929178] x7 : 626d7464ff286b6b x6 : ffc019533ade
> > [ 4416.929179] x5 : 8049000e x4 : 2793e9e0
> > [ 4416.929180] x3 : 8049000e x2 : ff89ecfa74d0
> > [ 4416.929181] x1 : 0c40 x0 : 2076f9c0
> > [ 4416.929184] Call trace:
> > [ 4416.929187]  fsverity_verify_page+0x20/0x78
> > [ 4416.929189]  f2fs_verify_bio+0x11c/0x29c
> > [ 4416.929192]  f2fs_verity_work+0x58/0x84
> > [ 4417.050667]  process_one_work+0x270/0x47c
> > [ 4417.055354]  worker_thread+0x27c/0x4d8
> > [ 4417.059784]  kthread+0x13c/0x320
> > [ 4417.063693]  ret_from_fork+0x10/0x18
> >
> > Signed-off-by: Daeho Jeong 
> > Signed-off-by: Jaegeuk Kim 
> > ---
> > v3: back to v1 and enabled verity in a unit of cluster
> > v2: merged verity_pages with pending_pages, and increased the
> > pending_pages count only if STEP_VERITY is set on bio
>
> I am trying to review this but it is very hard, as the f2fs compression code 
> is
> very hard to understand.
>
> It looks like a 'struct decompress_io_ctx' represents the work to decompress a
> particular cluster.  Since the compressed data of the cluster can be read 
> using
> multiple bios, there is a reference count of how many pages are remaining to 
> be
> read before all the cluster's pages have been read and decompression can 
> start.
>
> What I don't understand is why that reference counting needs to work 
> differently
> depending on whether verity is enabled or not.  Shouldn't it be exactly the
> same?
>
> There also seems to be some confusion about the scope of STEP_VERITY.  Before
> f2fs compression was added, it was a per-bio thing.  But now in a compressed
> file, it's really a per-cluster thing, since all decompressed pages in a
> compressed cluster are verified (or not verified) at once.
>
> Wouldn't it make a lot more sense to, when a cluster needs both compression 
> and
> verity, *not* set STEP_VERITY on the bios, but rather set a similar flag in 
> the
> decompress_io_ctx?
>
> - Eric


Re: [f2fs-dev] [PATCH v2 1/2] f2fs: add compress_mode mount option

2020-12-07 Thread Daeho Jeong
compression enabled regular files have different formats of node
metadata on disk. So, using the
"compress_mode=user,compress_extension=*" mount option, we want to
make the metadata of files ready for compression and make them
compressed whenever the user wants using new ioctls.

2020년 12월 8일 (화) 오후 12:51, Eric Biggers 님이 작성:
>
> On Tue, Dec 01, 2020 at 01:08:02PM +0900, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > We will add a new "compress_mode" mount option to control file
> > compression mode. This supports "fs" and "user". In "fs" mode (default),
> > f2fs does automatic compression on the compression enabled files.
> > In "user" mode, f2fs disables the automaic compression and gives the
> > user discretion of choosing the target file and the timing. It means
> > the user can do manual compression/decompression on the compression
> > enabled files using ioctls.
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> > v2: changed mount option name and added more explanation of mount option
> > ---
> >  Documentation/filesystems/f2fs.rst | 35 ++
> >  fs/f2fs/compress.c |  2 +-
> >  fs/f2fs/data.c |  2 +-
> >  fs/f2fs/f2fs.h | 30 +
> >  fs/f2fs/segment.c  |  2 +-
> >  fs/f2fs/super.c| 23 
> >  6 files changed, 91 insertions(+), 3 deletions(-)
> >
> > diff --git a/Documentation/filesystems/f2fs.rst 
> > b/Documentation/filesystems/f2fs.rst
> > index b8ee761c9922..5eb8d63439ec 100644
> > --- a/Documentation/filesystems/f2fs.rst
> > +++ b/Documentation/filesystems/f2fs.rst
> > @@ -260,6 +260,13 @@ compress_extension=%s Support adding specified 
> > extension, so that f2fs can enab
> >For other files, we can still enable compression via 
> > ioctl.
> >Note that, there is one reserved special extension 
> > '*', it
> >can be set to enable compression for all files.
> > +compress_mode=%s  Control file compression mode. This supports "fs" 
> > and "user"
> > +  modes. In "fs" mode (default), f2fs does automatic 
> > compression
> > +  on the compression enabled files. In "user" mode, 
> > f2fs disables
> > +  the automaic compression and gives the user 
> > discretion of
> > +  choosing the target file and the timing. The user 
> > can do manual
> > +  compression/decompression on the compression enabled 
> > files using
> > +  ioctls.
> >  inlinecrypt   When possible, encrypt/decrypt the contents of 
> > encrypted
> >files using the blk-crypto framework rather than
> >filesystem-layer encryption. This allows the use of
> > @@ -810,6 +817,34 @@ Compress metadata layout::
> >   | data length | data chksum | reserved |  compressed data   |
> >   +-+-+--++
> >
> > +Compression mode
> > +--
> > +
> > +f2fs supports "fs" and "user" compression modes with "compression_mode" 
> > mount option.
> > +With this option, f2fs provides a choice to select the way how to compress 
> > the
> > +compression enabled files (refer to "Compression implementation" section 
> > for how to
> > +enable compression on a regular inode).
> > +
> > +1) compress_mode=fs
> > +This is the default option. f2fs does automatic compression in the 
> > writeback of the
> > +compression enabled files.
> > +
> > +2) compress_mode=user
> > +This disables the automaic compression and gives the user discretion of 
> > choosing the
> > +target file and the timing. The user can do manual 
> > compression/decompression on the
> > +compression enabled files using F2FS_IOC_DECOMPRESS_FILE and 
> > F2FS_IOC_COMPRESS_FILE
> > +ioctls like the below.
> > +
> > +To decompress a file,
> > +
> > +fd = open(filename, O_WRONLY, 0);
> > +ret = ioctl(fd, F2FS_IOC_DECOMPRESS_FILE);
> > +
> > +To compress a file,
> > +
> > +fd = open(filename, O_WRONLY, 0);
> > +ret = ioctl(fd, F2FS_IOC_COMPRESS_FILE);
> > +
>
> Why doesn't compress_mode=user just cause regular files to not inherit the
> compression flag?  Then users could set or clear the compression flag using
> FS_IOC_SETFLAGS, without any need for these new ioctls.
>
> - Eric


[PATCH v4 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-18 Thread Daeho Jeong
From: Daeho Jeong 

We've added a new mount option "checkpoint=merge", which creates a
kernel daemon and makes it to merge concurrent checkpoint requests as
much as possible to eliminate redundant checkpoint issues. Plus, we
can eliminate the sluggish issue caused by slow checkpoint operation
when the checkpoint is done in a process context in a cgroup having
low i/o budget and cpu shares. To make this do better, we set the
default i/o priority of the kernel daemon to "3", to give one higher
priority than other kernel threads. The below verification result
explains this.
The basic idea has come from https://opensource.samsung.com.

[Verification]
Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
Create two I/O cgroups (fg w/ weight 100, bg w/ weight 20)
Set "strict_guarantees" to "1" in BFQ tunables

In "fg" cgroup,
- thread A => trigger 1000 checkpoint operations
  "for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
   done"
- thread B => generating async. I/O
  "fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
   --filename=test_img --name=test"

In "bg" cgroup,
- thread C => trigger repeated checkpoint operations
  "echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
   fsync test_dir2; done"

We've measured thread A's execution time.

[ w/o patch ]
Elapsed Time: Avg. 68 seconds
[ w/  patch ]
Elapsed Time: Avg. 48 seconds

Signed-off-by: Daeho Jeong 
Signed-off-by: Sungjong Seo 
---
v2:
- inlined ckpt_req_control into f2fs_sb_info and collected statistics
  of checkpoint merge operations
v3:
- fixed some minor errors and cleaned up f2fs_sync_fs()
v4:
- added an explanation to raise the default i/o priority of the
  checkpoint daemon
---
 Documentation/filesystems/f2fs.rst |  10 ++
 fs/f2fs/checkpoint.c   | 177 +
 fs/f2fs/debug.c|  12 ++
 fs/f2fs/f2fs.h |  27 +
 fs/f2fs/super.c|  55 +++--
 5 files changed, 273 insertions(+), 8 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index dae15c96e659..9624a0be0364 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -247,6 +247,16 @@ checkpoint=%s[:%u[%]]   Set to "disable" to turn off 
checkpointing. Set to "enabl
 hide up to all remaining free space. The actual space 
that
 would be unusable can be viewed at 
/sys/fs/f2fs//unusable
 This space is reclaimed once checkpoint=enable.
+Here is another option "merge", which creates a kernel 
daemon
+and makes it to merge concurrent checkpoint requests 
as much
+as possible to eliminate redundant checkpoint issues. 
Plus,
+we can eliminate the sluggish issue caused by slow 
checkpoint
+operation when the checkpoint is done in a process 
context in
+a cgroup having low i/o budget and cpu shares. To make 
this
+do better, we set the default i/o priority of the 
kernel daemon
+to "3", to give one higher priority than other kernel 
threads.
+This is the same way to give a I/O priority to the jbd2
+journaling thread of ext4 filesystem.
 compress_algorithm=%s   Control compress algorithm, currently f2fs supports 
"lzo",
 "lz4", "zstd" and "lzo-rle" algorithm.
 compress_log_size=%uSupport configuring compress cluster size, the size 
will
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 897edb7c951a..ef6ad3d1957d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "f2fs.h"
 #include "node.h"
@@ -20,6 +21,8 @@
 #include "trace.h"
 #include 
 
+#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
+
 static struct kmem_cache *ino_entry_slab;
 struct kmem_cache *f2fs_inode_entry_slab;
 
@@ -1707,3 +1710,177 @@ void f2fs_destroy_checkpoint_caches(void)
kmem_cache_destroy(ino_entry_slab);
kmem_cache_destroy(f2fs_inode_entry_slab);
 }
+
+static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
+{
+   struct cp_control cpc = { .reason = CP_SYNC, };
+   int err;
+
+   down_write(&sbi->gc_lock);
+   err = f2fs_write_checkpoint(sbi, &cpc);
+   up_write(&sbi->gc_lock);
+
+   return err;
+}
+
+static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi)
+{
+   struct ckpt_req_control *cprc = &sbi->cprc_info;
+   struct ckpt_req *

Re: [f2fs-dev] [PATCH v4 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-19 Thread Daeho Jeong
Sorry, I think you were definitely confused.

There is no change for 2/2. So I didn't send it.

Next time, I am going to send it all, even though some of them don't
have changes.

2021년 1월 20일 (수) 오전 4:08, Jaegeuk Kim 님이 작성:
>
> Is there v4 2/2?
>
> On 01/19, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > We've added a new mount option "checkpoint=merge", which creates a
> > kernel daemon and makes it to merge concurrent checkpoint requests as
> > much as possible to eliminate redundant checkpoint issues. Plus, we
> > can eliminate the sluggish issue caused by slow checkpoint operation
> > when the checkpoint is done in a process context in a cgroup having
> > low i/o budget and cpu shares. To make this do better, we set the
> > default i/o priority of the kernel daemon to "3", to give one higher
> > priority than other kernel threads. The below verification result
> > explains this.
> > The basic idea has come from https://opensource.samsung.com.
> >
> > [Verification]
> > Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
> > Create two I/O cgroups (fg w/ weight 100, bg w/ wight 20)
> > Set "strict_guarantees" to "1" in BFQ tunables
> >
> > In "fg" cgroup,
> > - thread A => trigger 1000 checkpoint operations
> >   "for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
> >done"
> > - thread B => gererating async. I/O
> >   "fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
> >--filename=test_img --name=test"
> >
> > In "bg" cgroup,
> > - thread C => trigger repeated checkpoint operations
> >   "echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
> >fsync test_dir2; done"
> >
> > We've measured thread A's execution time.
> >
> > [ w/o patch ]
> > Elapsed Time: Avg. 68 seconds
> > [ w/  patch ]
> > Elapsed Time: Avg. 48 seconds
> >
> > Signed-off-by: Daeho Jeong 
> > Signed-off-by: Sungjong Seo 
> > ---
> > v2:
> > - inlined ckpt_req_control into f2fs_sb_info and collected stastics
> >   of checkpoint merge operations
> > v3:
> > - fixed some minor errors and cleaned up f2fs_sync_fs()
> > v4:
> > - added an explanation to raise the default i/o priority of the
> >   checkpoint daemon
> > ---
> >  Documentation/filesystems/f2fs.rst |  10 ++
> >  fs/f2fs/checkpoint.c   | 177 +
> >  fs/f2fs/debug.c|  12 ++
> >  fs/f2fs/f2fs.h |  27 +
> >  fs/f2fs/super.c|  55 +++--
> >  5 files changed, 273 insertions(+), 8 deletions(-)
> >
> > diff --git a/Documentation/filesystems/f2fs.rst 
> > b/Documentation/filesystems/f2fs.rst
> > index dae15c96e659..9624a0be0364 100644
> > --- a/Documentation/filesystems/f2fs.rst
> > +++ b/Documentation/filesystems/f2fs.rst
> > @@ -247,6 +247,16 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off 
> > checkpointing. Set to "enabl
> >hide up to all remaining free space. The actual 
> > space that
> >would be unusable can be viewed at 
> > /sys/fs/f2fs//unusable
> >This space is reclaimed once checkpoint=enable.
> > +  Here is another option "merge", which creates a 
> > kernel daemon
> > +  and makes it to merge concurrent checkpoint requests 
> > as much
> > +  as possible to eliminate redundant checkpoint 
> > issues. Plus,
> > +  we can eliminate the sluggish issue caused by slow 
> > checkpoint
> > +  operation when the checkpoint is done in a process 
> > context in
> > +  a cgroup having low i/o budget and cpu shares. To 
> > make this
> > +  do better, we set the default i/o priority of the 
> > kernel daemon
> > +  to "3", to give one higher priority than other 
> > kernel threads.
> > +  This is the same way to give a I/O priority to the 
> > jbd2
> > +  journaling thread of ext4 filesystem.
> >  compress_algorithm=%s Control compress algorithm, currently f2fs 
> > supports "lzo",
> >"lz4", "zstd" and "lzo-rle" algorithm.
> >  compress_log_size=%u  Support c

[PATCH v2 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-13 Thread Daeho Jeong
From: Daeho Jeong 

We've added a new mount option "checkpoint=merge", which creates a
kernel daemon and makes it to merge concurrent checkpoint requests as
much as possible to eliminate redundant checkpoint issues. Plus, we
can eliminate the sluggish issue caused by slow checkpoint operation
when the checkpoint is done in a process context in a cgroup having
low i/o budget and cpu shares. The below verification result
explains this.
The basic idea has come from https://opensource.samsung.com.

[Verification]
Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
Create two I/O cgroups (fg w/ weight 100, bg w/ weight 20)
Set "strict_guarantees" to "1" in BFQ tunables

In "fg" cgroup,
- thread A => trigger 1000 checkpoint operations
  "for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
   done"
- thread B => generating async. I/O
  "fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
   --filename=test_img --name=test"

In "bg" cgroup,
- thread C => trigger repeated checkpoint operations
  "echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
   fsync test_dir2; done"

We've measured thread A's execution time.

[ w/o patch ]
Elapsed Time: Avg. 68 seconds
[ w/  patch ]
Elapsed Time: Avg. 48 seconds

Signed-off-by: Daeho Jeong 
Signed-off-by: Sungjong Seo 
---
v2:
- inlined ckpt_req_control into f2fs_sb_info and collected statistics
  of checkpoint merge operations
---
 Documentation/filesystems/f2fs.rst |   6 ++
 fs/f2fs/checkpoint.c   | 163 +
 fs/f2fs/debug.c|  12 +++
 fs/f2fs/f2fs.h |  27 +
 fs/f2fs/super.c|  56 +-
 5 files changed, 260 insertions(+), 4 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index dae15c96e659..bccc021bf31a 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -247,6 +247,12 @@ checkpoint=%s[:%u[%]]   Set to "disable" to turn off 
checkpointing. Set to "enabl
 hide up to all remaining free space. The actual space 
that
 would be unusable can be viewed at 
/sys/fs/f2fs//unusable
 This space is reclaimed once checkpoint=enable.
+Here is another option "merge", which creates a kernel 
daemon
+and makes it to merge concurrent checkpoint requests 
as much
+as possible to eliminate redundant checkpoint issues. 
Plus,
+we can eliminate the sluggish issue caused by slow 
checkpoint
+operation when the checkpoint is done in a process 
context in
+a cgroup having low i/o budget and cpu shares.
 compress_algorithm=%s   Control compress algorithm, currently f2fs supports 
"lzo",
 "lz4", "zstd" and "lzo-rle" algorithm.
 compress_log_size=%uSupport configuring compress cluster size, the size 
will
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 897edb7c951a..e0668cec3b80 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "f2fs.h"
 #include "node.h"
@@ -20,6 +21,8 @@
 #include "trace.h"
 #include 
 
+#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
+
 static struct kmem_cache *ino_entry_slab;
 struct kmem_cache *f2fs_inode_entry_slab;
 
@@ -1707,3 +1710,163 @@ void f2fs_destroy_checkpoint_caches(void)
kmem_cache_destroy(ino_entry_slab);
kmem_cache_destroy(f2fs_inode_entry_slab);
 }
+
+static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
+{
+   struct cp_control cpc = { .reason = CP_SYNC, };
+   int err;
+
+   down_write(&sbi->gc_lock);
+   err = f2fs_write_checkpoint(sbi, &cpc);
+   up_write(&sbi->gc_lock);
+
+   return err;
+}
+
+static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi)
+{
+   struct ckpt_req_control *cprc = &sbi->cprc_info;
+   struct ckpt_req *req, *next;
+   struct llist_node *dispatch_list;
+   u64 sum_diff = 0, diff, count = 0;
+   int ret;
+
+   dispatch_list = llist_del_all(&cprc->issue_list);
+   if (!dispatch_list)
+   return;
+   dispatch_list = llist_reverse_order(dispatch_list);
+
+   ret = __write_checkpoint_sync(sbi);
+   atomic_inc(&cprc->issued_ckpt);
+
+   llist_for_each_entry_safe(req, next, dispatch_list, llnode) {
+   atomic_dec(&cprc->queued_ckpt);
+   atomic_inc(&cprc->total_ckpt);
+   diff = (u64)ktime_ms_delta(ktime_get(), req->queue_time);
+

[PATCH v2 2/2] f2fs: add ckpt_thread_ioprio sysfs node

2021-01-13 Thread Daeho Jeong
From: Daeho Jeong 

Added "ckpt_thread_ioprio" sysfs node to give a way to change checkpoint
merge daemon's io priority. Its default value is "be,3", which means
"BE" I/O class and I/O priority "3". We can select the class between "rt"
and "be", and set the I/O priority within valid range of it.
"," delimiter is necessary in between I/O class and priority number.

Signed-off-by: Daeho Jeong 
---
v2:
- adapt to inlining ckpt_req_control of f2fs_sb_info
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  8 
 fs/f2fs/checkpoint.c|  2 +-
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/sysfs.c | 51 +
 4 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3dfee94e0618..0c48b2e7dfd4 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -377,3 +377,11 @@ Description:   This gives a control to limit the bio 
size in f2fs.
Default is zero, which will follow underlying block layer limit,
whereas, if it has a certain bytes value, f2fs won't submit a
bio larger than that size.
+What:  /sys/fs/f2fs//ckpt_thread_ioprio
+Date:  January 2021
+Contact:   "Daeho Jeong" 
+Description:   Give a way to change checkpoint merge daemon's io priority.
+   Its default value is "be,3", which means "BE" I/O class and
+   I/O priority "3". We can select the class between "rt" and "be",
+   and set the I/O priority within valid range of it. "," delimiter
+   is necessary in between I/O class and priority number.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index e0668cec3b80..62bd6f449bb7 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1840,7 +1840,7 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
if (IS_ERR(cprc->f2fs_issue_ckpt))
return PTR_ERR(cprc->f2fs_issue_ckpt);
 
-   set_task_ioprio(cprc->f2fs_issue_ckpt, DEFAULT_CHECKPOINT_IOPRIO);
+   set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio);
 
return 0;
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f2ae075aa723..517eb0eda638 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -276,6 +276,7 @@ struct ckpt_req {
 
 struct ckpt_req_control {
struct task_struct *f2fs_issue_ckpt;/* checkpoint task */
+   int ckpt_thread_ioprio; /* checkpoint merge thread 
ioprio */
wait_queue_head_t ckpt_wait_queue;  /* waiting queue for wake-up */
atomic_t issued_ckpt;   /* # of actually issued ckpts */
atomic_t total_ckpt;/* # of total ckpts */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 30bae57428d1..ddd70395148d 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "f2fs.h"
 #include "segment.h"
@@ -34,6 +35,7 @@ enum {
FAULT_INFO_TYPE,/* struct f2fs_fault_info */
 #endif
RESERVED_BLOCKS,/* struct f2fs_sb_info */
+   CPRC_INFO,  /* struct ckpt_req_control */
 };
 
 struct f2fs_attr {
@@ -70,6 +72,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, 
int struct_type)
else if (struct_type == STAT_INFO)
return (unsigned char *)F2FS_STAT(sbi);
 #endif
+   else if (struct_type == CPRC_INFO)
+   return (unsigned char *)&sbi->cprc_info;
return NULL;
 }
 
@@ -255,6 +259,23 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
return len;
}
 
+   if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) {
+   struct ckpt_req_control *cprc = &sbi->cprc_info;
+   int len = 0;
+   int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio);
+   int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio);
+
+   if (class == IOPRIO_CLASS_RT)
+   len += scnprintf(buf + len, PAGE_SIZE - len, "rt,");
+   else if (class == IOPRIO_CLASS_BE)
+   len += scnprintf(buf + len, PAGE_SIZE - len, "be,");
+   else
+   return -EINVAL;
+
+   len += scnprintf(buf + len, PAGE_SIZE - len, "%d\n", data);
+   return len;
+   }
+
ui = (unsigned int *)(ptr + a->offset);
 
return sprintf(buf, "%u\n", *ui);
@@ -308,6 +329,34 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return ret ? ret : count;
}
 
+   if (!strcmp(a->attr.name, 

[PATCH v2] f2fs: add sysfs nodes to get accumulated compression info

2021-03-04 Thread Daeho Jeong
From: Daeho Jeong 

Added acc_compr_inodes to show accumulated compressed inode count and
acc_compr_blocks to show accumulated secured block count with
compression in sysfs. These can be re-initialized to "0" by writing "0"
value in one of both.

Signed-off-by: Daeho Jeong 
---
v2: thanks to kernel test robot , fixed compile issue
related to kernel config.
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 13 +++
 fs/f2fs/checkpoint.c|  8 
 fs/f2fs/compress.c  |  4 +-
 fs/f2fs/data.c  |  2 +-
 fs/f2fs/f2fs.h  | 50 -
 fs/f2fs/file.c  |  8 ++--
 fs/f2fs/inode.c |  1 +
 fs/f2fs/super.c | 10 -
 fs/f2fs/sysfs.c | 45 ++
 include/linux/f2fs_fs.h |  4 +-
 10 files changed, 135 insertions(+), 10 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index cbeac1bebe2f..f4fc87503754 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -409,3 +409,16 @@ Description:   Give a way to change checkpoint merge 
daemon's io priority.
I/O priority "3". We can select the class between "rt" and "be",
and set the I/O priority within valid range of it. "," delimiter
is necessary in between I/O class and priority number.
+
+What:  /sys/fs/f2fs//acc_compr_inodes
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show accumulated compressed inode count. If you write "0" here,
+   you can initialize acc_compr_inodes and acc_compr_blocks as "0".
+
+What:  /sys/fs/f2fs//acc_compr_blocks
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show accumulated secured block count with compression.
+   If you write "0" here, you can initialize acc_compr_inodes and
+   acc_compr_blocks as "0".
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 174a0819ad96..cd944a569162 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1514,6 +1514,14 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
 
if (__remain_node_summaries(cpc->reason)) {
+   /* Record compression statistics in the hot node summary */
+   spin_lock(&sbi->acc_compr_lock);
+   seg_i->journal->info.acc_compr_blocks =
+   cpu_to_le64(sbi->acc_compr_blocks);
+   seg_i->journal->info.acc_compr_inodes =
+   cpu_to_le32(sbi->acc_compr_inodes);
+   spin_unlock(&sbi->acc_compr_lock);
+
f2fs_write_node_summaries(sbi, start_blk);
start_blk += NR_CURSEG_NODE_TYPE;
}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 77fa342de38f..9029e95f4ae4 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1351,8 +1351,8 @@ static int f2fs_write_compressed_pages(struct 
compress_ctx *cc,
}
 
if (fio.compr_blocks)
-   f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
-   f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
+   f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false, 
true);
+   f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true, true);
 
set_inode_flag(cc->inode, FI_APPEND_WRITE);
if (cc->cluster_idx == 0)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b9721c8f116c..d3afb9b0090e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2591,7 +2591,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
ClearPageError(page);
 
if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
-   f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
+   f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false, 
true);
 
/* LFS mode write path */
f2fs_outplace_write_data(&dn, fio);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e2d302ae3a46..a12edf5283cd 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1609,6 +1609,11 @@ struct f2fs_sb_info {
u64 sectors_written_start;
u64 kbytes_written;
 
+   /* For accumulated compression statistics */
+   u64 acc_compr_blocks;
+   u32 acc_compr_inodes;
+   spinlock_t acc_compr_lock;
+
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
 
@@ -3985,6 +3990,43 @@ static inline int __i

[PATCH] f2fs: add sysfs nodes to get accumulated compression info

2021-03-04 Thread Daeho Jeong
From: Daeho Jeong 

Added acc_compr_inodes to show accumulated compressed inode count and
acc_compr_blocks to show accumulated secured block count with
compression in sysfs. These can be re-initialized to "0" by writing "0"
value in one of both.

Signed-off-by: Daeho Jeong 
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 13 +++
 fs/f2fs/checkpoint.c|  8 
 fs/f2fs/compress.c  |  4 +-
 fs/f2fs/data.c  |  2 +-
 fs/f2fs/f2fs.h  | 49 -
 fs/f2fs/file.c  |  8 ++--
 fs/f2fs/inode.c |  1 +
 fs/f2fs/super.c | 10 -
 fs/f2fs/sysfs.c | 45 +++
 include/linux/f2fs_fs.h |  4 +-
 10 files changed, 134 insertions(+), 10 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index cbeac1bebe2f..f4fc87503754 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -409,3 +409,16 @@ Description:   Give a way to change checkpoint merge 
daemon's io priority.
I/O priority "3". We can select the class between "rt" and "be",
and set the I/O priority within valid range of it. "," delimiter
is necessary in between I/O class and priority number.
+
+What:  /sys/fs/f2fs//acc_compr_inodes
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show accumulated compressed inode count. If you write "0" here,
+   you can initialize acc_compr_inodes and acc_compr_blocks as "0".
+
+What:  /sys/fs/f2fs//acc_compr_blocks
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show accumulated secured block count with compression.
+   If you write "0" here, you can initialize acc_compr_inodes and
+   acc_compr_blocks as "0".
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 174a0819ad96..cd944a569162 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1514,6 +1514,14 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
 
if (__remain_node_summaries(cpc->reason)) {
+   /* Record compression statistics in the hot node summary */
+   spin_lock(&sbi->acc_compr_lock);
+   seg_i->journal->info.acc_compr_blocks =
+   cpu_to_le64(sbi->acc_compr_blocks);
+   seg_i->journal->info.acc_compr_inodes =
+   cpu_to_le32(sbi->acc_compr_inodes);
+   spin_unlock(&sbi->acc_compr_lock);
+
f2fs_write_node_summaries(sbi, start_blk);
start_blk += NR_CURSEG_NODE_TYPE;
}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 77fa342de38f..9029e95f4ae4 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1351,8 +1351,8 @@ static int f2fs_write_compressed_pages(struct 
compress_ctx *cc,
}
 
if (fio.compr_blocks)
-   f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
-   f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
+   f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false, 
true);
+   f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true, true);
 
set_inode_flag(cc->inode, FI_APPEND_WRITE);
if (cc->cluster_idx == 0)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b9721c8f116c..d3afb9b0090e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2591,7 +2591,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
ClearPageError(page);
 
if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
-   f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
+   f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false, 
true);
 
/* LFS mode write path */
f2fs_outplace_write_data(&dn, fio);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e2d302ae3a46..4351ca77fa13 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1609,6 +1609,11 @@ struct f2fs_sb_info {
u64 sectors_written_start;
u64 kbytes_written;
 
+   /* For accumulated compression statistics */
+   u64 acc_compr_blocks;
+   u32 acc_compr_inodes;
+   spinlock_t acc_compr_lock;
+
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
 
@@ -3675,10 +3680,46 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct 
f2fs_sb_info *sbi)
if (f2fs_compressed_file(inode))   

[PATCH v2 1/2] f2fs: add compress_mode mount option

2020-11-30 Thread Daeho Jeong
From: Daeho Jeong 

We will add a new "compress_mode" mount option to control file
compression mode. This supports "fs" and "user". In "fs" mode (default),
f2fs does automatic compression on the compression enabled files.
In "user" mode, f2fs disables the automatic compression and gives the
user discretion of choosing the target file and the timing. It means
the user can do manual compression/decompression on the compression
enabled files using ioctls.

Signed-off-by: Daeho Jeong 
---
v2: changed mount option name and added more explanation of mount option
---
 Documentation/filesystems/f2fs.rst | 35 ++
 fs/f2fs/compress.c |  2 +-
 fs/f2fs/data.c |  2 +-
 fs/f2fs/f2fs.h | 30 +
 fs/f2fs/segment.c  |  2 +-
 fs/f2fs/super.c| 23 
 6 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index b8ee761c9922..5eb8d63439ec 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -260,6 +260,13 @@ compress_extension=%s   Support adding specified 
extension, so that f2fs can enab
 For other files, we can still enable compression via 
ioctl.
 Note that, there is one reserved special extension 
'*', it
 can be set to enable compression for all files.
+compress_mode=%sControl file compression mode. This supports "fs" and 
"user"
+modes. In "fs" mode (default), f2fs does automatic 
compression
+on the compression enabled files. In "user" mode, f2fs 
disables
+the automaic compression and gives the user discretion 
of
+choosing the target file and the timing. The user can 
do manual
+compression/decompression on the compression enabled 
files using
+ioctls.
 inlinecrypt When possible, encrypt/decrypt the contents of 
encrypted
 files using the blk-crypto framework rather than
 filesystem-layer encryption. This allows the use of
@@ -810,6 +817,34 @@ Compress metadata layout::
| data length | data chksum | reserved |  compressed data   |
+-+-+--++
 
+Compression mode
+----------------
+
+f2fs supports "fs" and "user" compression modes with "compression_mode" mount 
option.
+With this option, f2fs provides a choice to select the way how to compress the
+compression enabled files (refer to "Compression implementation" section for 
how to
+enable compression on a regular inode).
+
+1) compress_mode=fs
+This is the default option. f2fs does automatic compression in the writeback 
of the
+compression enabled files.
+
+2) compress_mode=user
+This disables the automatic compression and gives the user discretion of 
choosing the
+target file and the timing. The user can do manual compression/decompression 
on the
+compression enabled files using F2FS_IOC_DECOMPRESS_FILE and 
F2FS_IOC_COMPRESS_FILE
+ioctls like the below.
+
+To decompress a file,
+
+fd = open(filename, O_WRONLY, 0);
+ret = ioctl(fd, F2FS_IOC_DECOMPRESS_FILE);
+
+To compress a file,
+
+fd = open(filename, O_WRONLY, 0);
+ret = ioctl(fd, F2FS_IOC_COMPRESS_FILE);
+
 NVMe Zoned Namespace devices
 
 
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 3957a84a185e..87090da8693d 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -926,7 +926,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t 
index)
 
 static bool cluster_may_compress(struct compress_ctx *cc)
 {
-   if (!f2fs_compressed_file(cc->inode))
+   if (!f2fs_need_compress_data(cc->inode))
return false;
if (f2fs_is_atomic_file(cc->inode))
return false;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index be4da52604ed..42254d3859c7 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3164,7 +3164,7 @@ static inline bool __should_serialize_io(struct inode 
*inode,
if (IS_NOQUOTA(inode))
return false;
 
-   if (f2fs_compressed_file(inode))
+   if (f2fs_need_compress_data(inode))
return true;
if (wbc->sync_mode != WB_SYNC_ALL)
return true;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e0826779a101..94d16bde5e24 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -149,6 +149,7 @@ struct f2fs_mount_info {
unsigned char compress_algorithm;   /* algorithm type */
unsigned compress_log_size; /* clust

[PATCH v2 2/2] f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

2020-11-30 Thread Daeho Jeong
From: Daeho Jeong 

Added two ioctl to decompress/compress explicitly the compression
enabled file in "compress_mode=user" mount option.

Using these two ioctls, the users can make a control of compression
and decompression of their files.

Signed-off-by: Daeho Jeong 
---
v2: reformed codes based on comments and put gradual flush routine
---
 fs/f2fs/file.c| 185 ++
 include/uapi/linux/f2fs.h |   2 +
 2 files changed, 187 insertions(+)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index be8db06aca27..300355fe25f0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4026,6 +4026,185 @@ static int f2fs_ioc_set_compress_option(struct file 
*filp, unsigned long arg)
return ret;
 }
 
+static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
+{
+   DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx);
+   struct address_space *mapping = inode->i_mapping;
+   struct page *page;
+   pgoff_t redirty_idx = page_idx;
+   int i, page_len = 0, ret = 0;
+
+   page_cache_ra_unbounded(&ractl, len, 0);
+
+   for (i = 0; i < len; i++, page_idx++) {
+   page = read_cache_page(mapping, page_idx, NULL, NULL);
+   if (IS_ERR(page)) {
+   ret = PTR_ERR(page);
+   break;
+   }
+   page_len++;
+   }
+
+   for (i = 0; i < page_len; i++, redirty_idx++) {
+   page = find_lock_page(mapping, redirty_idx);
+   if (!page)
+   ret = -ENOENT;
+   set_page_dirty(page);
+   f2fs_put_page(page, 1);
+   f2fs_put_page(page, 0);
+   }
+
+   return ret;
+}
+
+static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
+{
+   struct inode *inode = file_inode(filp);
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+   struct f2fs_inode_info *fi = F2FS_I(inode);
+   pgoff_t page_idx = 0, last_idx;
+   unsigned int blk_per_seg = sbi->blocks_per_seg, dirtied = 0;
+   int cluster_size = F2FS_I(inode)->i_cluster_size;
+   int count, ret;
+
+   if (!f2fs_sb_has_compression(sbi) ||
+   F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
+   return -EOPNOTSUPP;
+
+   if (!(filp->f_mode & FMODE_WRITE))
+   return -EBADF;
+
+   if (!f2fs_compressed_file(inode))
+   return -EINVAL;
+
+   if (!f2fs_is_compress_backend_ready(inode))
+   return -ENOPKG;
+
+   f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+   file_start_write(filp);
+   inode_lock(inode);
+
+   if (f2fs_is_mmap_file(inode)) {
+   ret = -EBUSY;
+   goto out;
+   }
+
+   ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
+   if (ret)
+   goto out;
+
+   if (!atomic_read(&fi->i_compr_blocks))
+   goto out;
+
+   last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+
+   count = last_idx - page_idx;
+   while (count) {
+   int len = min(cluster_size, count);
+
+   ret = redirty_blocks(inode, page_idx, len);
+   if (ret < 0)
+   break;
+
+   count -= len;
+   page_idx += len;
+   dirtied += len;
+   if (dirtied >= blk_per_seg) {
+   filemap_fdatawrite(inode->i_mapping);
+   dirtied = 0;
+   }
+   }
+
+   if (!ret)
+   ret = filemap_write_and_wait_range(inode->i_mapping, 0,
+   LLONG_MAX);
+
+   if (ret)
+   f2fs_warn(sbi, "%s: The file might be partially decompressed "
+   "(errno=%d). Please delete the file.\n",
+   __func__, ret);
+out:
+   inode_unlock(inode);
+   file_end_write(filp);
+
+   return ret;
+}
+
+static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg)
+{
+   struct inode *inode = file_inode(filp);
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+   pgoff_t page_idx = 0, last_idx;
+   unsigned int blk_per_seg = sbi->blocks_per_seg, dirtied = 0;
+   int cluster_size = F2FS_I(inode)->i_cluster_size;
+   int count, ret;
+
+   if (!f2fs_sb_has_compression(sbi) ||
+   F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
+   return -EOPNOTSUPP;
+
+   if (!(filp->f_mode & FMODE_WRITE))
+   return -EBADF;
+
+   if (!f2fs_compressed_file(inode))
+   return -EINVAL;
+
+   if (!f2fs_is_compress_backend_ready(inode))
+   return -ENOPKG;
+
+   f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+   file_start_write(filp);
+   inode_lock(inode);
+

Re: [f2fs-dev] [PATCH 2/2] f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

2020-11-23 Thread Daeho Jeong
Jaegeuk,

My mistake~

Eric,

What I want is like do_page_cache_ra(), but I used
page_cache_ra_unbounded() directly, because we already checked that
read is within i_size.
Or we could use do_page_cache_ra(), but it might do the same check in it again.
What do you think?

I could add some description about these in
Documentation/filesystems/f2fs.rst and I implemented tests internally.

2020년 11월 24일 (화) 오전 3:48, Eric Biggers 님이 작성:
>
> On Mon, Nov 23, 2020 at 12:17:51PM +0900, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > Added two ioctl to decompress/compress explicitly the compression
> > enabled file in "compress_mode=user-based" mount option.
> >
> > Using these two ioctls, the users can make a control of compression
> > and decompression of their files.
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> >  fs/f2fs/file.c| 181 +-
> >  include/uapi/linux/f2fs.h |   2 +
> >  2 files changed, 182 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index be8db06aca27..e8f142470e87 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -4026,6 +4026,180 @@ static int f2fs_ioc_set_compress_option(struct file 
> > *filp, unsigned long arg)
> >   return ret;
> >  }
> >
> > +static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
> > +{
> > + DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx);
> > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > + struct address_space *mapping = inode->i_mapping;
> > + struct page *page;
> > + pgoff_t redirty_idx = page_idx;
> > + int i, page_len = 0, ret = 0;
> > +
> > + page_cache_ra_unbounded(&ractl, len, 0);
>
> Using page_cache_ra_unbounded() here looks wrong.  See the comment above
> page_cache_ra_unbounded().
>
> >  static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned 
> > long arg)
> >  {
> >   switch (cmd) {
> > @@ -4113,6 +4287,10 @@ static long __f2fs_ioctl(struct file *filp, unsigned 
> > int cmd, unsigned long arg)
> >   return f2fs_ioc_get_compress_option(filp, arg);
> >   case F2FS_IOC_SET_COMPRESS_OPTION:
> >   return f2fs_ioc_set_compress_option(filp, arg);
> > + case F2FS_IOC_DECOMPRESS_FILE:
> > + return f2fs_ioc_decompress_file(filp, arg);
> > + case F2FS_IOC_COMPRESS_FILE:
> > + return f2fs_ioc_compress_file(filp, arg);
> >   default:
> >   return -ENOTTY;
> >   }
>
> Where is the documentation and tests for these new ioctls?
>
> - Eric


Re: [f2fs-dev] [PATCH 1/2] f2fs: add compress_mode mount option

2020-11-23 Thread Daeho Jeong
Jaegeuk,

Got it.

Eric,

Yep.

2020년 11월 24일 (화) 오전 3:46, Eric Biggers 님이 작성:
>
> On Mon, Nov 23, 2020 at 12:17:50PM +0900, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > We will add a new "compress_mode" mount option to control file
> > compression mode. This supports "fs-based" and "user-based".
> > In "fs-based" mode (default), f2fs does automatic compression on
> > the compression enabled files. In "user-based" mode, f2fs disables
> > the automaic compression and gives the user discretion of choosing
> > the target file and the timing. It means the user can do manual
> > compression/decompression on the compression enabled files using ioctls.
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> >  Documentation/filesystems/f2fs.rst |  7 +++
> >  fs/f2fs/data.c | 10 +-
> >  fs/f2fs/f2fs.h | 30 ++
> >  fs/f2fs/segment.c  |  2 +-
> >  fs/f2fs/super.c| 23 +++
> >  5 files changed, 66 insertions(+), 6 deletions(-)
> >
> > diff --git a/Documentation/filesystems/f2fs.rst 
> > b/Documentation/filesystems/f2fs.rst
> > index b8ee761c9922..0679c53d5012 100644
> > --- a/Documentation/filesystems/f2fs.rst
> > +++ b/Documentation/filesystems/f2fs.rst
> > @@ -260,6 +260,13 @@ compress_extension=%s Support adding specified 
> > extension, so that f2fs can enab
> >For other files, we can still enable compression via 
> > ioctl.
> >Note that, there is one reserved special extension 
> > '*', it
> >can be set to enable compression for all files.
> > +compress_mode=%s  Control file compression mode. This supports 
> > "fs-based" and
> > +  "user-based". In "fs-based" mode (default), f2fs does
> > +  automatic compression on the compression enabled 
> > files.
> > +  In "user-based" mode, f2fs disables the automaic 
> > compression
> > +  and gives the user discretion of choosing the target 
> > file and
> > +  the timing. The user can do manual 
> > compression/decompression
> > +  on the compression enabled files using ioctls.
>
> Please clarify in the documentation what it means for compression-enabled 
> files
> to not be compressed.  It is not obvious.
>
> - Eric


Re: [f2fs-dev] [PATCH 2/2] f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

2020-11-23 Thread Daeho Jeong
2020년 11월 24일 (화) 오전 8:29, Eric Biggers 님이 작성:
>
> On Tue, Nov 24, 2020 at 08:02:21AM +0900, Daeho Jeong wrote:
> > Jaegeuk,
> >
> > My mistake~
> >
> > Eric,
> >
> > What I want is like do_page_cache_ra(), but I used
> > page_cache_ra_unbounded() directly, because we already checked that
> > read is within i_size.
> >
> > Or we could use do_page_cache_ra(), but it might do the same check in it 
> > again.
> > What do you think?
>
> page_cache_ra_unbounded() is basically a quirk for how fs-verity is 
> implemented
> in ext4 and f2fs.  I don't think people would be happy if it's used in other
> cases, where it's not needed.  Checking against i_size multiple times is fine.
>

Got your point. Thanks.

> >
> > I could add some description about these in
> > Documentation/filesystems/f2fs.rst and I implemented tests internally.
>
> Documentation in f2fs.rst sounds good.  All the f2fs ioctls should be
> documented there.
>
> The tests should be runnable by any kernel developer; "internal" tests aren't
> very useful.  Could you add tests to xfstests?
>

Yes, I'll add all the internal test cases to xfstests soon~

> - Eric


Re: [f2fs-dev] [PATCH] f2fs: fix race of pending_pages in decompression

2020-12-03 Thread Daeho Jeong
STEP_VERITY is enabled by f2fs_need_verity() and the function is like below.
We already know the second condition (idx < DIV_ROUND_UP...) is
satisfied when invoking f2fs_alloc_dic().

static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
{
return fsverity_active(inode) &&
   idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
}

2020년 12월 4일 (금) 오후 12:28, Eric Biggers 님이 작성:
>
> On Fri, Dec 04, 2020 at 09:58:47AM +0900, Daeho Jeong wrote:
> > diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> > index 87090da8693d..cdf72e153da0 100644
> > --- a/fs/f2fs/compress.c
> > +++ b/fs/f2fs/compress.c
> > @@ -803,8 +803,6 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
> > *page, bool verity)
> >   if (cops->destroy_decompress_ctx)
> >   cops->destroy_decompress_ctx(dic);
> >  out_free_dic:
> > - if (verity)
> > - atomic_set(&dic->pending_pages, dic->nr_cpages);
> >   if (!verity)
> >   f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
> >   ret, false);
> > @@ -1498,6 +1496,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct 
> > compress_ctx *cc)
> >   dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
> >   dic->inode = cc->inode;
> >   atomic_set(&dic->pending_pages, cc->nr_cpages);
> > + if (fsverity_active(cc->inode))
> > + atomic_set(&dic->verity_pages, cc->nr_cpages);
> >   dic->cluster_idx = cc->cluster_idx;
> >   dic->cluster_size = cc->cluster_size;
> >   dic->log_cluster_size = cc->log_cluster_size;
>
> The check for fsverity_active() is wrong.  It looks like you need to know
> whether the bio needs to go through the fs-verity data verification.  The
> correct way to determine that is to check whether STEP_VERITY is enabled in 
> the
> bio's bio_post_read_ctx.  It's set by f2fs_grab_read_bio() when needed.
>
> - Eric


Re: [f2fs-dev] [PATCH] f2fs: fix race of pending_pages in decompression

2020-12-03 Thread Daeho Jeong
> Are you sure?  I thought that compression (and encryption) apply to the whole
> file, including any Merkle tree blocks past i_size.

This "dic" structure is only for "de"compression, so we don't need to
worry about going beyond i_size case.

> Also, even if you include the i_size check, it's still wrong to check
> fsverity_active() in the middle of the I/O because FS_IOC_ENABLE_VERITY can
> execute concurrently, causing fsverity_active() to return false at the 
> beginning
> of the I/O and true later in the I/O.  It needs to be checked only once, at 
> the
> beginning...

Got it. Our previous implementation didn't consider this case. Need to fix this.

Thanks,


Re: [f2fs-dev] [PATCH] f2fs: fix race of pending_pages in decompression

2020-12-03 Thread Daeho Jeong
Eric,

I have another question.
I understand enabling the verity can be possible in the middle of I/O.
Is the opposite way also possible? Actually, I couldn't find any
disabling function of it, though.

2020년 12월 4일 (금) 오후 1:31, Daeho Jeong 님이 작성:
>
> > Are you sure?  I thought that compression (and encryption) apply to the 
> > whole
> > file, including any Merkle tree blocks past i_size.
>
> This "dic" structure is only for "de"compression, so we don't need to
> worry about going beyond i_size case.
>
> > Also, even if you include the i_size check, it's still wrong to check
> > fsverity_active() in the middle of the I/O because FS_IOC_ENABLE_VERITY can
> > execute concurrently, causing fsverity_active() to return false at the 
> > beginning
> > of the I/O and true later in the I/O.  It needs to be checked only once, at 
> > the
> > beginning...
>
> Got it. Our previous implementation didn't consider this case. Need to fix 
> this.
>
> Thanks,


Re: [f2fs-dev] [PATCH] f2fs: fix race of pending_pages in decompression

2020-12-03 Thread Daeho Jeong
I think I don't understand how verity works.
Right after verity is enabled on a file, is the verity logic working
for the whole file data area?
Or it's just working for the data area which is updated after verity is enabled?

2020년 12월 4일 (금) 오후 1:51, Eric Biggers 님이 작성:
>
> On Fri, Dec 04, 2020 at 01:48:24PM +0900, Daeho Jeong wrote:
> > Eric,
> >
> > I have another question.
> > I understand enabling the verity can be possible in the middle of I/O.
> > Is the opposite way also possible? Actually, I couldn't find any
> > disabling function of it, though.
> >
>
> No, disabling fs-verity on a file isn't possible.
>
> - Eric


Re: [f2fs-dev] [PATCH] f2fs: fix race of pending_pages in decompression

2020-12-03 Thread Daeho Jeong
Thanks for the explanation about verity.
I got your point. Thanks~

2020년 12월 4일 (금) 오후 2:18, Eric Biggers 님이 작성:
>
> On Fri, Dec 04, 2020 at 02:00:34PM +0900, Daeho Jeong wrote:
> > I think I don't understand how verity works.
> > Right after verity is enabled on a file, is the verity logic working
> > for the whole file data area?
> > Or it's just working for the data area which is updated after verity is 
> > enabled?
> >
>
> It's for the whole file.
>
> My point is just that if there is a bio that saw that verity isn't enabled yet
> when it started and therefore STEP_VERITY didn't get set in the
> bio_post_read_ctx (or the bio_post_read_ctx didn't get allocated due to one 
> not
> being needed), then the filesystem shouldn't change its mind and try to verify
> the pages when the bio completes if verity happened to be enabled 
> concurrently.
> It's too late for that bio.
>
> - Eric


[PATCH v2] f2fs: fix race of pending_pages in decompression

2020-12-03 Thread Daeho Jeong
From: Daeho Jeong 

I found out f2fs_free_dic() is invoked in a wrong timing, but
f2fs_verify_bio() still needed the dic info and it triggered the
below kernel panic. It has been caused by the race condition of
pending_pages value between decompression and verity logic, when
the same compression cluster had been split in different bios.
By split bios, f2fs_verify_bio() ended up with decreasing
pending_pages value before it is reset to nr_cpages by
f2fs_decompress_pages() and caused the kernel panic.

[ 4416.564763] Unable to handle kernel NULL pointer dereference
   at virtual address 
...
[ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
[ 4416.908515] pc : fsverity_verify_page+0x20/0x78
[ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
[ 4416.913722] sp : ffc019533cd0
[ 4416.913723] x29: ffc019533cd0 x28: 0402
[ 4416.913724] x27: 0001 x26: 0100
[ 4416.913726] x25: 0001 x24: 0004
[ 4416.913727] x23: 1000 x22: 
[ 4416.913728] x21:  x20: 2076f9c0
[ 4416.913729] x19: 2076f9c0 x18: ff8a32380c30
[ 4416.913731] x17: ffc01f966d97 x16: 0298
[ 4416.913732] x15:  x14: 
[ 4416.913733] x13: f074faec89ff x12: 
[ 4416.913734] x11: 1000 x10: 1000
[ 4416.929176] x9 : 20d1f5c7 x8 : 
[ 4416.929178] x7 : 626d7464ff286b6b x6 : ffc019533ade
[ 4416.929179] x5 : 8049000e x4 : 2793e9e0
[ 4416.929180] x3 : 8049000e x2 : ff89ecfa74d0
[ 4416.929181] x1 : 0c40 x0 : 2076f9c0
[ 4416.929184] Call trace:
[ 4416.929187]  fsverity_verify_page+0x20/0x78
[ 4416.929189]  f2fs_verify_bio+0x11c/0x29c
[ 4416.929192]  f2fs_verity_work+0x58/0x84
[ 4417.050667]  process_one_work+0x270/0x47c
[ 4417.055354]  worker_thread+0x27c/0x4d8
[ 4417.059784]  kthread+0x13c/0x320
[ 4417.063693]  ret_from_fork+0x10/0x18

Signed-off-by: Daeho Jeong 
Signed-off-by: Jaegeuk Kim 
---
v2: merged verity_pages with pending_pages, and increased the
pending_pages count only if STEP_VERITY is set on bio
---
 fs/f2fs/compress.c | 2 --
 fs/f2fs/data.c | 2 ++
 fs/f2fs/f2fs.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 87090da8693d..832b19986caf 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -803,8 +803,6 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
 out_free_dic:
-   if (verity)
-   atomic_set(&dic->pending_pages, dic->nr_cpages);
if (!verity)
f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
ret, false);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 42254d3859c7..b825d63cabdd 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2290,6 +2290,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct 
bio **bio_ret,
ctx = bio->bi_private;
if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
+   if (ctx->enabled_steps & (1 << STEP_VERITY))
+   atomic_inc(&dic->pending_pages);
 
inc_page_count(sbi, F2FS_RD_DATA);
f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 94d16bde5e24..a9ee7921c7ec 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1340,7 +1340,7 @@ struct decompress_io_ctx {
struct compress_data *cbuf; /* virtual mapped address on cpages */
size_t rlen;/* valid data length in rbuf */
size_t clen;/* valid data length in cbuf */
-   atomic_t pending_pages; /* in-flight compressed page count */
+   atomic_t pending_pages; /* in-flight compressed + verity page 
count */
bool failed;/* indicate IO error during 
decompression */
void *private;  /* payload buffer for specified 
decompression algorithm */
void *private2; /* extra payload buffer */
-- 
2.29.2.576.ga3fc446d84-goog



Re: [f2fs-dev] [PATCH v2] f2fs: fix race of pending_pages in decompression

2020-12-04 Thread Daeho Jeong
As I told, this patch has a bug. Ignore this one. So, I'm implementing
another one. :(

2020년 12월 4일 (금) 오후 6:50, Daeho Jeong 님이 작성:
>
> As I told, this patch has a bug. Ignore this one. So, I'm implementing 
> another one. :(
>
>
> 2020년 12월 4일 (금) 오후 5:04, Chao Yu 님이 작성:
>>
>> On 2020/12/4 15:28, Daeho Jeong wrote:
>> > From: Daeho Jeong 
>> >
>> > I found out f2fs_free_dic() is invoked in a wrong timing, but
>> > f2fs_verify_bio() still needed the dic info and it triggered the
>> > below kernel panic. It has been caused by the race condition of
>> > pending_pages value between decompression and verity logic, when
>> > the same compression cluster had been split in different bios.
>> > By split bios, f2fs_verify_bio() ended up with decreasing
>> > pending_pages value before it is reset to nr_cpages by
>> > f2fs_decompress_pages() and caused the kernel panic.
>> >
>> > [ 4416.564763] Unable to handle kernel NULL pointer dereference
>> > at virtual address 
>> > ...
>> > [ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
>> > [ 4416.908515] pc : fsverity_verify_page+0x20/0x78
>> > [ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
>> > [ 4416.913722] sp : ffc019533cd0
>> > [ 4416.913723] x29: ffc019533cd0 x28: 0402
>> > [ 4416.913724] x27: 0001 x26: 0100
>> > [ 4416.913726] x25: 0001 x24: 0004
>> > [ 4416.913727] x23: 1000 x22: 
>> > [ 4416.913728] x21:  x20: 2076f9c0
>> > [ 4416.913729] x19: 2076f9c0 x18: ff8a32380c30
>> > [ 4416.913731] x17: ffc01f966d97 x16: 0298
>> > [ 4416.913732] x15:  x14: 
>> > [ 4416.913733] x13: f074faec89ff x12: 
>> > [ 4416.913734] x11: 1000 x10: 1000
>> > [ 4416.929176] x9 : 20d1f5c7 x8 : 
>> > [ 4416.929178] x7 : 626d7464ff286b6b x6 : ffc019533ade
>> > [ 4416.929179] x5 : 8049000e x4 : 2793e9e0
>> > [ 4416.929180] x3 : 8049000e x2 : ff89ecfa74d0
>> > [ 4416.929181] x1 : 0c40 x0 : 2076f9c0
>> > [ 4416.929184] Call trace:
>> > [ 4416.929187]  fsverity_verify_page+0x20/0x78
>> > [ 4416.929189]  f2fs_verify_bio+0x11c/0x29c
>> > [ 4416.929192]  f2fs_verity_work+0x58/0x84
>> > [ 4417.050667]  process_one_work+0x270/0x47c
>> > [ 4417.055354]  worker_thread+0x27c/0x4d8
>> > [ 4417.059784]  kthread+0x13c/0x320
>> > [ 4417.063693]  ret_from_fork+0x10/0x18
>> >
>> > Signed-off-by: Daeho Jeong 
>> > Signed-off-by: Jaegeuk Kim 
>> > ---
>> > v2: merged verity_pages with pending_pages, and increased the
>> >  pending_pages count only if STEP_VERITY is set on bio
>> > ---
>> >   fs/f2fs/compress.c | 2 --
>> >   fs/f2fs/data.c | 2 ++
>> >   fs/f2fs/f2fs.h | 2 +-
>> >   3 files changed, 3 insertions(+), 3 deletions(-)
>> >
>> > diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
>> > index 87090da8693d..832b19986caf 100644
>> > --- a/fs/f2fs/compress.c
>> > +++ b/fs/f2fs/compress.c
>> > @@ -803,8 +803,6 @@ void f2fs_decompress_pages(struct bio *bio, struct 
>> > page *page, bool verity)
>> >   if (cops->destroy_decompress_ctx)
>> >   cops->destroy_decompress_ctx(dic);
>> >   out_free_dic:
>> > - if (verity)
>> > - atomic_set(&dic->pending_pages, dic->nr_cpages);
>> >   if (!verity)
>> >   f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
>> >   ret, false);
>> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> > index 42254d3859c7..b825d63cabdd 100644
>> > --- a/fs/f2fs/data.c
>> > +++ b/fs/f2fs/data.c
>> > @@ -2290,6 +2290,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, 
>> > struct bio **bio_ret,
>> >   ctx = bio->bi_private;
>> >   if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
>> >   ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
>> > + if (ctx->enabled_steps & (1 << STEP_VERITY))
>> > + atomic_inc(&dic->pending_pages);
>>
>> Hmm

Re: [f2fs-dev] [PATCH] f2fs: fix race of pending_pages in decompression

2020-12-04 Thread Daeho Jeong
Yep, we need to come back to v1 and enable verity in a unit of cluster.
Plus, as I told you, I'll prevent newly verity-enabled pages from
being merged with a verity-disabled bio.

Thanks,

2020년 12월 5일 (토) 오전 3:29, Jaegeuk Kim 님이 작성:
>
> On 12/04, Daeho Jeong wrote:
> > Thanks for the explanation about verity.
> > I got your point. Thanks~
>
> Possible fix can be like this?
>
> ---
>  fs/f2fs/compress.c |  2 --
>  fs/f2fs/data.c | 19 +--
>  2 files changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index 89f73a7c8667..c5fee4d7ea72 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -1491,8 +1491,6 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct 
> compress_ctx *cc)
> dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
> dic->inode = cc->inode;
> atomic_set(&dic->pending_pages, cc->nr_cpages);
> -   if (fsverity_active(cc->inode))
> -   atomic_set(&dic->verity_pages, cc->nr_cpages);
> dic->cluster_idx = cc->cluster_idx;
> dic->cluster_size = cc->cluster_size;
> dic->log_cluster_size = cc->log_cluster_size;
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index e3168f32f943..657fb562d7d4 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1035,7 +1035,8 @@ static inline bool f2fs_need_verity(const struct inode 
> *inode, pgoff_t idx)
>
>  static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
>   unsigned nr_pages, unsigned op_flag,
> - pgoff_t first_idx, bool for_write)
> + pgoff_t first_idx, bool for_write,
> + bool for_verity)
>  {
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> struct bio *bio;
> @@ -1057,7 +1058,7 @@ static struct bio *f2fs_grab_read_bio(struct inode 
> *inode, block_t blkaddr,
> post_read_steps |= 1 << STEP_DECRYPT;
> if (f2fs_compressed_file(inode))
> post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
> -   if (f2fs_need_verity(inode, first_idx))
> +   if (for_verity && f2fs_need_verity(inode, first_idx))
> post_read_steps |= 1 << STEP_VERITY;
>
> if (post_read_steps) {
> @@ -1087,7 +1088,7 @@ static int f2fs_submit_page_read(struct inode *inode, 
> struct page *page,
> struct bio *bio;
>
> bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
> -   page->index, for_write);
> +   page->index, for_write, true);
> if (IS_ERR(bio))
> return PTR_ERR(bio);
>
> @@ -2141,7 +2142,7 @@ static int f2fs_read_single_page(struct inode *inode, 
> struct page *page,
> if (bio == NULL) {
> bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
> is_readahead ? REQ_RAHEAD : 0, page->index,
> -   false);
> +   false, true);
> if (IS_ERR(bio)) {
> ret = PTR_ERR(bio);
> bio = NULL;
> @@ -2188,6 +2189,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, 
> struct bio **bio_ret,
> const unsigned blkbits = inode->i_blkbits;
> const unsigned blocksize = 1 << blkbits;
> struct decompress_io_ctx *dic = NULL;
> +   bool for_verity = false;
> int i;
> int ret = 0;
>
> @@ -2253,6 +2255,11 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, 
> struct bio **bio_ret,
> goto out_put_dnode;
> }
>
> +   if (fsverity_active(cc->inode)) {
> +   atomic_set(&dic->verity_pages, cc->nr_cpages);
> +   for_verity = true;
> +   }
> +
> for (i = 0; i < dic->nr_cpages; i++) {
> struct page *page = dic->cpages[i];
> block_t blkaddr;
> @@ -2272,7 +2279,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, 
> struct bio **bio_ret,
> if (!bio) {
> bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
> is_readahead ? REQ_RAHEAD : 0,
> -   page->index, for_write);
> +   page->index, for_write, for_verity);
> if (IS_ERR(bio)) {
> unsigned int remained = dic->nr_cpages - i;
> 

[PATCH v3] f2fs: fix race of pending_pages in decompression

2020-12-04 Thread Daeho Jeong
From: Daeho Jeong 

I found out f2fs_free_dic() is invoked in a wrong timing, but
f2fs_verify_bio() still needed the dic info and it triggered the
below kernel panic. It has been caused by the race condition of
pending_pages value between decompression and verity logic, when
the same compression cluster had been split in different bios.
By split bios, f2fs_verify_bio() ended up with decreasing
pending_pages value before it is reset to nr_cpages by
f2fs_decompress_pages() and caused the kernel panic.

[ 4416.564763] Unable to handle kernel NULL pointer dereference
   at virtual address 
...
[ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
[ 4416.908515] pc : fsverity_verify_page+0x20/0x78
[ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
[ 4416.913722] sp : ffc019533cd0
[ 4416.913723] x29: ffc019533cd0 x28: 0402
[ 4416.913724] x27: 0001 x26: 0100
[ 4416.913726] x25: 0001 x24: 0004
[ 4416.913727] x23: 1000 x22: 
[ 4416.913728] x21:  x20: 2076f9c0
[ 4416.913729] x19: 2076f9c0 x18: ff8a32380c30
[ 4416.913731] x17: ffc01f966d97 x16: 0298
[ 4416.913732] x15:  x14: 
[ 4416.913733] x13: f074faec89ff x12: 
[ 4416.913734] x11: 1000 x10: 1000
[ 4416.929176] x9 : 20d1f5c7 x8 : 
[ 4416.929178] x7 : 626d7464ff286b6b x6 : ffc019533ade
[ 4416.929179] x5 : 8049000e x4 : 2793e9e0
[ 4416.929180] x3 : 8049000e x2 : ff89ecfa74d0
[ 4416.929181] x1 : 0c40 x0 : 2076f9c0
[ 4416.929184] Call trace:
[ 4416.929187]  fsverity_verify_page+0x20/0x78
[ 4416.929189]  f2fs_verify_bio+0x11c/0x29c
[ 4416.929192]  f2fs_verity_work+0x58/0x84
[ 4417.050667]  process_one_work+0x270/0x47c
[ 4417.055354]  worker_thread+0x27c/0x4d8
[ 4417.059784]  kthread+0x13c/0x320
[ 4417.063693]  ret_from_fork+0x10/0x18

Signed-off-by: Daeho Jeong 
Signed-off-by: Jaegeuk Kim 
---
v3: back to v1 and enabled verity in a unit of cluster
v2: merged verity_pages with pending_pages, and increased the
pending_pages count only if STEP_VERITY is set on bio
---
 fs/f2fs/compress.c |  2 --
 fs/f2fs/data.c | 51 --
 fs/f2fs/f2fs.h |  1 +
 3 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 87090da8693d..832b19986caf 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -803,8 +803,6 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
 out_free_dic:
-   if (verity)
-   atomic_set(&dic->pending_pages, dic->nr_cpages);
if (!verity)
f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
ret, false);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 42254d3859c7..861e5783a5fc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -202,7 +202,7 @@ static void f2fs_verify_bio(struct bio *bio)
dic = (struct decompress_io_ctx *)page_private(page);
 
if (dic) {
-   if (atomic_dec_return(&dic->pending_pages))
+   if (atomic_dec_return(&dic->verity_pages))
continue;
f2fs_verify_pages(dic->rpages,
dic->cluster_size);
@@ -1027,7 +1027,8 @@ static inline bool f2fs_need_verity(const struct inode 
*inode, pgoff_t idx)
 
 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
  unsigned nr_pages, unsigned op_flag,
- pgoff_t first_idx, bool for_write)
+ pgoff_t first_idx, bool for_write,
+ bool for_verity)
 {
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
@@ -1049,7 +1050,7 @@ static struct bio *f2fs_grab_read_bio(struct inode 
*inode, block_t blkaddr,
post_read_steps |= 1 << STEP_DECRYPT;
if (f2fs_compressed_file(inode))
post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
-   if (f2fs_need_verity(inode, first_idx))
+   if (for_verity && f2fs_need_verity(inode, first_idx))
post_read_steps |= 1 << STEP_VERITY;
 
if (post_read_steps) {
@@ -1079,7 +1080,7 @@ static int f2fs_submit_page_read(struct inode *inode, 
struct page *page,
struct bio *bio;
 
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
-   page->index, for_write);
+ 

Re: [f2fs-dev] [PATCH v3] f2fs: fix race of pending_pages in decompression

2020-12-06 Thread Daeho Jeong
> It looks like it will be better to move this into merge condition?
>
> if (bio && (!page_is_mergeable(sbi, bio,
> *last_block_in_bio, blkaddr) ||
> !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL) 
> ||
> f2fs_verify_mergeable_bio())) {
>

I tried this approach at first, but it requires unnecessary checks
within the compression cluster.
I wanted to check just once at the beginning of the cluster.
What do you think?


Re: [f2fs-dev] [PATCH 2/2] f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

2020-11-25 Thread Daeho Jeong
Eric,

do_page_cache_ra() is defined in mm/internal.h for internal use
within mm, so we cannot use this one right now.
So, I think we could use page_cache_ra_unbounded(), because we already
check i_size boundary on our own.
What do you think?

2020년 11월 24일 (화) 오후 12:05, Chao Yu 님이 작성:
>
> On 2020/11/23 11:17, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > Added two ioctl to decompress/compress explicitly the compression
> > enabled file in "compress_mode=user-based" mount option.
> >
> > Using these two ioctls, the users can make a control of compression
> > and decompression of their files.
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> >   fs/f2fs/file.c| 181 +-
> >   include/uapi/linux/f2fs.h |   2 +
> >   2 files changed, 182 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index be8db06aca27..e8f142470e87 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -4026,6 +4026,180 @@ static int f2fs_ioc_set_compress_option(struct file 
> > *filp, unsigned long arg)
> >   return ret;
> >   }
> >
> > +static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
> > +{
> > + DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx);
> > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > + struct address_space *mapping = inode->i_mapping;
> > + struct page *page;
> > + pgoff_t redirty_idx = page_idx;
> > + int i, page_len = 0, ret = 0;
> > +
> > + page_cache_ra_unbounded(&ractl, len, 0);
> > +
> > + for (i = 0; i < len; i++, page_idx++) {
> > + page = read_cache_page(mapping, page_idx, NULL, NULL);
> > + if (IS_ERR(page)) {
> > + ret = PTR_ERR(page);
> > + f2fs_warn(sbi, "%s: inode (%lu) : page_index (%lu) "
> > + "couldn't be read (errno:%d).\n",
> > + __func__, inode->i_ino, page_idx, ret);
>
> This is a common error case during calling read_cache_page(), IMO, this looks
> more like a debug log, so I prefer to print nothing here, or at least using
> f2fs_debug() instead.
>
> > + break;
> > + }
> > + page_len++;
> > + }
> > +
> > + for (i = 0; i < page_len; i++, redirty_idx++) {
> > + page = find_lock_page(mapping, redirty_idx);
> > + if (!page) {
> > + ret = -ENOENT;
> > + f2fs_warn(sbi, "%s: inode (%lu) : page_index (%lu) "
> > + "couldn't be found (errno:%d).\n",
> > + __func__, inode->i_ino, redirty_idx, ret);
>
> Ditto.
>
> > + }
> > + set_page_dirty(page);
> > + f2fs_put_page(page, 1);
> > + f2fs_put_page(page, 0);
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
> > +{
> > + struct inode *inode = file_inode(filp);
> > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > + struct f2fs_inode_info *fi = F2FS_I(inode);
> > + pgoff_t page_idx = 0, last_idx;
> > + int cluster_size = F2FS_I(inode)->i_cluster_size;
> > + int count, ret;
> > +
> > + if (!f2fs_sb_has_compression(sbi))
> > + return -EOPNOTSUPP;
> > +
> > + if (!(filp->f_mode & FMODE_WRITE))
> > + return -EBADF;
> > +
> > + if (!f2fs_compressed_file(inode))
> > + return -EINVAL;
>
> Before compressubg/decompressing file, should we check whether current inode's
> compress algorithm backend is available in f2fs module?
>
> > +
> > + f2fs_balance_fs(F2FS_I_SB(inode), true);
> > +
> > + file_start_write(filp);
> > + inode_lock(inode);
> > +
> > + if (f2fs_is_mmap_file(inode)) {
> > + ret = -EBUSY;
> > + goto out;
> > + }
> > +
> > + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
> > + if (ret)
> > + goto out;
> > +
> > + if (!atomic_read(&fi->i_compr_blocks))
> > + goto out;
> > +
> > + last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
> > +
> > + count = last_idx - pag

Re: [f2fs-dev] [PATCH 2/2] f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

2020-11-25 Thread Daeho Jeong
Chao,

> A little bit wired, why not failing cluster_may_compress() for user mode, and
> let writepages write cluster as raw blocks, in-where we can update 
> i_compr_blocks
> and global compr block stats correctly.

For decompression ioctl, I've made f2fs_need_compress_data() return
"false" to prevent compression write, so we don't use
f2fs_write_compressed_pages() anymore in this case.
Because of this, I manually updated i_compr_blocks. Do you have any
suggestions on this?

2020년 11월 26일 (목) 오후 2:04, Daeho Jeong 님이 작성:
>
> Eric,
>
> do_page_cache_ra() is defined in mm/internal.h for internal use
> between in mm, so we cannot use this one right now.
> So, I think we could use page_cache_ra_unbounded(), because we already
> check i_size boundary on our own.
> What do you think?
>
> 2020년 11월 24일 (화) 오후 12:05, Chao Yu 님이 작성:
> >
> > On 2020/11/23 11:17, Daeho Jeong wrote:
> > > From: Daeho Jeong 
> > >
> > > Added two ioctl to decompress/compress explicitly the compression
> > > enabled file in "compress_mode=user-based" mount option.
> > >
> > > Using these two ioctls, the users can make a control of compression
> > > and decompression of their files.
> > >
> > > Signed-off-by: Daeho Jeong 
> > > ---
> > >   fs/f2fs/file.c| 181 +-
> > >   include/uapi/linux/f2fs.h |   2 +
> > >   2 files changed, 182 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > index be8db06aca27..e8f142470e87 100644
> > > --- a/fs/f2fs/file.c
> > > +++ b/fs/f2fs/file.c
> > > @@ -4026,6 +4026,180 @@ static int f2fs_ioc_set_compress_option(struct 
> > > file *filp, unsigned long arg)
> > >   return ret;
> > >   }
> > >
> > > +static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
> > > +{
> > > + DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx);
> > > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > > + struct address_space *mapping = inode->i_mapping;
> > > + struct page *page;
> > > + pgoff_t redirty_idx = page_idx;
> > > + int i, page_len = 0, ret = 0;
> > > +
> > > + page_cache_ra_unbounded(&ractl, len, 0);
> > > +
> > > + for (i = 0; i < len; i++, page_idx++) {
> > > + page = read_cache_page(mapping, page_idx, NULL, NULL);
> > > + if (IS_ERR(page)) {
> > > + ret = PTR_ERR(page);
> > > + f2fs_warn(sbi, "%s: inode (%lu) : page_index (%lu) "
> > > + "couldn't be read (errno:%d).\n",
> > > + __func__, inode->i_ino, page_idx, ret);
> >
> > This is a common error case during calling read_cache_page(), IMO, this 
> > looks
> > more like a debug log, so I prefer to print nothing here, or at least using
> > f2fs_debug() instead.
> >
> > > + break;
> > > + }
> > > + page_len++;
> > > + }
> > > +
> > > + for (i = 0; i < page_len; i++, redirty_idx++) {
> > > + page = find_lock_page(mapping, redirty_idx);
> > > + if (!page) {
> > > + ret = -ENOENT;
> > > + f2fs_warn(sbi, "%s: inode (%lu) : page_index (%lu) "
> > > + "couldn't be found (errno:%d).\n",
> > > + __func__, inode->i_ino, redirty_idx, ret);
> >
> > Ditto.
> >
> > > + }
> > > + set_page_dirty(page);
> > > + f2fs_put_page(page, 1);
> > > + f2fs_put_page(page, 0);
> > > + }
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
> > > +{
> > > + struct inode *inode = file_inode(filp);
> > > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > > + struct f2fs_inode_info *fi = F2FS_I(inode);
> > > + pgoff_t page_idx = 0, last_idx;
> > > + int cluster_size = F2FS_I(inode)->i_cluster_size;
> > > + int count, ret;
> > > +
> > > + if (!f2fs_sb_has_compression(sbi))
> > > + return -EOPNOTSUPP;
> > > +
> > > +

Re: [f2fs-dev] [PATCH v2 1/2] f2fs: add compress_mode mount option

2020-12-02 Thread Daeho Jeong
Yep, you're right~ :)

2020년 12월 3일 (목) 오전 10:16, Chao Yu 님이 작성:
>
> On 2020/12/2 18:54, Daeho Jeong wrote:
> > We might use compress_extension=*,compress_option=user.
> > In this option, we're gonna allocate all the writes in cold zone.
>
> Oh, so all files in data partition will be tagged as compressed file,
> but the compressing time will be controlled by user, then, only blocks
> belong to in-compress-process inode will go to cold area, right?
>
> Thanks,
>
> >
> >
> > 2020년 12월 2일 (수) 오후 3:31, Chao Yu  > <mailto:yuch...@huawei.com>>님이 작성:
> >
> > On 2020/12/1 12:08, Daeho Jeong wrote:
> >  > From: Daeho Jeong  > <mailto:daehoje...@google.com>>
> >  >
> >  > We will add a new "compress_mode" mount option to control file
> >  > compression mode. This supports "fs" and "user". In "fs" mode 
> > (default),
> >  > f2fs does automatic compression on the compression enabled files.
> >  > In "user" mode, f2fs disables the automatic compression and gives the
> >  > user discretion of choosing the target file and the timing. It means
> >  > the user can do manual compression/decompression on the compression
> >  > enabled files using ioctls.
> >  >
> >  > Signed-off-by: Daeho Jeong  > <mailto:daehoje...@google.com>>
> >  > ---
> >  > v2: changed mount option name and added more explanation of mount 
> > option
> >  > ---
> >  >   Documentation/filesystems/f2fs.rst | 35 
> > ++
> >  >   fs/f2fs/compress.c |  2 +-
> >  >   fs/f2fs/data.c |  2 +-
> >  >   fs/f2fs/f2fs.h | 30 +
> >  >   fs/f2fs/segment.c  |  2 +-
> >  >   fs/f2fs/super.c| 23 
> >  >   6 files changed, 91 insertions(+), 3 deletions(-)
> >  >
> >  > diff --git a/Documentation/filesystems/f2fs.rst 
> > b/Documentation/filesystems/f2fs.rst
> >  > index b8ee761c9922..5eb8d63439ec 100644
> >  > --- a/Documentation/filesystems/f2fs.rst
> >  > +++ b/Documentation/filesystems/f2fs.rst
> >  > @@ -260,6 +260,13 @@ compress_extension=%s Support adding 
> > specified extension, so that f2fs can enab
> >  >For other files, we can still enable 
> > compression via ioctl.
> >  >Note that, there is one reserved special 
> > extension '*', it
> >  >can be set to enable compression for all 
> > files.
> >  > +compress_mode=%s  Control file compression mode. This supports 
> > "fs" and "user"
> >  > +  modes. In "fs" mode (default), f2fs does 
> > automatic compression
> >  > +  on the compression enabled files. In "user" 
> > mode, f2fs disables
> >  > +  the automatic compression and gives the user 
> > discretion of
> >  > +  choosing the target file and the timing. The 
> > user can do manual
> >  > +  compression/decompression on the compression 
> > enabled files using
> >  > +  ioctls.
> >  >   inlinecrypt  When possible, encrypt/decrypt the contents 
> > of encrypted
> >  >files using the blk-crypto framework rather 
> > than
> >  >filesystem-layer encryption. This allows the 
> > use of
> >  > @@ -810,6 +817,34 @@ Compress metadata layout::
> >  >   | data length | data chksum | reserved |  compressed data  
> >  |
> >  >   
> > +-+-+--++
> >  >
> >  > +Compression mode
> >  > +--
> >  > +
> >  > +f2fs supports "fs" and "user" compression modes with 
> > "compression_mode" mount option.
> >  > +With this option, f2fs provides a choice to select the way how to 
> > compress the
> >  > +compression enabled files (refer to "Compression implementation" 
> > section for how to
> >  > +enable compression on a reg

Re: [f2fs-dev] [PATCH v7 2/2] f2fs: add F2FS_IOC_SET_COMPRESS_OPTION ioctl

2020-12-02 Thread Daeho Jeong
Oh, by the control logic changed in the previous patch, we don't need
to use f2fs_is_compress_algorithm_valid() anymore.
Looks good~

2020년 12월 3일 (목) 오전 11:49, Chao Yu 님이 작성:
>
> Jaegeuk, not sure, is it too late to merge this cleanup into original patch?
>
>  From a5c63ec58e0cda6eb5d186b46942eea46422b7a9 Mon Sep 17 00:00:00 2001
> From: Chao Yu 
> Date: Thu, 3 Dec 2020 10:04:26 +0800
> Subject: [PATCH] f2fs: remove f2fs_is_compress_algorithm_valid() for cleanup
>
> No logic changes.
>
> Signed-off-by: Chao Yu 
> ---
>   fs/f2fs/compress.c | 5 -
>   fs/f2fs/f2fs.h | 5 -
>   fs/f2fs/file.c | 2 +-
>   3 files changed, 1 insertion(+), 11 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index dfadbc78946c..869b047a4801 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -574,11 +574,6 @@ bool f2fs_is_compress_backend_ready(struct inode *inode)
> return f2fs_cops[F2FS_I(inode)->i_compress_algorithm];
>   }
>
> -bool f2fs_is_compress_algorithm_valid(unsigned char algorithm)
> -{
> -   return f2fs_cops[algorithm] != NULL;
> -}
> -
>   static mempool_t *compress_page_pool;
>   static int num_compress_pages = 512;
>   module_param(num_compress_pages, uint, 0444);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index b70c8d553439..17b45c2d2b04 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3882,7 +3882,6 @@ bool f2fs_compress_write_end(struct inode *inode, void 
> *fsdata,
>   int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock);
>   void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
>   bool f2fs_is_compress_backend_ready(struct inode *inode);
> -bool f2fs_is_compress_algorithm_valid(unsigned char algorithm);
>   int f2fs_init_compress_mempool(void);
>   void f2fs_destroy_compress_mempool(void);
>   void f2fs_do_decompress_pages(struct decompress_io_ctx *dic, bool verity);
> @@ -3927,10 +3926,6 @@ static inline bool 
> f2fs_is_compress_backend_ready(struct inode *inode)
> /* not support compression */
> return false;
>   }
> -static inline bool f2fs_is_compress_algorithm_valid(unsigned char algorithm)
> -{
> -   return false;
> -}
>   static inline struct page *f2fs_compress_control_page(struct page *page)
>   {
> WARN_ON_ONCE(1);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 300355fe25f0..0453b441228d 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -4016,7 +4016,7 @@ static int f2fs_ioc_set_compress_option(struct file 
> *filp, unsigned long arg)
> F2FS_I(inode)->i_cluster_size = 1 << option.log_cluster_size;
> f2fs_mark_inode_dirty_sync(inode, true);
>
> -   if (!f2fs_is_compress_algorithm_valid(option.algorithm))
> +   if (!f2fs_is_compress_backend_ready(inode))
> f2fs_warn(sbi, "compression algorithm is successfully set, "
> "but current kernel doesn't support this algorithm.");
>   out:
> --
> 2.26.2
>
>
>
>
>
> On 2020/10/30 12:10, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > Added a new F2FS_IOC_SET_COMPRESS_OPTION ioctl to change file
> > compression option of a file.
> >
> > struct f2fs_comp_option {
> >  u8 algorithm; => compression algorithm
> >    => 0:lzo, 1:lz4, 2:zstd, 3:lzorle
> >  u8 log_cluster_size;  => log scale cluster size
> >=> 2 ~ 8
> > };
> >
> > struct f2fs_comp_option option;
> >
> > option.algorithm = 1;
> > option.log_cluster_size = 7;
> >
> > ioctl(fd, F2FS_IOC_SET_COMPRESS_OPTION, &option);
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> >
> > v6: changed the function name of checking compression algorithm validity.
> > v5: allowed to set algorithm which is not currently enabled by kernel.
> > v4: changed commit message.
> > v3: changed the error number more specific.
> >  folded in fix for build breakage reported by kernel test robot
> >   and Dan Carpenter .
> > v2: added ioctl description.
> > ---
> >   fs/f2fs/compress.c |  5 +
> >   fs/f2fs/f2fs.h |  7 ++
> >   fs/f2fs/file.c | 54 ++
> >   3 files changed, 66 insertions(+)
> >
> > diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> > index 7895186cc765..b0144670d320 100644
> > --- a/fs/f2fs/compress.c
> > +++ b/fs/f2fs/compress.c
> > @@ -514,6 +514,11 @@ bool f2fs_is_compress_backend_ready(struct inode 
> > *inode)
> >  

[PATCH v3] f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

2020-12-02 Thread Daeho Jeong
From: Daeho Jeong 

Added two ioctls to explicitly decompress/compress the compression
enabled file in "compress_mode=user" mount option.

Using these two ioctls, users can control the compression
and decompression of their files.

Signed-off-by: Daeho Jeong 
---
v3: changed error condition and use get_dirty_pages for flush routine
v2: reformed codes based on comments and put gradual flush routine
---
 fs/f2fs/file.c| 185 ++
 include/uapi/linux/f2fs.h |   2 +
 2 files changed, 187 insertions(+)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index be8db06aca27..3678e25ed17a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4026,6 +4026,185 @@ static int f2fs_ioc_set_compress_option(struct file 
*filp, unsigned long arg)
return ret;
 }
 
+static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
+{
+   DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx);
+   struct address_space *mapping = inode->i_mapping;
+   struct page *page;
+   pgoff_t redirty_idx = page_idx;
+   int i, page_len = 0, ret = 0;
+
+   page_cache_ra_unbounded(&ractl, len, 0);
+
+   for (i = 0; i < len; i++, page_idx++) {
+   page = read_cache_page(mapping, page_idx, NULL, NULL);
+   if (IS_ERR(page)) {
+   ret = PTR_ERR(page);
+   break;
+   }
+   page_len++;
+   }
+
+   for (i = 0; i < page_len; i++, redirty_idx++) {
+   page = find_lock_page(mapping, redirty_idx);
+   if (!page)
+   ret = -ENOENT;
+   set_page_dirty(page);
+   f2fs_put_page(page, 1);
+   f2fs_put_page(page, 0);
+   }
+
+   return ret;
+}
+
+static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
+{
+   struct inode *inode = file_inode(filp);
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+   struct f2fs_inode_info *fi = F2FS_I(inode);
+   pgoff_t page_idx = 0, last_idx;
+   unsigned int blk_per_seg = sbi->blocks_per_seg;
+   int cluster_size = F2FS_I(inode)->i_cluster_size;
+   int count, ret;
+
+   if (!f2fs_sb_has_compression(sbi) ||
+   F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
+   return -EOPNOTSUPP;
+
+   if (!(filp->f_mode & FMODE_WRITE))
+   return -EBADF;
+
+   if (!f2fs_compressed_file(inode))
+   return -EINVAL;
+
+   f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+   file_start_write(filp);
+   inode_lock(inode);
+
+   if (!f2fs_is_compress_backend_ready(inode)) {
+   ret = -EOPNOTSUPP;
+   goto out;
+   }
+
+   if (f2fs_is_mmap_file(inode)) {
+   ret = -EBUSY;
+   goto out;
+   }
+
+   ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
+   if (ret)
+   goto out;
+
+   if (!atomic_read(&fi->i_compr_blocks))
+   goto out;
+
+   last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+
+   count = last_idx - page_idx;
+   while (count) {
+   int len = min(cluster_size, count);
+
+   ret = redirty_blocks(inode, page_idx, len);
+   if (ret < 0)
+   break;
+
+   if (get_dirty_pages(inode) >= blk_per_seg)
+   filemap_fdatawrite(inode->i_mapping);
+
+   count -= len;
+   page_idx += len;
+   }
+
+   if (!ret)
+   ret = filemap_write_and_wait_range(inode->i_mapping, 0,
+   LLONG_MAX);
+
+   if (ret)
+   f2fs_warn(sbi, "%s: The file might be partially decompressed "
+   "(errno=%d). Please delete the file.\n",
+   __func__, ret);
+out:
+   inode_unlock(inode);
+   file_end_write(filp);
+
+   return ret;
+}
+
+static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg)
+{
+   struct inode *inode = file_inode(filp);
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+   pgoff_t page_idx = 0, last_idx;
+   unsigned int blk_per_seg = sbi->blocks_per_seg;
+   int cluster_size = F2FS_I(inode)->i_cluster_size;
+   int count, ret;
+
+   if (!f2fs_sb_has_compression(sbi) ||
+   F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
+   return -EOPNOTSUPP;
+
+   if (!(filp->f_mode & FMODE_WRITE))
+   return -EBADF;
+
+   if (!f2fs_compressed_file(inode))
+   return -EINVAL;
+
+   f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+   file_start_write(filp);
+   inode_lock(inode);
+
+   if (!f2fs_is_compress_backend_ready(inode)) {
+ 

[PATCH] f2fs: fix race of pending_pages in decompression

2020-12-03 Thread Daeho Jeong
From: Daeho Jeong 

I found out f2fs_free_dic() is invoked in a wrong timing, but
f2fs_verify_bio() still needed the dic info and it triggered the
below kernel panic. It has been caused by the race condition of
pending_pages value between decompression and verity logic, when
the same compression cluster had been split in different bios.
By split bios, f2fs_verify_bio() ended up with decreasing
pending_pages value before it is reset to nr_cpages by
f2fs_decompress_pages() and caused the kernel panic.

[ 4416.564763] Unable to handle kernel NULL pointer dereference
   at virtual address 
...
[ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
[ 4416.908515] pc : fsverity_verify_page+0x20/0x78
[ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
[ 4416.913722] sp : ffc019533cd0
[ 4416.913723] x29: ffc019533cd0 x28: 0402
[ 4416.913724] x27: 0001 x26: 0100
[ 4416.913726] x25: 0001 x24: 0004
[ 4416.913727] x23: 1000 x22: 
[ 4416.913728] x21:  x20: 2076f9c0
[ 4416.913729] x19: 2076f9c0 x18: ff8a32380c30
[ 4416.913731] x17: ffc01f966d97 x16: 0298
[ 4416.913732] x15:  x14: 
[ 4416.913733] x13: f074faec89ff x12: 
[ 4416.913734] x11: 1000 x10: 1000
[ 4416.929176] x9 : 20d1f5c7 x8 : 
[ 4416.929178] x7 : 626d7464ff286b6b x6 : ffc019533ade
[ 4416.929179] x5 : 8049000e x4 : 2793e9e0
[ 4416.929180] x3 : 8049000e x2 : ff89ecfa74d0
[ 4416.929181] x1 : 0c40 x0 : 2076f9c0
[ 4416.929184] Call trace:
[ 4416.929187]  fsverity_verify_page+0x20/0x78
[ 4416.929189]  f2fs_verify_bio+0x11c/0x29c
[ 4416.929192]  f2fs_verity_work+0x58/0x84
[ 4417.050667]  process_one_work+0x270/0x47c
[ 4417.055354]  worker_thread+0x27c/0x4d8
[ 4417.059784]  kthread+0x13c/0x320
[ 4417.063693]  ret_from_fork+0x10/0x18

Signed-off-by: Daeho Jeong 
---
 fs/f2fs/compress.c |  4 ++--
 fs/f2fs/data.c | 24 +++-
 fs/f2fs/f2fs.h |  1 +
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 87090da8693d..cdf72e153da0 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -803,8 +803,6 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
 out_free_dic:
-   if (verity)
-   atomic_set(&dic->pending_pages, dic->nr_cpages);
if (!verity)
f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
ret, false);
@@ -1498,6 +1496,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct 
compress_ctx *cc)
dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
dic->inode = cc->inode;
atomic_set(&dic->pending_pages, cc->nr_cpages);
+   if (fsverity_active(cc->inode))
+   atomic_set(&dic->verity_pages, cc->nr_cpages);
dic->cluster_idx = cc->cluster_idx;
dic->cluster_size = cc->cluster_size;
dic->log_cluster_size = cc->log_cluster_size;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 42254d3859c7..be0567dcace9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -202,7 +202,7 @@ static void f2fs_verify_bio(struct bio *bio)
dic = (struct decompress_io_ctx *)page_private(page);
 
if (dic) {
-   if (atomic_dec_return(&dic->pending_pages))
+   if (atomic_dec_return(&dic->verity_pages))
continue;
f2fs_verify_pages(dic->rpages,
dic->cluster_size);
@@ -2266,15 +2266,29 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, 
struct bio **bio_ret,
is_readahead ? REQ_RAHEAD : 0,
page->index, for_write);
if (IS_ERR(bio)) {
+   unsigned int remained = dic->nr_cpages - i;
+   bool release = false;
+
ret = PTR_ERR(bio);
dic->failed = true;
-   if (!atomic_sub_return(dic->nr_cpages - i,
-   &dic->pending_pages)) {
+
+   if (fsverity_active(inode)) {
+   if (!atomic_sub_return(remained,
+   &dic->verity_pages))
+   release = true;
+   } else {
+  

Re: [f2fs-dev] [PATCH v3] f2fs: add sysfs nodes to get runtime compression stat

2021-03-12 Thread Daeho Jeong
We're going to use these just as accumulating numbers and periodically
gather and reset them.
Yes, I will add the description like you suggested.

Thanks,

2021년 3월 12일 (금) 오후 6:50, Chao Yu 님이 작성:
>
> On 2021/3/11 10:32, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > I've added new sysfs nodes to show runtime compression stat since mount.
> > compr_written_block - show the block count written after compression
> > compr_saved_block - show the saved block count with compression
> > compr_new_inode - show the count of inode newly enabled for compression
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> > v2: thanks to kernel test robot , fixed compile issue
> >  related to kernel config.
> > v3: changed sysfs nodes' names and made them runtime stat, not
> >  persistent on disk
> > ---
> >   Documentation/ABI/testing/sysfs-fs-f2fs | 20 +
> >   fs/f2fs/compress.c  |  1 +
> >   fs/f2fs/f2fs.h  | 19 
> >   fs/f2fs/super.c |  7 +++
> >   fs/f2fs/sysfs.c | 58 +
> >   5 files changed, 105 insertions(+)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
> > b/Documentation/ABI/testing/sysfs-fs-f2fs
> > index cbeac1bebe2f..f2981eb319cb 100644
> > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > @@ -409,3 +409,23 @@ Description: Give a way to change checkpoint merge 
> > daemon's io priority.
> >   I/O priority "3". We can select the class between "rt" and 
> > "be",
> >   and set the I/O priority within valid range of it. "," 
> > delimiter
> >   is necessary in between I/O class and priority number.
> > +
> > +What:/sys/fs/f2fs//compr_written_block
> > +Date:March 2021
> > +Contact: "Daeho Jeong" 
> > +Description: Show the block count written after compression since mount.
> > + If you write "0" here, you can initialize compr_written_block 
> > and
> > + compr_saved_block to "0".
> > +
> > +What:/sys/fs/f2fs//compr_saved_block
> > +Date:March 2021
> > +Contact: "Daeho Jeong" 
> > +Description: Show the saved block count with compression since mount.
> > + If you write "0" here, you can initialize compr_written_block 
> > and
> > + compr_saved_block to "0".
> > +
> > +What:/sys/fs/f2fs//compr_new_inode
> > +Date:March 2021
> > +Contact: "Daeho Jeong" 
> > +Description: Show the count of inode newly enabled for compression since 
> > mount.
> > + If you write "0" here, you can initialize compr_new_inode to 
> > "0".
> > diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> > index 77fa342de38f..3c9d797dbdd6 100644
> > --- a/fs/f2fs/compress.c
> > +++ b/fs/f2fs/compress.c
> > @@ -1353,6 +1353,7 @@ static int f2fs_write_compressed_pages(struct 
> > compress_ctx *cc,
> >   if (fio.compr_blocks)
> >   f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, 
> > false);
> >   f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
> > + add_compr_block_stat(inode, cc->nr_cpages);
>
> If compressed cluster was overwritten as normal cluster, compr_saved_block 
> value
> won't be decreased, is it fine?
>
> >
> >   set_inode_flag(cc->inode, FI_APPEND_WRITE);
> >   if (cc->cluster_idx == 0)
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index e2d302ae3a46..2c989f8caf05 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1623,6 +1623,11 @@ struct f2fs_sb_info {
> >   #ifdef CONFIG_F2FS_FS_COMPRESSION
> >   struct kmem_cache *page_array_slab; /* page array entry */
> >   unsigned int page_array_slab_size;  /* default page array slab 
> > size */
> > +
> > + /* For runtime compression statistics */
> > + atomic64_t compr_written_block;
> > + atomic64_t compr_saved_block;
> > + atomic_t compr_new_inode;
> >   #endif
> >   };
> >
> > @@ -3955,6 +3960,18 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info 
> > *sbi);
> >   void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
> >   int __init f2fs_init_c

[PATCH v4] f2fs: add sysfs nodes to get runtime compression stat

2021-03-12 Thread Daeho Jeong
From: Daeho Jeong 

I've added new sysfs nodes to show runtime compression stat since mount.
compr_written_block - show the block count written after compression
compr_saved_block - show the saved block count with compression
compr_new_inode - show the count of inode newly enabled for compression

Signed-off-by: Daeho Jeong 
---
v2: thanks to kernel test robot , fixed compile issue
related to kernel config
v3: changed sysfs nodes' names and made them runtime stat, not
persistent on disk
v4: changed sysfs nodes' description
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 24 ++
 fs/f2fs/compress.c  |  1 +
 fs/f2fs/f2fs.h  | 19 
 fs/f2fs/super.c |  7 +++
 fs/f2fs/sysfs.c | 58 +
 5 files changed, 109 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index cbeac1bebe2f..ddd4bd6116fc 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -409,3 +409,27 @@ Description:   Give a way to change checkpoint merge 
daemon's io priority.
I/O priority "3". We can select the class between "rt" and "be",
and set the I/O priority within valid range of it. "," delimiter
is necessary in between I/O class and priority number.
+
+What:  /sys/fs/f2fs//compr_written_block
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show the block count written after compression since mount. Note
+   that when the compressed blocks are deleted, this count doesn't
+   decrease. If you write "0" here, you can initialize
+   compr_written_block and compr_saved_block to "0".
+
+What:  /sys/fs/f2fs//compr_saved_block
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show the saved block count with compression since mount. Note
+   that when the compressed blocks are deleted, this count doesn't
+   decrease. If you write "0" here, you can initialize
+   compr_written_block and compr_saved_block to "0".
+
+What:  /sys/fs/f2fs//compr_new_inode
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show the count of inode newly enabled for compression since 
mount.
+   Note that when the compression is disabled for the files, this 
count
+   doesn't decrease. If you write "0" here, you can initialize
+   compr_new_inode to "0".
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 77fa342de38f..3c9d797dbdd6 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1353,6 +1353,7 @@ static int f2fs_write_compressed_pages(struct 
compress_ctx *cc,
if (fio.compr_blocks)
f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
+   add_compr_block_stat(inode, cc->nr_cpages);
 
set_inode_flag(cc->inode, FI_APPEND_WRITE);
if (cc->cluster_idx == 0)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e2d302ae3a46..2c989f8caf05 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1623,6 +1623,11 @@ struct f2fs_sb_info {
 #ifdef CONFIG_F2FS_FS_COMPRESSION
struct kmem_cache *page_array_slab; /* page array entry */
unsigned int page_array_slab_size;  /* default page array slab size 
*/
+
+   /* For runtime compression statistics */
+   atomic64_t compr_written_block;
+   atomic64_t compr_saved_block;
+   atomic_t compr_new_inode;
 #endif
 };
 
@@ -3955,6 +3960,18 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
 void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
 int __init f2fs_init_compress_cache(void);
 void f2fs_destroy_compress_cache(void);
+#define inc_compr_inode_stat(inode)\
+   do {\
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);\
+   atomic_inc(&sbi->compr_new_inode);  \
+   } while (0)
+#define add_compr_block_stat(inode, blocks)\
+   do {\
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);\
+   int diff = F2FS_I(inode)->i_cluster_size - blocks;  \
+   atomic64_add(blocks, &sbi->compr_written_block);\
+   atomic64_add(diff, &sbi->compr_saved_block);\
+   } while (0)
 #else
 static inline bool f2fs_is_compressed_page(struct page *page) { return fa

Re: [PATCH v4] f2fs: add sysfs nodes to get runtime compression stat

2021-03-12 Thread Daeho Jeong
Thanks for suggesting me sysfs_emit().

For atomic values, actually, those are needed for writer part, not reader.

+#define add_compr_block_stat(inode, blocks)\
+   do {\
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);\
+   int diff = F2FS_I(inode)->i_cluster_size - blocks;  \
+   atomic64_add(blocks, &sbi->compr_written_block);\
+   atomic64_add(diff, &sbi->compr_saved_block);\
+   } while (0)

I needed a protection here, because they might be updated in the race condition.

2021년 3월 12일 (금) 오후 9:39, Greg KH 님이 작성:
>
> On Fri, Mar 12, 2021 at 09:25:31PM +0900, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > I've added new sysfs nodes to show runtime compression stat since mount.
> > compr_written_block - show the block count written after compression
> > compr_saved_block - show the saved block count with compression
> > compr_new_inode - show the count of inode newly enabled for compression
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> > v2: thanks to kernel test robot , fixed compile issue
> > related to kernel config
> > v3: changed sysfs nodes' names and made them runtime stat, not
> > persistent on disk
> > v4: changed sysfs nodes' description
> > ---
> >  Documentation/ABI/testing/sysfs-fs-f2fs | 24 ++
> >  fs/f2fs/compress.c  |  1 +
> >  fs/f2fs/f2fs.h  | 19 
> >  fs/f2fs/super.c |  7 +++
> >  fs/f2fs/sysfs.c | 58 +
> >  5 files changed, 109 insertions(+)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
> > b/Documentation/ABI/testing/sysfs-fs-f2fs
> > index cbeac1bebe2f..ddd4bd6116fc 100644
> > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > @@ -409,3 +409,27 @@ Description: Give a way to change checkpoint merge 
> > daemon's io priority.
> >   I/O priority "3". We can select the class between "rt" and 
> > "be",
> >   and set the I/O priority within valid range of it. "," 
> > delimiter
> >   is necessary in between I/O class and priority number.
> > +
> > +What:/sys/fs/f2fs//compr_written_block
> > +Date:March 2021
> > +Contact: "Daeho Jeong" 
> > +Description: Show the block count written after compression since mount. 
> > Note
> > +     that when the compressed blocks are deleted, this count 
> > doesn't
> > + decrease. If you write "0" here, you can initialize
> > + compr_written_block and compr_saved_block to "0".
> > +
> > +What:/sys/fs/f2fs//compr_saved_block
> > +Date:March 2021
> > +Contact: "Daeho Jeong" 
> > +Description: Show the saved block count with compression since mount. Note
> > + that when the compressed blocks are deleted, this count 
> > doesn't
> > + decrease. If you write "0" here, you can initialize
> > + compr_written_block and compr_saved_block to "0".
> > +
> > +What:/sys/fs/f2fs//compr_new_inode
> > +Date:March 2021
> > +Contact: "Daeho Jeong" 
> > +Description: Show the count of inode newly enabled for compression since 
> > mount.
> > + Note that when the compression is disabled for the files, 
> > this count
> > + doesn't decrease. If you write "0" here, you can initialize
> > + compr_new_inode to "0".
> > diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> > index 77fa342de38f..3c9d797dbdd6 100644
> > --- a/fs/f2fs/compress.c
> > +++ b/fs/f2fs/compress.c
> > @@ -1353,6 +1353,7 @@ static int f2fs_write_compressed_pages(struct 
> > compress_ctx *cc,
> >   if (fio.compr_blocks)
> >   f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, 
> > false);
> >   f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
> > + add_compr_block_stat(inode, cc->nr_cpages);
> >
> >   set_inode_flag(cc->inode, FI_APPEND_WRITE);
> >   if (cc->cluster_idx == 0)
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index e2d302ae3a46..2c989f8caf05 1

Re: [PATCH v4] f2fs: add sysfs nodes to get runtime compression stat

2021-03-12 Thread Daeho Jeong
As you can see, if we're doing like the below.

sbi->compr_written_block += blocks;

Let's assume the initial value as 0.

 
sbi->compr_written_block = 0;

sbi->compr_written_block = 0;
+blocks(3);
   + blocks(2);
sbi->compr_written_block = 3;

sbi->compr_written_block = 2;

Finally, we end up with 2, not 5.

As more threads are participating it, we might miss more counting.

2021년 3월 12일 (금) 오후 11:04, Greg KH 님이 작성:
>
> On Fri, Mar 12, 2021 at 10:56:13PM +0900, Daeho Jeong wrote:
> > Thanks for suggesting me sysfs_emit().
> >
> > For atomic values, actually, those are needed for writer part, not reader.
> >
> > +#define add_compr_block_stat(inode, blocks)\
> > +   do {\
> > +   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);\
> > +   int diff = F2FS_I(inode)->i_cluster_size - blocks;  \
> > +   atomic64_add(blocks, &sbi->compr_written_block);\
> > +   atomic64_add(diff, &sbi->compr_saved_block);\
> > +   } while (0)
> >
> > I needed a protection here, because they might be updated in the race 
> > condition.
>
> Why?  What are you trying to protect from "racing" here?
>
> thanks,
>
> greg k-h


Re: [PATCH v4] f2fs: add sysfs nodes to get runtime compression stat

2021-03-12 Thread Daeho Jeong
So, do you want we protect the values here with spin_lock and just
read without spin_lock in sysfs read part?

2021년 3월 12일 (금) 오후 11:37, Daeho Jeong 님이 작성:
>
> As you can see, if we're doing like the below.
>
> sbi->compr_written_block += blocks;
>
> Let's assume the initial value as 0.
>
>  
> sbi->compr_written_block = 0;
>
> sbi->compr_written_block = 0;
> +blocks(3);
>+ blocks(2);
> sbi->compr_written_block = 3;
>
> sbi->compr_written_block = 2;
>
> Finally, we end up with 2, not 5.
>
> As more threads are participating it, we might miss more counting.
>
> 2021년 3월 12일 (금) 오후 11:04, Greg KH 님이 작성:
> >
> > On Fri, Mar 12, 2021 at 10:56:13PM +0900, Daeho Jeong wrote:
> > > Thanks for suggesting me sysfs_emit().
> > >
> > > For atomic values, actually, those are needed for writer part, not reader.
> > >
> > > +#define add_compr_block_stat(inode, blocks)\
> > > +   do {\
> > > +   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);\
> > > +   int diff = F2FS_I(inode)->i_cluster_size - blocks;  \
> > > +   atomic64_add(blocks, &sbi->compr_written_block);\
> > > +   atomic64_add(diff, &sbi->compr_saved_block);\
> > > +   } while (0)
> > >
> > > I needed a protection here, because they might be updated in the race 
> > > condition.
> >
> > Why?  What are you trying to protect from "racing" here?
> >
> > thanks,
> >
> > greg k-h


Re: [PATCH v4] f2fs: add sysfs nodes to get runtime compression stat

2021-03-12 Thread Daeho Jeong
2021년 3월 12일 (금) 오후 11:45, Greg KH 님이 작성:
>
> A: http://en.wikipedia.org/wiki/Top_post
> Q: Were do I find info about this thing called top-posting?
> A: Because it messes up the order in which people normally read text.
> Q: Why is top-posting such a bad thing?
> A: Top-posting.
> Q: What is the most annoying thing in e-mail?
>
> A: No.
> Q: Should I include quotations after my reply?
>
> http://daringfireball.net/2007/07/on_top
>

Thanks for letting me know this!

>
> On Fri, Mar 12, 2021 at 11:37:29PM +0900, Daeho Jeong wrote:
> > As you can see, if we're doing like the below.
> >
> > sbi->compr_written_block += blocks;
> >
> > Let's assume the initial value as 0.
> >
> >  
> > sbi->compr_written_block = 0;
> >
> > sbi->compr_written_block = 0;
> > +blocks(3);
> >+ blocks(2);
> > sbi->compr_written_block = 3;
> >
> > sbi->compr_written_block = 2;
> >
> > Finally, we end up with 2, not 5.
> >
> > As more threads are participating it, we might miss more counting.
>
> Are you sure?  Isn't adding a number something that should happen in a
> "safe" way?
>
> And if you miss 2 blocks, who cares?  What is so critical about these
> things that you take the cache flush of 2 atomic writes just for a
> debugging statistic?
>
> Why not just take 1 lock for everything if it's so important to get
> these "correct"?
>
> What is the performance throughput degradation of adding 2 atomic writes
> to each time you write a block?
>
> But really, will you ever notice missing a few, even if that could be
> possible on your cpu (and I strongly doubt most modern cpus will miss
> this...)
>
> But this isn't my code, I just hate seeing atomic variables used for
> silly things like debugging stats when they do not seem to be really
> needed.  So if you want to keep them, go ahead, but realize that the
> number you are reading has nothing to do with being "atomic" at all.
>
> thanks,
>

I agree that missing number would be extremely few and the overhead of
updating the numbers would be quite bad.

Thanks for your valuable comments. :)

> greg k-h


[PATCH v5] f2fs: add sysfs nodes to get runtime compression stat

2021-03-15 Thread Daeho Jeong
From: Daeho Jeong 

I've added new sysfs nodes to show runtime compression stat since mount.
compr_written_block - show the block count written after compression
compr_saved_block - show the saved block count with compression
compr_new_inode - show the count of inode newly enabled for compression

Signed-off-by: Daeho Jeong 
---
v2: thanks to kernel test robot , fixed compile issue
related to kernel config
v3: changed sysfs nodes' names and made them runtime stat, not
persistent on disk
v4: changed sysfs nodes' description
v5: changed not to use atomic values
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 24 
 fs/f2fs/compress.c  |  1 +
 fs/f2fs/f2fs.h  | 19 +
 fs/f2fs/sysfs.c | 38 +
 4 files changed, 82 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index cbeac1bebe2f..ddd4bd6116fc 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -409,3 +409,27 @@ Description:   Give a way to change checkpoint merge 
daemon's io priority.
I/O priority "3". We can select the class between "rt" and "be",
and set the I/O priority within valid range of it. "," delimiter
is necessary in between I/O class and priority number.
+
+What:  /sys/fs/f2fs//compr_written_block
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show the block count written after compression since mount. Note
+   that when the compressed blocks are deleted, this count doesn't
+   decrease. If you write "0" here, you can initialize
+   compr_written_block and compr_saved_block to "0".
+
+What:  /sys/fs/f2fs//compr_saved_block
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show the saved block count with compression since mount. Note
+   that when the compressed blocks are deleted, this count doesn't
+   decrease. If you write "0" here, you can initialize
+   compr_written_block and compr_saved_block to "0".
+
+What:  /sys/fs/f2fs//compr_new_inode
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show the count of inode newly enabled for compression since 
mount.
+   Note that when the compression is disabled for the files, this 
count
+   doesn't decrease. If you write "0" here, you can initialize
+   compr_new_inode to "0".
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 77fa342de38f..3c9d797dbdd6 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1353,6 +1353,7 @@ static int f2fs_write_compressed_pages(struct 
compress_ctx *cc,
if (fio.compr_blocks)
f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
+   add_compr_block_stat(inode, cc->nr_cpages);
 
set_inode_flag(cc->inode, FI_APPEND_WRITE);
if (cc->cluster_idx == 0)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e2d302ae3a46..ae7e08aa7d87 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1623,6 +1623,11 @@ struct f2fs_sb_info {
 #ifdef CONFIG_F2FS_FS_COMPRESSION
struct kmem_cache *page_array_slab; /* page array entry */
unsigned int page_array_slab_size;  /* default page array slab size 
*/
+
+   /* For runtime compression statistics */
+   u64 compr_written_block;
+   u64 compr_saved_block;
+   u32 compr_new_inode;
 #endif
 };
 
@@ -3955,6 +3960,18 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
 void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
 int __init f2fs_init_compress_cache(void);
 void f2fs_destroy_compress_cache(void);
+#define inc_compr_inode_stat(inode)\
+   do {\
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);\
+   sbi->compr_new_inode++; \
+   } while (0)
+#define add_compr_block_stat(inode, blocks)\
+   do {\
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);\
+   int diff = F2FS_I(inode)->i_cluster_size - blocks;  \
+   sbi->compr_written_block += blocks; \
+   sbi->compr_saved_block += diff; \
+   } while (0)
 #else
 static inline bool f2fs_is_compressed_page(struct page *page) { return false; }
 stat

Re: [f2fs-dev] [PATCH v2 2/2] f2fs: add ckpt_thread_ioprio sysfs node

2021-01-21 Thread Daeho Jeong
Oops, it disappeared while versioning up...

2021년 1월 21일 (목) 오후 7:30, Chao Yu 님이 작성:
>
> On 2021/1/14 14:23, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > Added "ckpt_thread_ioprio" sysfs node to give a way to change checkpoint
> > merge daemon's io priority. Its default value is "be,3", which means
> > "BE" I/O class and I/O priority "3". We can select the class between "rt"
> > and "be", and set the I/O priority within valid range of it.
> > "," delimiter is necessary in between I/O class and priority number.
> >
> > Signed-off-by: Daeho Jeong 
> > ---
> > v2:
> > - adapt to inlining ckpt_req_control of f2fs_sb_info
> > ---
> >   Documentation/ABI/testing/sysfs-fs-f2fs |  8 
> >   fs/f2fs/checkpoint.c|  2 +-
> >   fs/f2fs/f2fs.h  |  1 +
> >   fs/f2fs/sysfs.c | 51 +
> >   4 files changed, 61 insertions(+), 1 deletion(-)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
> > b/Documentation/ABI/testing/sysfs-fs-f2fs
> > index 3dfee94e0618..0c48b2e7dfd4 100644
> > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > @@ -377,3 +377,11 @@ Description: This gives a control to limit the bio 
> > size in f2fs.
> >   Default is zero, which will follow underlying block layer 
> > limit,
> >   whereas, if it has a certain bytes value, f2fs won't submit a
> >   bio larger than that size.
> > +What:/sys/fs/f2fs//ckpt_thread_ioprio
> > +Date:January 2021
> > +Contact: "Daeho Jeong" 
> > +Description: Give a way to change checkpoint merge daemon's io priority.
> > + Its default value is "be,3", which means "BE" I/O class and
> > + I/O priority "3". We can select the class between "rt" and 
> > "be",
> > + and set the I/O priority within valid range of it. "," 
> > delimiter
> > + is necessary in between I/O class and priority number.
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index e0668cec3b80..62bd6f449bb7 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -1840,7 +1840,7 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
> >   if (IS_ERR(cprc->f2fs_issue_ckpt))
> >   return PTR_ERR(cprc->f2fs_issue_ckpt);
> >
> > - set_task_ioprio(cprc->f2fs_issue_ckpt, DEFAULT_CHECKPOINT_IOPRIO);
> > + set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio);
>
> Where do we set default value of cprc->ckpt_thread_ioprio? I guess it should
> be f2fs_init_ckpt_req_control()?
>
> Thanks,


[PATCH v3] f2fs: add sysfs nodes to get runtime compression stat

2021-03-10 Thread Daeho Jeong
From: Daeho Jeong 

I've added new sysfs nodes to show runtime compression stat since mount.
compr_written_block - show the block count written after compression
compr_saved_block - show the saved block count with compression
compr_new_inode - show the count of inode newly enabled for compression

Signed-off-by: Daeho Jeong 
---
v2: thanks to kernel test robot , fixed compile issue
related to kernel config.
v3: changed sysfs nodes' names and made them runtime stat, not
persistent on disk
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 20 +
 fs/f2fs/compress.c  |  1 +
 fs/f2fs/f2fs.h  | 19 
 fs/f2fs/super.c |  7 +++
 fs/f2fs/sysfs.c | 58 +
 5 files changed, 105 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index cbeac1bebe2f..f2981eb319cb 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -409,3 +409,23 @@ Description:   Give a way to change checkpoint merge 
daemon's io priority.
I/O priority "3". We can select the class between "rt" and "be",
and set the I/O priority within valid range of it. "," delimiter
is necessary in between I/O class and priority number.
+
+What:  /sys/fs/f2fs//compr_written_block
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show the block count written after compression since mount.
+   If you write "0" here, you can initialize compr_written_block 
and
+   compr_saved_block to "0".
+
+What:      /sys/fs/f2fs//compr_saved_block
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show the saved block count with compression since mount.
+   If you write "0" here, you can initialize compr_written_block 
and
+   compr_saved_block to "0".
+
+What:  /sys/fs/f2fs//compr_new_inode
+Date:  March 2021
+Contact:   "Daeho Jeong" 
+Description:   Show the count of inode newly enabled for compression since 
mount.
+   If you write "0" here, you can initialize compr_new_inode to 
"0".
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 77fa342de38f..3c9d797dbdd6 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1353,6 +1353,7 @@ static int f2fs_write_compressed_pages(struct 
compress_ctx *cc,
if (fio.compr_blocks)
f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
+   add_compr_block_stat(inode, cc->nr_cpages);
 
set_inode_flag(cc->inode, FI_APPEND_WRITE);
if (cc->cluster_idx == 0)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e2d302ae3a46..2c989f8caf05 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1623,6 +1623,11 @@ struct f2fs_sb_info {
 #ifdef CONFIG_F2FS_FS_COMPRESSION
struct kmem_cache *page_array_slab; /* page array entry */
unsigned int page_array_slab_size;  /* default page array slab size 
*/
+
+   /* For runtime compression statistics */
+   atomic64_t compr_written_block;
+   atomic64_t compr_saved_block;
+   atomic_t compr_new_inode;
 #endif
 };
 
@@ -3955,6 +3960,18 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
 void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
 int __init f2fs_init_compress_cache(void);
 void f2fs_destroy_compress_cache(void);
+#define inc_compr_inode_stat(inode)\
+   do {\
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);\
+   atomic_inc(&sbi->compr_new_inode);  \
+   } while (0)
+#define add_compr_block_stat(inode, blocks)\
+   do {\
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);\
+   int diff = F2FS_I(inode)->i_cluster_size - blocks;  \
+   atomic64_add(blocks, &sbi->compr_written_block);\
+   atomic64_add(diff, &sbi->compr_saved_block);\
+   } while (0)
 #else
 static inline bool f2fs_is_compressed_page(struct page *page) { return false; }
 static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
@@ -3983,6 +4000,7 @@ static inline int f2fs_init_page_array_cache(struct 
f2fs_sb_info *sbi) { return
 static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
 static inline int __init f2fs_init_compress_cache(void) { return 0; }
 static inline void f2fs_destroy

[PATCH v5 2/2] f2fs: add ckpt_thread_ioprio sysfs node

2021-01-21 Thread Daeho Jeong
From: Daeho Jeong 

Added "ckpt_thread_ioprio" sysfs node to give a way to change checkpoint
merge daemon's io priority. Its default value is "be,3", which means
"BE" I/O class and I/O priority "3". We can select the class between "rt"
and "be", and set the I/O priority within valid range of it.
"," delimiter is necessary in between I/O class and priority number.

Signed-off-by: Daeho Jeong 
---
v2:
- adapt to inlining ckpt_req_control of f2fs_sb_info
v5:
- initialized cprc->ckpt_thread_ioprio
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  8 
 fs/f2fs/checkpoint.c|  3 +-
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/sysfs.c | 51 +
 4 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3dfee94e0618..0c48b2e7dfd4 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -377,3 +377,11 @@ Description:   This gives a control to limit the bio 
size in f2fs.
Default is zero, which will follow underlying block layer limit,
whereas, if it has a certain bytes value, f2fs won't submit a
bio larger than that size.
+What:  /sys/fs/f2fs//ckpt_thread_ioprio
+Date:  January 2021
+Contact:   "Daeho Jeong" 
+Description:   Give a way to change checkpoint merge daemon's io priority.
+   Its default value is "be,3", which means "BE" I/O class and
+   I/O priority "3". We can select the class between "rt" and "be",
+   and set the I/O priority within valid range of it. "," delimiter
+   is necessary in between I/O class and priority number.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ef6ad3d1957d..a3549923762d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1854,7 +1854,7 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
return PTR_ERR(cprc->f2fs_issue_ckpt);
}
 
-   set_task_ioprio(cprc->f2fs_issue_ckpt, DEFAULT_CHECKPOINT_IOPRIO);
+   set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio);
 
return 0;
 }
@@ -1880,6 +1880,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi)
atomic_set(&cprc->issued_ckpt, 0);
atomic_set(&cprc->total_ckpt, 0);
atomic_set(&cprc->queued_ckpt, 0);
+   cprc->ckpt_thread_ioprio = DEFAULT_CHECKPOINT_IOPRIO;
init_waitqueue_head(&cprc->ckpt_wait_queue);
init_llist_head(&cprc->issue_list);
spin_lock_init(&cprc->stat_lock);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f2ae075aa723..517eb0eda638 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -276,6 +276,7 @@ struct ckpt_req {
 
 struct ckpt_req_control {
struct task_struct *f2fs_issue_ckpt;/* checkpoint task */
+   int ckpt_thread_ioprio; /* checkpoint merge thread 
ioprio */
wait_queue_head_t ckpt_wait_queue;  /* waiting queue for wake-up */
atomic_t issued_ckpt;   /* # of actually issued ckpts */
atomic_t total_ckpt;/* # of total ckpts */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 30bae57428d1..ddd70395148d 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "f2fs.h"
 #include "segment.h"
@@ -34,6 +35,7 @@ enum {
FAULT_INFO_TYPE,/* struct f2fs_fault_info */
 #endif
RESERVED_BLOCKS,/* struct f2fs_sb_info */
+   CPRC_INFO,  /* struct ckpt_req_control */
 };
 
 struct f2fs_attr {
@@ -70,6 +72,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, 
int struct_type)
else if (struct_type == STAT_INFO)
return (unsigned char *)F2FS_STAT(sbi);
 #endif
+   else if (struct_type == CPRC_INFO)
+   return (unsigned char *)&sbi->cprc_info;
return NULL;
 }
 
@@ -255,6 +259,23 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
return len;
}
 
+   if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) {
+   struct ckpt_req_control *cprc = &sbi->cprc_info;
+   int len = 0;
+   int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio);
+   int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio);
+
+   if (class == IOPRIO_CLASS_RT)
+   len += scnprintf(buf + len, PAGE_SIZE - len, "rt,");
+   else if (class == IOPRIO_CLASS_BE)
+   len += scnprintf(buf + len, PAGE_SIZE - len, "be,");
+   el

[PATCH v5 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-21 Thread Daeho Jeong
From: Daeho Jeong 

We've added a new mount option "checkpoint=merge", which creates a
kernel daemon and makes it to merge concurrent checkpoint requests as
much as possible to eliminate redundant checkpoint issues. Plus, we
can eliminate the sluggish issue caused by slow checkpoint operation
when the checkpoint is done in a process context in a cgroup having
low i/o budget and cpu shares. To make this do better, we set the
default i/o priority of the kernel daemon to "3", to give one higher
priority than other kernel threads. The below verification result
explains this.
The basic idea has come from https://opensource.samsung.com.

[Verification]
Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
Create two I/O cgroups (fg w/ weight 100, bg w/ weight 20)
Set "strict_guarantees" to "1" in BFQ tunables

In "fg" cgroup,
- thread A => trigger 1000 checkpoint operations
  "for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
   done"
- thread B => generating async. I/O
  "fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
   --filename=test_img --name=test"

In "bg" cgroup,
- thread C => trigger repeated checkpoint operations
  "echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
   fsync test_dir2; done"

We've measured thread A's execution time.

[ w/o patch ]
Elapsed Time: Avg. 68 seconds
[ w/  patch ]
Elapsed Time: Avg. 48 seconds

Signed-off-by: Daeho Jeong 
Signed-off-by: Sungjong Seo 
---
v2:
- inlined ckpt_req_control into f2fs_sb_info and collected stastics
  of checkpoint merge operations
v3:
- fixed some minor errors and cleaned up f2fs_sync_fs()
v4:
- added an explanation to raise the default i/o priority of the
  checkpoint daemon
---
 Documentation/filesystems/f2fs.rst |  10 ++
 fs/f2fs/checkpoint.c   | 177 +
 fs/f2fs/debug.c|  12 ++
 fs/f2fs/f2fs.h |  27 +
 fs/f2fs/super.c|  55 +++--
 5 files changed, 273 insertions(+), 8 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index dae15c96e659..9624a0be0364 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -247,6 +247,16 @@ checkpoint=%s[:%u[%]]   Set to "disable" to turn off 
checkpointing. Set to "enabl
 hide up to all remaining free space. The actual space 
that
 would be unusable can be viewed at 
/sys/fs/f2fs//unusable
 This space is reclaimed once checkpoint=enable.
+Here is another option "merge", which creates a kernel 
daemon
+and makes it to merge concurrent checkpoint requests 
as much
+as possible to eliminate redundant checkpoint issues. 
Plus,
+we can eliminate the sluggish issue caused by slow 
checkpoint
+operation when the checkpoint is done in a process 
context in
+a cgroup having low i/o budget and cpu shares. To make 
this
+do better, we set the default i/o priority of the 
kernel daemon
+to "3", to give one higher priority than other kernel 
threads.
+This is the same way to give a I/O priority to the jbd2
+journaling thread of ext4 filesystem.
 compress_algorithm=%s   Control compress algorithm, currently f2fs supports 
"lzo",
 "lz4", "zstd" and "lzo-rle" algorithm.
 compress_log_size=%uSupport configuring compress cluster size, the size 
will
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 897edb7c951a..ef6ad3d1957d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "f2fs.h"
 #include "node.h"
@@ -20,6 +21,8 @@
 #include "trace.h"
 #include 
 
+#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
+
 static struct kmem_cache *ino_entry_slab;
 struct kmem_cache *f2fs_inode_entry_slab;
 
@@ -1707,3 +1710,177 @@ void f2fs_destroy_checkpoint_caches(void)
kmem_cache_destroy(ino_entry_slab);
kmem_cache_destroy(f2fs_inode_entry_slab);
 }
+
+static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
+{
+   struct cp_control cpc = { .reason = CP_SYNC, };
+   int err;
+
+   down_write(&sbi->gc_lock);
+   err = f2fs_write_checkpoint(sbi, &cpc);
+   up_write(&sbi->gc_lock);
+
+   return err;
+}
+
+static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi)
+{
+   struct ckpt_req_control *cprc = &sbi->cprc_info;
+   struct ckpt_req *

Re: [f2fs-dev] [PATCH v5 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-21 Thread Daeho Jeong
Got it~! :)

2021년 1월 22일 (금) 오전 9:55, Chao Yu 님이 작성:
>
> If there is no update in v5, it will be better to include my Reviewed-by
> tag, please note that. :)
>
> Thanks,
>
> On 2021/1/21 21:45, Daeho Jeong wrote:
> > From: Daeho Jeong
> >
> > We've added a new mount option "checkpoint=merge", which creates a
> > kernel daemon and makes it to merge concurrent checkpoint requests as
> > much as possible to eliminate redundant checkpoint issues. Plus, we
> > can eliminate the sluggish issue caused by slow checkpoint operation
> > when the checkpoint is done in a process context in a cgroup having
> > low i/o budget and cpu shares. To make this do better, we set the
> > default i/o priority of the kernel daemon to "3", to give one higher
> > priority than other kernel threads. The below verification result
> > explains this.
> > The basic idea has come fromhttps://opensource.samsung.com.
> >
> > [Verification]
> > Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
> > Create two I/O cgroups (fg w/ weight 100, bg w/ weight 20)
> > Set "strict_guarantees" to "1" in BFQ tunables
> >
> > In "fg" cgroup,
> > - thread A => trigger 1000 checkpoint operations
> >"for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
> > done"
> > - thread B => generating async. I/O
> >"fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
> > --filename=test_img --name=test"
> >
> > In "bg" cgroup,
> > - thread C => trigger repeated checkpoint operations
> >    "echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
> > fsync test_dir2; done"
> >
> > We've measured thread A's execution time.
> >
> > [ w/o patch ]
> > Elapsed Time: Avg. 68 seconds
> > [ w/  patch ]
> > Elapsed Time: Avg. 48 seconds
> >
> > Signed-off-by: Daeho Jeong
> > Signed-off-by: Sungjong Seo


Re: [f2fs-dev] [PATCH v5 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-21 Thread Daeho Jeong
Yep~

2021년 1월 22일 (금) 오전 10:46, Jaegeuk Kim 님이 작성:
>
> On 01/21, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > We've added a new mount option "checkpoint=merge", which creates a
> > kernel daemon and makes it to merge concurrent checkpoint requests as
> > much as possible to eliminate redundant checkpoint issues. Plus, we
> > can eliminate the sluggish issue caused by slow checkpoint operation
> > when the checkpoint is done in a process context in a cgroup having
> > low i/o budget and cpu shares. To make this do better, we set the
> > default i/o priority of the kernel daemon to "3", to give one higher
> > priority than other kernel threads. The below verification result
> > explains this.
> > The basic idea has come from https://opensource.samsung.com.
> >
> > [Verification]
> > Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
> > Create two I/O cgroups (fg w/ weight 100, bg w/ weight 20)
> > Set "strict_guarantees" to "1" in BFQ tunables
> >
> > In "fg" cgroup,
> > - thread A => trigger 1000 checkpoint operations
> >   "for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
> >done"
> > - thread B => generating async. I/O
> >   "fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
> >--filename=test_img --name=test"
> >
> > In "bg" cgroup,
> > - thread C => trigger repeated checkpoint operations
> >   "echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
> >fsync test_dir2; done"
> >
> > We've measured thread A's execution time.
> >
> > [ w/o patch ]
> > Elapsed Time: Avg. 68 seconds
> > [ w/  patch ]
> > Elapsed Time: Avg. 48 seconds
> >
> > Signed-off-by: Daeho Jeong 
> > Signed-off-by: Sungjong Seo 
> > ---
> > v2:
> > - inlined ckpt_req_control into f2fs_sb_info and collected statistics
> >   of checkpoint merge operations
> > v3:
> > - fixed some minor errors and cleaned up f2fs_sync_fs()
> > v4:
> > - added an explanation to raise the default i/o priority of the
> >   checkpoint daemon
> > ---
> >  Documentation/filesystems/f2fs.rst |  10 ++
> >  fs/f2fs/checkpoint.c   | 177 +
> >  fs/f2fs/debug.c|  12 ++
> >  fs/f2fs/f2fs.h |  27 +
> >  fs/f2fs/super.c|  55 +++--
> >  5 files changed, 273 insertions(+), 8 deletions(-)
> >
> > diff --git a/Documentation/filesystems/f2fs.rst 
> > b/Documentation/filesystems/f2fs.rst
> > index dae15c96e659..9624a0be0364 100644
> > --- a/Documentation/filesystems/f2fs.rst
> > +++ b/Documentation/filesystems/f2fs.rst
> > @@ -247,6 +247,16 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off 
> > checkpointing. Set to "enabl
> >hide up to all remaining free space. The actual 
> > space that
> >would be unusable can be viewed at 
> > /sys/fs/f2fs//unusable
> >This space is reclaimed once checkpoint=enable.
> > +  Here is another option "merge", which creates a 
> > kernel daemon
> > +  and makes it to merge concurrent checkpoint requests 
> > as much
> > +  as possible to eliminate redundant checkpoint 
> > issues. Plus,
> > +  we can eliminate the sluggish issue caused by slow 
> > checkpoint
> > +  operation when the checkpoint is done in a process 
> > context in
> > +  a cgroup having low i/o budget and cpu shares. To 
> > make this
> > +  do better, we set the default i/o priority of the 
> > kernel daemon
> > +  to "3", to give one higher priority than other 
> > kernel threads.
> > +  This is the same way to give a I/O priority to the 
> > jbd2
> > +  journaling thread of ext4 filesystem.
> >  compress_algorithm=%s Control compress algorithm, currently f2fs 
> > supports "lzo",
> >"lz4", "zstd" and "lzo-rle" algorithm.
> >  compress_log_size=%u  Support configuring compress cluster size, the size 
> > will
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 897edb7c951a..ef6ad3d1957d 100644
> > --- a/fs/f2fs/checkpoi

Re: [f2fs-dev] [PATCH v2] f2fs: add sysfs nodes to get accumulated compression info

2021-03-09 Thread Daeho Jeong
2021년 3월 9일 (화) 오후 6:22, Chao Yu 님이 작성:
>
> On 2021/3/5 10:24, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > Added acc_compr_inodes to show accumulated compressed inode count and
> > acc_compr_blocks to show accumulated secured block count with
>
> I noticed that these stat numbers are recorded in extra reserved area in
> hot node curseg journal, the journal will be persisted only for umount
> or fastboot checkpoint, so the numbers are not so accurate... does this
> satisfy your requirement?
>

Yes, we are satisfied with just getting rough number of them. But, it
would be better if you suggest more accurate way. :)

> > compression in sysfs. These can be re-initialized to "0" by writing "0"
> > value in one of both.
>
> Why do we allow reset the stat numbers?
>

Actually, I want to have a way to clear any stale number of them, but
I agree we don't need this.

> Why not covering all code with macro CONFIG_F2FS_FS_COMPRESSION, since these
> numbers are only be updated when we enable compression.
>

I wanted to keep the info even in the kernel with doesn't support
per-file compression if those had been written once. What do you
think?

> Thanks,


Re: [f2fs-dev] [PATCH v2] f2fs: add sysfs nodes to get accumulated compression info

2021-03-09 Thread Daeho Jeong
We've reconsidered this feature and decided to get just runtime info
of them, not persistent on disk.
I am re-writing it.

Thanks,

2021년 3월 10일 (수) 오전 10:31, Chao Yu 님이 작성:
>
> On 2021/3/9 21:00, Daeho Jeong wrote:
> > 2021년 3월 9일 (화) 오후 6:22, Chao Yu 님이 작성:
> >>
> >> On 2021/3/5 10:24, Daeho Jeong wrote:
> >>> From: Daeho Jeong 
> >>>
> >>> Added acc_compr_inodes to show accumulated compressed inode count and
> >>> acc_compr_blocks to show accumulated secured block count with
> >>
> >> I noticed that these stat numbers are recorded in extra reserved area in
> >> hot node curseg journal, the journal will be persisted only for umount
> >> or fastboot checkpoint, so the numbers are not so accurate... does this
> >> satisfy your requirement?
> >>
> >
> > Yes, we are satisfied with just getting rough number of them. But, it
>
> Alright,
>
> > would be better if you suggest more accurate way. :)
>
> I think this is the cheapest way to store rough number, otherwise it needs to 
> change
> f2fs_checkpoint structure layout or add a new inner inode to persist these 
> stat
> numbers if we want more accurate one.
>
> >
> >>> compression in sysfs. These can be re-initialized to "0" by writing "0"
> >>> value in one of both.
> >>
> >> Why do we allow reset the stat numbers?
> >>
> >
> > Actually, I want to have a way to clear any stale number of them, but
> > I agree we don't need this.
> >
> >> Why not covering all code with macro CONFIG_F2FS_FS_COMPRESSION, since 
> >> these
> >> numbers are only be updated when we enable compression.
> >>
> >
> > I wanted to keep the info even in the kernel with doesn't support
> > per-file compression if those had been written once. What do you
> > think?
>
> Sure, if so it's fine to me. :)
>
> Thanks,
>
> >
> >> Thanks,
> > .
> >


Re: [f2fs-dev] f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

2021-01-04 Thread Daeho Jeong
Hi Colin,

Thanks for notifying me. We need to just continue without
set_page_dirty() and f2fs_put_page().

2021년 1월 4일 (월) 오후 11:43, Colin Ian King 님이 작성:
>
> Hi,
>
> Static analysis using Coverity has detected a potential null pointer
> dereference after a null check in the following commit:
>
> commit 5fdb322ff2c2b4ad519f490dcb7ebb96c5439af7
> Author: Daeho Jeong 
> Date:   Thu Dec 3 15:56:15 2020 +0900
>
> f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE
>
> The analysis is as follows:
>
> 4025 static int redirty_blocks(struct inode *inode, pgoff_t page_idx,
> int len)
> 4026 {
> 4027DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx);
> 4028struct address_space *mapping = inode->i_mapping;
> 4029struct page *page;
> 4030pgoff_t redirty_idx = page_idx;
> 4031int i, page_len = 0, ret = 0;
> 4032
> 4033page_cache_ra_unbounded(&ractl, len, 0);
> 4034
>
> 1. Condition i < len, taking true branch.
> 4. Condition i < len, taking true branch.
>
> 4035for (i = 0; i < len; i++, page_idx++) {
> 4036page = read_cache_page(mapping, page_idx, NULL, NULL);
>
> 2. Condition IS_ERR(page), taking false branch.
> 5. Condition IS_ERR(page), taking true branch.
>
> 4037if (IS_ERR(page)) {
> 4038ret = PTR_ERR(page);
>
> 6. Breaking from loop.
>
> 4039break;
> 4040}
> 4041page_len++;
>
> 3. Jumping back to the beginning of the loop.
>
> 4042}
> 4043
>
> 7. Condition i < page_len, taking true branch.
>
> 4044for (i = 0; i < page_len; i++, redirty_idx++) {
> 4045page = find_lock_page(mapping, redirty_idx);
>
> 8. Condition !page, taking true branch.
> 9. var_compare_op: Comparing page to null implies that page might be
> null.
>
> 4046if (!page)
> 4047ret = -ENOENT;
>
> Dereference after null check (FORWARD_NULL)
>
>10. var_deref_model: Passing null pointer page to set_page_dirty,
> which dereferences it.
>
> 4048set_page_dirty(page);
> 4049f2fs_put_page(page, 1);
> 4050f2fs_put_page(page, 0);
> 4051}
> 4052
> 4053return ret;
> 4054 }
>
> The !page check on line 4046 sets ret appropriately but we have a
> following call to set_page_dirty on a null page that causes the error.
> Not sure how this should be fixed, should the check bail out immediately
> or just avoid the following set_page_dirty anf f2fs_put_page calls?
>
> Colin
>
>
>
> ___
> Linux-f2fs-devel mailing list
> linux-f2fs-de...@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[PATCH] f2fs: fix null page reference in redirty_blocks

2021-01-04 Thread Daeho Jeong
From: Daeho Jeong 

Fixed null page reference when find_lock_page() fails in
redirty_blocks().

Signed-off-by: Daeho Jeong 
Reported-by: Colin Ian King 
Fixes: 5fdb322ff2c2 ("f2fs: add F2FS_IOC_DECOMPRESS_FILE and 
F2FS_IOC_COMPRESS_FILE")
---
 fs/f2fs/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 9e5275716be8..bf6682a52433 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4060,8 +4060,10 @@ static int redirty_blocks(struct inode *inode, pgoff_t 
page_idx, int len)
 
for (i = 0; i < page_len; i++, redirty_idx++) {
page = find_lock_page(mapping, redirty_idx);
-   if (!page)
+   if (!page) {
ret = -ENOENT;
+   continue;
+   }
set_page_dirty(page);
f2fs_put_page(page, 1);
f2fs_put_page(page, 0);
-- 
2.29.2.729.g45daf8777d-goog



Re: [f2fs-dev] [PATCH] f2fs: fix null page reference in redirty_blocks

2021-01-04 Thread Daeho Jeong
Yes, it's better~ :)

2021년 1월 5일 (화) 오전 10:44, Chao Yu 님이 작성:
>
> On 2021/1/5 9:28, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > Fixed null page reference when find_lock_page() fails in
> > redirty_blocks().
> >
> > Signed-off-by: Daeho Jeong 
> > Reported-by: Colin Ian King 
> > Fixes: 5fdb322ff2c2 ("f2fs: add F2FS_IOC_DECOMPRESS_FILE and 
> > F2FS_IOC_COMPRESS_FILE")
> > ---
> >   fs/f2fs/file.c | 4 +++-
> >   1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 9e5275716be8..bf6682a52433 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -4060,8 +4060,10 @@ static int redirty_blocks(struct inode *inode, 
> > pgoff_t page_idx, int len)
> >
> >   for (i = 0; i < page_len; i++, redirty_idx++) {
> >   page = find_lock_page(mapping, redirty_idx);
> > - if (!page)
> > + if (!page) {
> >   ret = -ENOENT;
>
> ret = -ENOMEM;
>
> > + continue;
>
> How about breaking the loop for out-of-memory case, because in such condition
> we have less chance to dirty whole cluster due to no memory, and continue to
> allocate pages for target file will make system suffer more memory pressure,
> it will make many thing slower.
>
> Thanks,
>
> > + }
> >   set_page_dirty(page);
> >   f2fs_put_page(page, 1);
> >   f2fs_put_page(page, 0);
> >


[PATCH v2] f2fs: fix null page reference in redirty_blocks

2021-01-04 Thread Daeho Jeong
From: Daeho Jeong 

Fixed null page reference when find_lock_page() fails in
redirty_blocks().

Signed-off-by: Daeho Jeong 
Reported-by: Colin Ian King 
Fixes: 5fdb322ff2c2 ("f2fs: add F2FS_IOC_DECOMPRESS_FILE and 
F2FS_IOC_COMPRESS_FILE")
---
v2: changed error value and break the loop when error occurs
---
 fs/f2fs/file.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 9e5275716be8..d27173c24391 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4060,8 +4060,10 @@ static int redirty_blocks(struct inode *inode, pgoff_t 
page_idx, int len)
 
for (i = 0; i < page_len; i++, redirty_idx++) {
page = find_lock_page(mapping, redirty_idx);
-   if (!page)
-   ret = -ENOENT;
+   if (!page) {
+   ret = -ENOMEM;
+   break;
+   }
set_page_dirty(page);
f2fs_put_page(page, 1);
f2fs_put_page(page, 0);
-- 
2.29.2.729.g45daf8777d-goog



Re: [f2fs-dev] [PATCH v2 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-15 Thread Daeho Jeong
2021년 1월 15일 (금) 오후 6:22, Chao Yu 님이 작성:
>
> On 2021/1/14 14:23, Daeho Jeong wrote:
> > From: Daeho Jeong 
> >
> > We've added a new mount option "checkpoint=merge", which creates a
> > kernel daemon and makes it to merge concurrent checkpoint requests as
> > much as possible to eliminate redundant checkpoint issues. Plus, we
> > can eliminate the sluggish issue caused by slow checkpoint operation
> > when the checkpoint is done in a process context in a cgroup having
> > low i/o budget and cpu shares, and The below verification result
> > explains this.
> > The basic idea has come from https://opensource.samsung.com.
> >
> > [Verification]
> > Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
> > Create two I/O cgroups (fg w/ weight 100, bg w/ weight 20)
> > Set "strict_guarantees" to "1" in BFQ tunables
> >
> > In "fg" cgroup,
> > - thread A => trigger 1000 checkpoint operations
> >"for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
> > done"
> > - thread B => generating async. I/O
> >"fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
> > --filename=test_img --name=test"
> >
> > In "bg" cgroup,
> > - thread C => trigger repeated checkpoint operations
> >"echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
> > fsync test_dir2; done"
> >
> > We've measured thread A's execution time.
> >
> > [ w/o patch ]
> > Elapsed Time: Avg. 68 seconds
> > [ w/  patch ]
> > Elapsed Time: Avg. 48 seconds
> >
> > Signed-off-by: Daeho Jeong 
> > Signed-off-by: Sungjong Seo 
> > ---
> > v2:
> > - inlined ckpt_req_control into f2fs_sb_info and collected stastics
> >
> >of checkpoint merge operations
> > ---
> >   Documentation/filesystems/f2fs.rst |   6 ++
> >   fs/f2fs/checkpoint.c   | 163 +
> >   fs/f2fs/debug.c|  12 +++
> >   fs/f2fs/f2fs.h |  27 +
> >   fs/f2fs/super.c|  56 +-
> >   5 files changed, 260 insertions(+), 4 deletions(-)
> >
> > diff --git a/Documentation/filesystems/f2fs.rst 
> > b/Documentation/filesystems/f2fs.rst
> > index dae15c96e659..bccc021bf31a 100644
> > --- a/Documentation/filesystems/f2fs.rst
> > +++ b/Documentation/filesystems/f2fs.rst
> > @@ -247,6 +247,12 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off 
> > checkpointing. Set to "enabl
> >hide up to all remaining free space. The actual 
> > space that
> >would be unusable can be viewed at 
> > /sys/fs/f2fs//unusable
> >This space is reclaimed once checkpoint=enable.
> > +  Here is another option "merge", which creates a 
> > kernel daemon
> > +  and makes it to merge concurrent checkpoint requests 
> > as much
> > +  as possible to eliminate redundant checkpoint 
> > issues. Plus,
> > +  we can eliminate the sluggish issue caused by slow 
> > checkpoint
> > +  operation when the checkpoint is done in a process 
> > context in
> > +  a cgroup having low i/o budget and cpu shares.
> >   compress_algorithm=%sControl compress algorithm, currently f2fs 
> > supports "lzo",
> >"lz4", "zstd" and "lzo-rle" algorithm.
> >   compress_log_size=%u Support configuring compress cluster size, 
> > the size will
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 897edb7c951a..e0668cec3b80 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -13,6 +13,7 @@
> >   #include 
> >   #include 
> >   #include 
> > +#include 
> >
> >   #include "f2fs.h"
> >   #include "node.h"
> > @@ -20,6 +21,8 @@
> >   #include "trace.h"
> >   #include 
> >
> > +#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
> > +
> >   static struct kmem_cache *ino_entry_slab;
> >   struct kmem_cache *f2fs_inode_entry_slab;
> >
> > @@ -1707,3 +1710,163 @@ void f2fs_destroy_checkpoint_caches(void)
> >   kmem_cache_destroy(ino_entry_slab);
> >   kmem_cache_destroy(f2fs_inode_entry_slab);
>

Re: [f2fs-dev] [PATCH v2 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-15 Thread Daeho Jeong
2021년 1월 15일 (금) 오후 11:00, Daeho Jeong 님이 작성:
>
> 2021년 1월 15일 (금) 오후 6:22, Chao Yu 님이 작성:
> >
> > On 2021/1/14 14:23, Daeho Jeong wrote:
> > > From: Daeho Jeong 
> > >
> > > We've added a new mount option "checkpoint=merge", which creates a
> > > kernel daemon and makes it to merge concurrent checkpoint requests as
> > > much as possible to eliminate redundant checkpoint issues. Plus, we
> > > can eliminate the sluggish issue caused by slow checkpoint operation
> > > when the checkpoint is done in a process context in a cgroup having
> > > low i/o budget and cpu shares, and The below verification result
> > > explains this.
> > > The basic idea has come from https://opensource.samsung.com.
> > >
> > > [Verification]
> > > Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
> > > Create two I/O cgroups (fg w/ weight 100, bg w/ wight 20)
> > > Set "strict_guarantees" to "1" in BFQ tunables
> > >
> > > In "fg" cgroup,
> > > - thread A => trigger 1000 checkpoint operations
> > >"for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
> > > done"
> > > - thread B => generating async. I/O
> > >"fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
> > > --filename=test_img --name=test"
> > >
> > > In "bg" cgroup,
> > > - thread C => trigger repeated checkpoint operations
> > >"echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
> > > fsync test_dir2; done"
> > >
> > > We've measured thread A's execution time.
> > >
> > > [ w/o patch ]
> > > Elapsed Time: Avg. 68 seconds
> > > [ w/  patch ]
> > > Elapsed Time: Avg. 48 seconds
> > >
> > > Signed-off-by: Daeho Jeong 
> > > Signed-off-by: Sungjong Seo 
> > > ---
> > > v2:
> > > - inlined ckpt_req_control into f2fs_sb_info and collected statistics
> > >of checkpoint merge operations
> > > ---
> > >   Documentation/filesystems/f2fs.rst |   6 ++
> > >   fs/f2fs/checkpoint.c   | 163 +
> > >   fs/f2fs/debug.c|  12 +++
> > >   fs/f2fs/f2fs.h |  27 +
> > >   fs/f2fs/super.c|  56 +-
> > >   5 files changed, 260 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/Documentation/filesystems/f2fs.rst 
> > > b/Documentation/filesystems/f2fs.rst
> > > index dae15c96e659..bccc021bf31a 100644
> > > --- a/Documentation/filesystems/f2fs.rst
> > > +++ b/Documentation/filesystems/f2fs.rst
> > > @@ -247,6 +247,12 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn 
> > > off checkpointing. Set to "enabl
> > >hide up to all remaining free space. The actual 
> > > space that
> > >would be unusable can be viewed at 
> > > /sys/fs/f2fs//unusable
> > >This space is reclaimed once checkpoint=enable.
> > > +  Here is another option "merge", which creates a 
> > > kernel daemon
> > > +  and makes it to merge concurrent checkpoint 
> > > requests as much
> > > +  as possible to eliminate redundant checkpoint 
> > > issues. Plus,
> > > +  we can eliminate the sluggish issue caused by slow 
> > > checkpoint
> > > +  operation when the checkpoint is done in a process 
> > > context in
> > > +  a cgroup having low i/o budget and cpu shares.
> > >   compress_algorithm=%sControl compress algorithm, currently f2fs 
> > > supports "lzo",
> > >"lz4", "zstd" and "lzo-rle" algorithm.
> > >   compress_log_size=%u Support configuring compress cluster size, 
> > > the size will
> > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > index 897edb7c951a..e0668cec3b80 100644
> > > --- a/fs/f2fs/checkpoint.c
> > > +++ b/fs/f2fs/checkpoint.c
> > > @@ -13,6 +13,7 @@
> > >   #include 
> > >   #include 
> > >   #include 
> > > +#include 
> > >
> > >   #include "f2fs.h"
> > >   #include "node.h"
&g

[PATCH v3] f2fs: add ckpt_thread_ioprio sysfs node

2021-01-15 Thread Daeho Jeong
From: Daeho Jeong 

Added "ckpt_thread_ioprio" sysfs node to give a way to change checkpoint
merge daemon's io priority. Its default value is "be,3", which means
"BE" I/O class and I/O priority "3". We can select the class between "rt"
and "be", and set the I/O priority within valid range of it.
"," delimiter is necessary in between I/O class and priority number.

Signed-off-by: Daeho Jeong 
---
v2:
- adapt to inlining ckpt_req_control of f2fs_sb_info
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  8 
 fs/f2fs/checkpoint.c|  2 +-
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/sysfs.c | 51 +
 4 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3dfee94e0618..0c48b2e7dfd4 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -377,3 +377,11 @@ Description:   This gives a control to limit the bio 
size in f2fs.
Default is zero, which will follow underlying block layer limit,
whereas, if it has a certain bytes value, f2fs won't submit a
bio larger than that size.
+What:  /sys/fs/f2fs//ckpt_thread_ioprio
+Date:  January 2021
+Contact:   "Daeho Jeong" 
+Description:   Give a way to change checkpoint merge daemon's io priority.
+   Its default value is "be,3", which means "BE" I/O class and
+   I/O priority "3". We can select the class between "rt" and "be",
+   and set the I/O priority within valid range of it. "," delimiter
+   is necessary in between I/O class and priority number.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ef6ad3d1957d..30e3fe161bb4 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1854,7 +1854,7 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
return PTR_ERR(cprc->f2fs_issue_ckpt);
}
 
-   set_task_ioprio(cprc->f2fs_issue_ckpt, DEFAULT_CHECKPOINT_IOPRIO);
+   set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio);
 
return 0;
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f2ae075aa723..517eb0eda638 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -276,6 +276,7 @@ struct ckpt_req {
 
 struct ckpt_req_control {
struct task_struct *f2fs_issue_ckpt;/* checkpoint task */
+   int ckpt_thread_ioprio; /* checkpoint merge thread 
ioprio */
wait_queue_head_t ckpt_wait_queue;  /* waiting queue for wake-up */
atomic_t issued_ckpt;   /* # of actually issued ckpts */
atomic_t total_ckpt;/* # of total ckpts */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 30bae57428d1..ddd70395148d 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "f2fs.h"
 #include "segment.h"
@@ -34,6 +35,7 @@ enum {
FAULT_INFO_TYPE,/* struct f2fs_fault_info */
 #endif
RESERVED_BLOCKS,/* struct f2fs_sb_info */
+   CPRC_INFO,  /* struct ckpt_req_control */
 };
 
 struct f2fs_attr {
@@ -70,6 +72,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, 
int struct_type)
else if (struct_type == STAT_INFO)
return (unsigned char *)F2FS_STAT(sbi);
 #endif
+   else if (struct_type == CPRC_INFO)
+   return (unsigned char *)&sbi->cprc_info;
return NULL;
 }
 
@@ -255,6 +259,23 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
return len;
}
 
+   if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) {
+   struct ckpt_req_control *cprc = &sbi->cprc_info;
+   int len = 0;
+   int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio);
+   int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio);
+
+   if (class == IOPRIO_CLASS_RT)
+   len += scnprintf(buf + len, PAGE_SIZE - len, "rt,");
+   else if (class == IOPRIO_CLASS_BE)
+   len += scnprintf(buf + len, PAGE_SIZE - len, "be,");
+   else
+   return -EINVAL;
+
+   len += scnprintf(buf + len, PAGE_SIZE - len, "%d\n", data);
+   return len;
+   }
+
ui = (unsigned int *)(ptr + a->offset);
 
return sprintf(buf, "%u\n", *ui);
@@ -308,6 +329,34 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return ret ? ret : count;
}
 
+   if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) {
+   co

Re: [PATCH v3] f2fs: add ckpt_thread_ioprio sysfs node

2021-01-15 Thread Daeho Jeong
Sorry, ignore this.
I've sent this wrong. :(

2021년 1월 16일 (토) 오전 9:58, Daeho Jeong 님이 작성:
>
> From: Daeho Jeong 
>
> Added "ckpt_thread_ioprio" sysfs node to give a way to change checkpoint
> merge daemon's io priority. Its default value is "be,3", which means
> "BE" I/O class and I/O priority "3". We can select the class between "rt"
> and "be", and set the I/O priority within valid range of it.
> "," delimiter is necessary in between I/O class and priority number.
>
> Signed-off-by: Daeho Jeong 
> ---
> v2:
> - adapt to inlining ckpt_req_control of f2fs_sb_info
> ---
>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 
>  fs/f2fs/checkpoint.c|  2 +-
>  fs/f2fs/f2fs.h  |  1 +
>  fs/f2fs/sysfs.c | 51 +
>  4 files changed, 61 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
> b/Documentation/ABI/testing/sysfs-fs-f2fs
> index 3dfee94e0618..0c48b2e7dfd4 100644
> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> @@ -377,3 +377,11 @@ Description:   This gives a control to limit the bio 
> size in f2fs.
> Default is zero, which will follow underlying block layer 
> limit,
> whereas, if it has a certain bytes value, f2fs won't submit a
> bio larger than that size.
> +What:  /sys/fs/f2fs//ckpt_thread_ioprio
> +Date:  January 2021
> +Contact:   "Daeho Jeong" 
> +Description:   Give a way to change checkpoint merge daemon's io priority.
> +   Its default value is "be,3", which means "BE" I/O class and
> +   I/O priority "3". We can select the class between "rt" and 
> "be",
> +   and set the I/O priority within valid range of it. "," 
> delimiter
> +   is necessary in between I/O class and priority number.
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index ef6ad3d1957d..30e3fe161bb4 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1854,7 +1854,7 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
> return PTR_ERR(cprc->f2fs_issue_ckpt);
> }
>
> -   set_task_ioprio(cprc->f2fs_issue_ckpt, DEFAULT_CHECKPOINT_IOPRIO);
> +   set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio);
>
> return 0;
>  }
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index f2ae075aa723..517eb0eda638 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -276,6 +276,7 @@ struct ckpt_req {
>
>  struct ckpt_req_control {
> struct task_struct *f2fs_issue_ckpt;/* checkpoint task */
> +   int ckpt_thread_ioprio; /* checkpoint merge thread 
> ioprio */
> wait_queue_head_t ckpt_wait_queue;  /* waiting queue for wake-up 
> */
> atomic_t issued_ckpt;   /* # of actually issued ckpts */
> atomic_t total_ckpt;/* # of total ckpts */
> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> index 30bae57428d1..ddd70395148d 100644
> --- a/fs/f2fs/sysfs.c
> +++ b/fs/f2fs/sysfs.c
> @@ -11,6 +11,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include "f2fs.h"
>  #include "segment.h"
> @@ -34,6 +35,7 @@ enum {
> FAULT_INFO_TYPE,/* struct f2fs_fault_info */
>  #endif
> RESERVED_BLOCKS,/* struct f2fs_sb_info */
> +   CPRC_INFO,  /* struct ckpt_req_control */
>  };
>
>  struct f2fs_attr {
> @@ -70,6 +72,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info 
> *sbi, int struct_type)
> else if (struct_type == STAT_INFO)
> return (unsigned char *)F2FS_STAT(sbi);
>  #endif
> +   else if (struct_type == CPRC_INFO)
> +   return (unsigned char *)&sbi->cprc_info;
> return NULL;
>  }
>
> @@ -255,6 +259,23 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
> return len;
> }
>
> +   if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) {
> +   struct ckpt_req_control *cprc = &sbi->cprc_info;
> +   int len = 0;
> +   int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio);
> +   int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio);
> +
> +   if (class == IOPRIO_CLASS_RT)
> +   len += scnprintf(buf + len, PAGE_SIZE - len, "rt,");
> + 

[PATCH v3 1/2] f2fs: introduce checkpoint=merge mount option

2021-01-15 Thread Daeho Jeong
From: Daeho Jeong 

We've added a new mount option "checkpoint=merge", which creates a
kernel daemon and makes it to merge concurrent checkpoint requests as
much as possible to eliminate redundant checkpoint issues. Plus, we
can eliminate the sluggish issue caused by slow checkpoint operation
when the checkpoint is done in a process context in a cgroup having
low i/o budget and cpu shares, and The below verification result
explains this.
The basic idea has come from https://opensource.samsung.com.

[Verification]
Android Pixel Device(ARM64, 7GB RAM, 256GB UFS)
Create two I/O cgroups (fg w/ weight 100, bg w/ weight 20)
Set "strict_guarantees" to "1" in BFQ tunables

In "fg" cgroup,
- thread A => trigger 1000 checkpoint operations
  "for i in `seq 1 1000`; do touch test_dir1/file; fsync test_dir1;
   done"
- thread B => generating async. I/O
  "fio --rw=write --numjobs=1 --bs=128k --runtime=3600 --time_based=1
   --filename=test_img --name=test"

In "bg" cgroup,
- thread C => trigger repeated checkpoint operations
  "echo $$ > /dev/blkio/bg/tasks; while true; do touch test_dir2/file;
   fsync test_dir2; done"

We've measured thread A's execution time.

[ w/o patch ]
Elapsed Time: Avg. 68 seconds
[ w/  patch ]
Elapsed Time: Avg. 48 seconds

Signed-off-by: Daeho Jeong 
Signed-off-by: Sungjong Seo 
---
v2:
- inlined ckpt_req_control into f2fs_sb_info and collected statistics
  of checkpoint merge operations
v3:
- fixed some minor errors and cleaned up f2fs_sync_fs()
---
 Documentation/filesystems/f2fs.rst |   6 +
 fs/f2fs/checkpoint.c   | 177 +
 fs/f2fs/debug.c|  12 ++
 fs/f2fs/f2fs.h |  27 +
 fs/f2fs/super.c|  55 +++--
 5 files changed, 269 insertions(+), 8 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index dae15c96e659..bccc021bf31a 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -247,6 +247,12 @@ checkpoint=%s[:%u[%]]   Set to "disable" to turn off 
checkpointing. Set to "enabl
 hide up to all remaining free space. The actual space 
that
 would be unusable can be viewed at 
/sys/fs/f2fs//unusable
 This space is reclaimed once checkpoint=enable.
+Here is another option "merge", which creates a kernel 
daemon
+and makes it to merge concurrent checkpoint requests 
as much
+as possible to eliminate redundant checkpoint issues. 
Plus,
+we can eliminate the sluggish issue caused by slow 
checkpoint
+operation when the checkpoint is done in a process 
context in
+a cgroup having low i/o budget and cpu shares.
 compress_algorithm=%s   Control compress algorithm, currently f2fs supports 
"lzo",
 "lz4", "zstd" and "lzo-rle" algorithm.
 compress_log_size=%uSupport configuring compress cluster size, the size 
will
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 897edb7c951a..ef6ad3d1957d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "f2fs.h"
 #include "node.h"
@@ -20,6 +21,8 @@
 #include "trace.h"
 #include 
 
+#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
+
 static struct kmem_cache *ino_entry_slab;
 struct kmem_cache *f2fs_inode_entry_slab;
 
@@ -1707,3 +1710,177 @@ void f2fs_destroy_checkpoint_caches(void)
kmem_cache_destroy(ino_entry_slab);
kmem_cache_destroy(f2fs_inode_entry_slab);
 }
+
+static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
+{
+   struct cp_control cpc = { .reason = CP_SYNC, };
+   int err;
+
+   down_write(&sbi->gc_lock);
+   err = f2fs_write_checkpoint(sbi, &cpc);
+   up_write(&sbi->gc_lock);
+
+   return err;
+}
+
+static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi)
+{
+   struct ckpt_req_control *cprc = &sbi->cprc_info;
+   struct ckpt_req *req, *next;
+   struct llist_node *dispatch_list;
+   u64 sum_diff = 0, diff, count = 0;
+   int ret;
+
+   dispatch_list = llist_del_all(&cprc->issue_list);
+   if (!dispatch_list)
+   return;
+   dispatch_list = llist_reverse_order(dispatch_list);
+
+   ret = __write_checkpoint_sync(sbi);
+   atomic_inc(&cprc->issued_ckpt);
+
+   llist_for_each_entry_safe(req, next, dispatch_list, llnode) {
+   diff = (u64)ktime_ms_delta(ktime_get(), req->queue_time);
+   req->ret = ret;
+   

Re: [f2fs-dev] [PATCH v2] f2fs: fix null page reference in redirty_blocks

2021-01-05 Thread Daeho Jeong
Got it~

Thanks,

2021년 1월 5일 (화) 오후 9:06, Markus Elfring 님이 작성:
>
> > Fixed null page reference when find_lock_page() fails in
> > redirty_blocks().
>
> I suggest to choose an other imperative wording for this change description.
>
> See also:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?id=36bbbd0e234d817938bdc52121a0f5473b3e58f5#n89
>
>
> > v2: changed error value and break the loop when error occurs
>
> I propose to use a return statement instead of a break in the second if branch
> for this function implementation.
>
> See also:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst?id=36bbbd0e234d817938bdc52121a0f5473b3e58f5#n481
>
> Regards,
> Markus
>
>
> ___
> Linux-f2fs-devel mailing list
> linux-f2fs-de...@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[PATCH v2] f2fs: fix null page reference in redirty_blocks

2021-01-05 Thread Daeho Jeong
From: Daeho Jeong 

By Colin's static analysis, we found out there is a null page reference
under low memory situation in redirty_blocks. I've made the page finding
loop stop immediately and return an error not to cause further memory
pressure when we run into a failure to find a page under low memory
condition.

Signed-off-by: Daeho Jeong 
Reported-by: Colin Ian King 
Fixes: 5fdb322ff2c2 ("f2fs: add F2FS_IOC_DECOMPRESS_FILE and 
F2FS_IOC_COMPRESS_FILE")
---
v2: changed error value and quit the page finding loop immediately
when error occurs
---
 fs/f2fs/file.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 9e5275716be8..d27173c24391 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4060,8 +4060,10 @@ static int redirty_blocks(struct inode *inode, pgoff_t 
page_idx, int len)
 
for (i = 0; i < page_len; i++, redirty_idx++) {
page = find_lock_page(mapping, redirty_idx);
-   if (!page)
-   ret = -ENOENT;
+   if (!page) {
+   ret = -ENOMEM;
+   break;
+   }
set_page_dirty(page);
f2fs_put_page(page, 1);
f2fs_put_page(page, 0);
-- 
2.29.2.729.g45daf8777d-goog



Re: [f2fs-dev] [PATCH v2] f2fs: fix null page reference in redirty_blocks

2021-01-05 Thread Daeho Jeong
Thanks for the notice~

2021년 1월 6일 (수) 오전 10:04, Chao Yu 님이 작성:
>
> Daeho,
>
> FYI
>
> https://www.spinics.net/lists/kernel/msg3595944.html
>
> On 2021/1/5 20:04, Markus Elfring wrote:
> >> Fixed null page reference when find_lock_page() fails in
> >> redirty_blocks().
> >
> > I suggest to choose an other imperative wording for this change description.
> >
> > See also:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?id=36bbbd0e234d817938bdc52121a0f5473b3e58f5#n89
> >
> >
> >> v2: changed error value and break the loop when error occurs
> >
> > I propose to use a return statement instead of a break in the second if 
> > branch
> > for this function implementation.
> >
> > See also:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst?id=36bbbd0e234d817938bdc52121a0f5473b3e58f5#n481
> >
> > Regards,
> > Markus
> >
> >
> > ___
> > Linux-f2fs-devel mailing list
> > linux-f2fs-de...@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > .
> >
>
>
> ___
> Linux-f2fs-devel mailing list
> linux-f2fs-de...@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH 2/2] f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

2020-11-26 Thread Daeho Jeong
Chao,

Got it~

Eric,

Actually, I wanted to detour the internal readahead mechanism using
page_cache_ra_unbounded() to generate cluster size aligned read
requests.
But, page_cache_async_readahead() or page_cache_sync_readahead() can
be also good enough, since those can compensate for the misaligned
reads reading more pages in advance.

Thanks,

2020년 11월 27일 (금) 오전 2:49, Eric Biggers 님이 작성:
>
> On Thu, Nov 26, 2020 at 02:04:41PM +0900, Daeho Jeong wrote:
> > Eric,
> >
> > do_page_cache_ra() is defined in mm/internal.h for internal use
> > between in mm, so we cannot use this one right now.
> > So, I think we could use page_cache_ra_unbounded(), because we already
> > check i_size boundary on our own.
> > What do you think?
>
> What about page_cache_async_readahead() or page_cache_sync_readahead()?
>
> - Eric


Re: [f2fs-dev] [PATCH 2/2] f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

2020-11-26 Thread Daeho Jeong
Re-thinking about this, page_cache_sync_readahead() is not good for
our situation, it might end up with cluster misaligned reads which
trigger internal duplicated cluster reads.

2020년 11월 27일 (금) 오전 8:46, Daeho Jeong 님이 작성:
>
> Chao,
>
> Got it~
>
> Eric,
>
> Actually, I wanted to detour the internal readahead mechanism using
> page_cache_ra_unbounded() to generate cluster size aligned read
> requests.
> But, page_cache_async_readahead() or page_cache_sync_readahead() can
> be also good enough, since those can compensate for the misaligned
> reads reading more pages in advance.
>
> Thanks,
>
> 2020년 11월 27일 (금) 오전 2:49, Eric Biggers 님이 작성:
> >
> > On Thu, Nov 26, 2020 at 02:04:41PM +0900, Daeho Jeong wrote:
> > > Eric,
> > >
> > > do_page_cache_ra() is defined in mm/internal.h for internal use
> > > between in mm, so we cannot use this one right now.
> > > So, I think we could use page_cache_ra_unbounded(), because we already
> > > check i_size boundary on our own.
> > > What do you think?
> >
> > What about page_cache_async_readahead() or page_cache_sync_readahead()?
> >
> > - Eric


Re: [f2fs-dev] [PATCH] f2fs: add compr_inode and compr_blocks sysfs nodes

2020-11-29 Thread Daeho Jeong
Sure, but I don't think we need to expose compr_inode and compr_block right now.

2020년 11월 27일 (금) 오후 6:44, Chao Yu 님이 작성:
>
> Daeho,
>
> How about updating this patch based on below patch?
>
> f2fs: introduce a new per-sb directory in sysfs
>
> On 2020/10/22 10:53, Daeho Jeong wrote:
> > Yep, It sounds good to me.
> >
> > 2020년 10월 21일 (수) 오후 3:08, Chao Yu 님이 작성:
> >>
> >> On 2020/10/16 13:14, Daeho Jeong wrote:
> >>> From: Daeho Jeong 
> >>>
> >>> Added compr_inode to show compressed inode count and compr_blocks to
> >>> show compressed block count in sysfs.
> >>
> >> As there are so many entries in ../f2fs// directory, it looks a mess
> >> there, I suggest that we can add a new directory 'stats' in 
> >> ../f2fs//,
> >> in where we can store all readonly stats related entries there later.
> >>
> >> How do you think?
> >>
> >> Thanks,
> >>
> >>>
> >>> Signed-off-by: Daeho Jeong 
> >>> ---
> >>>Documentation/ABI/testing/sysfs-fs-f2fs | 10 ++
> >>>fs/f2fs/sysfs.c | 17 +
> >>>2 files changed, 27 insertions(+)
> >>>
> >>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
> >>> b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> index 834d0becae6d..a01c26484c69 100644
> >>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> @@ -350,3 +350,13 @@ Date:April 2020
> >>>Contact:"Daeho Jeong" 
> >>>Description:Give a way to change iostat_period time. 3secs by 
> >>> default.
> >>>The new iostat trace gives stats gap given the period.
> >>> +
> >>> +What:/sys/fs/f2fs//compr_inode
> >>> +Date:October 2020
> >>> +Contact: "Daeho Jeong" 
> >>> +Description: Show compressed inode count
> >>> +
> >>> +What:/sys/fs/f2fs//compr_blocks
> >>> +Date:October 2020
> >>> +Contact: "Daeho Jeong" 
> >>> +Description: Show compressed block count
> >>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >>> index 94c98e412aa1..7139a29a00d3 100644
> >>> --- a/fs/f2fs/sysfs.c
> >>> +++ b/fs/f2fs/sysfs.c
> >>> @@ -223,6 +223,19 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a,
> >>>f2fs_update_sit_info(sbi);
> >>>return sprintf(buf, "%llu\n", (unsigned long 
> >>> long)(si->avg_vblocks));
> >>>}
> >>> +
> >>> +static ssize_t compr_inode_show(struct f2fs_attr *a,
> >>> + struct f2fs_sb_info *sbi, char *buf)
> >>> +{
> >>> + return sprintf(buf, "%u\n", atomic_read(&sbi->compr_inode));
> >>> +}
> >>> +
> >>> +static ssize_t compr_blocks_show(struct f2fs_attr *a,
> >>> + struct f2fs_sb_info *sbi, char *buf)
> >>> +{
> >>> + return sprintf(buf, "%llu\n", atomic64_read(&sbi->compr_blocks));
> >>> +}
> >>> +
> >>>#endif
> >>>
> >>>static ssize_t main_blkaddr_show(struct f2fs_attr *a,
> >>> @@ -591,6 +604,8 @@ F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, 
> >>> gc_background_calls, bg_gc);
> >>>F2FS_GENERAL_RO_ATTR(moved_blocks_background);
> >>>F2FS_GENERAL_RO_ATTR(moved_blocks_foreground);
> >>>F2FS_GENERAL_RO_ATTR(avg_vblocks);
> >>> +F2FS_GENERAL_RO_ATTR(compr_inode);
> >>> +F2FS_GENERAL_RO_ATTR(compr_blocks);
> >>>#endif
> >>>
> >>>#ifdef CONFIG_FS_ENCRYPTION
> >>> @@ -675,6 +690,8 @@ static struct attribute *f2fs_attrs[] = {
> >>>ATTR_LIST(moved_blocks_foreground),
> >>>ATTR_LIST(moved_blocks_background),
> >>>ATTR_LIST(avg_vblocks),
> >>> + ATTR_LIST(compr_inode),
> >>> + ATTR_LIST(compr_blocks),
> >>>#endif
> >>>NULL,
> >>>};
> >>>
> > .
> >


[PATCH 1/2] f2fs: add compress_mode mount option

2020-11-22 Thread Daeho Jeong
From: Daeho Jeong 

We will add a new "compress_mode" mount option to control file
compression mode. This supports "fs-based" and "user-based".
In "fs-based" mode (default), f2fs does automatic compression on
the compression enabled files. In "user-based" mode, f2fs disables
the automatic compression and gives the user discretion of choosing
the target file and the timing. It means the user can do manual
compression/decompression on the compression enabled files using ioctls.

Signed-off-by: Daeho Jeong 
---
 Documentation/filesystems/f2fs.rst |  7 +++
 fs/f2fs/data.c | 10 +-
 fs/f2fs/f2fs.h | 30 ++
 fs/f2fs/segment.c  |  2 +-
 fs/f2fs/super.c| 23 +++
 5 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst 
b/Documentation/filesystems/f2fs.rst
index b8ee761c9922..0679c53d5012 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -260,6 +260,13 @@ compress_extension=%s   Support adding specified 
extension, so that f2fs can enab
 For other files, we can still enable compression via 
ioctl.
 Note that, there is one reserved special extension 
'*', it
 can be set to enable compression for all files.
+compress_mode=%sControl file compression mode. This supports 
"fs-based" and
+"user-based". In "fs-based" mode (default), f2fs does
+automatic compression on the compression enabled files.
+In "user-based" mode, f2fs disables the automaic 
compression
+and gives the user discretion of choosing the target 
file and
+the timing. The user can do manual 
compression/decompression
+on the compression enabled files using ioctls.
 inlinecrypt When possible, encrypt/decrypt the contents of 
encrypted
 files using the blk-crypto framework rather than
 filesystem-layer encryption. This allows the use of
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index be4da52604ed..69370f0073dd 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2896,7 +2896,7 @@ static int f2fs_write_data_page(struct page *page,
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode
goto out;
 
-   if (f2fs_compressed_file(inode)) {
+   if (f2fs_need_compress_write(inode)) {
if (f2fs_is_compressed_cluster(inode, page->index)) {
redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
@@ -2988,7 +2988,7 @@ static int f2fs_write_cache_pages(struct address_space 
*mapping,
 readd:
need_readd = false;
 #ifdef CONFIG_F2FS_FS_COMPRESSION
-   if (f2fs_compressed_file(inode)) {
+   if (f2fs_need_compress_write(inode)) {
ret = f2fs_init_compress_ctx(&cc);
if (ret) {
done = 1;
@@ -3067,7 +3067,7 @@ static int f2fs_write_cache_pages(struct address_space 
*mapping,
goto continue_unlock;
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
-   if (f2fs_compressed_file(inode)) {
+   if (f2fs_need_compress_write(inode)) {
get_page(page);
f2fs_compress_ctx_add_page(&cc, page);
continue;
@@ -3120,7 +3120,7 @@ static int f2fs_write_cache_pages(struct address_space 
*mapping,
}
 #ifdef CONFIG_F2FS_FS_COMPRESSION
/* flush remained pages in compress cluster */
-   if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
+   if (f2fs_need_compress_write(inode) && !f2fs_cluster_is_empty(&cc)) {
ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
nwritten += submitted;
wbc->nr_to_write -= submitted;
@@ -3164,7 +3164,7 @@ static inline bool __should_serialize_io(struct inode 
*inode,
if (IS_NOQUOTA(inode))
return false;
 
-   if (f2fs_compressed_file(inode))
+   if (f2fs_need_compress_write(inode))
return true;
if (wbc->sync_mode != WB_SYNC_ALL)
return true;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e0826779a101..88e012d07ad5 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -149,6 +149,7 @@ struct f2fs_mount_info {
unsigned char compress_algorithm;   /* algorithm type */
unsigned compress_log_size; /* cluster 

[PATCH 2/2] f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

2020-11-22 Thread Daeho Jeong
From: Daeho Jeong 

Added two ioctls to explicitly decompress/compress a compression
enabled file under the "compress_mode=user-based" mount option.

Using these two ioctls, the users can make a control of compression
and decompression of their files.

Signed-off-by: Daeho Jeong 
---
 fs/f2fs/file.c| 181 +-
 include/uapi/linux/f2fs.h |   2 +
 2 files changed, 182 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index be8db06aca27..e8f142470e87 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4026,6 +4026,180 @@ static int f2fs_ioc_set_compress_option(struct file 
*filp, unsigned long arg)
return ret;
 }
 
+static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
+{
+   DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx);
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+   struct address_space *mapping = inode->i_mapping;
+   struct page *page;
+   pgoff_t redirty_idx = page_idx;
+   int i, page_len = 0, ret = 0;
+
+   page_cache_ra_unbounded(&ractl, len, 0);
+
+   for (i = 0; i < len; i++, page_idx++) {
+   page = read_cache_page(mapping, page_idx, NULL, NULL);
+   if (IS_ERR(page)) {
+   ret = PTR_ERR(page);
+   f2fs_warn(sbi, "%s: inode (%lu) : page_index (%lu) "
+   "couldn't be read (errno:%d).\n",
+   __func__, inode->i_ino, page_idx, ret);
+   break;
+   }
+   page_len++;
+   }
+
+   for (i = 0; i < page_len; i++, redirty_idx++) {
+   page = find_lock_page(mapping, redirty_idx);
+   if (!page) {
+   ret = -ENOENT;
+   f2fs_warn(sbi, "%s: inode (%lu) : page_index (%lu) "
+   "couldn't be found (errno:%d).\n",
+   __func__, inode->i_ino, redirty_idx, ret);
+   }
+   set_page_dirty(page);
+   f2fs_put_page(page, 1);
+   f2fs_put_page(page, 0);
+   }
+
+   return ret;
+}
+
+static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
+{
+   struct inode *inode = file_inode(filp);
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+   struct f2fs_inode_info *fi = F2FS_I(inode);
+   pgoff_t page_idx = 0, last_idx;
+   int cluster_size = F2FS_I(inode)->i_cluster_size;
+   int count, ret;
+
+   if (!f2fs_sb_has_compression(sbi))
+   return -EOPNOTSUPP;
+
+   if (!(filp->f_mode & FMODE_WRITE))
+   return -EBADF;
+
+   if (!f2fs_compressed_file(inode))
+   return -EINVAL;
+
+   f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+   file_start_write(filp);
+   inode_lock(inode);
+
+   if (f2fs_is_mmap_file(inode)) {
+   ret = -EBUSY;
+   goto out;
+   }
+
+   ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
+   if (ret)
+   goto out;
+
+   if (!atomic_read(&fi->i_compr_blocks))
+   goto out;
+
+   last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+
+   count = last_idx - page_idx;
+   while (count) {
+   int len = min(cluster_size, count);
+
+   ret = redirty_blocks(inode, page_idx, len);
+
+   if (ret < 0)
+   break;
+
+   page_idx += len;
+   count -= len;
+   }
+
+   if (!ret)
+   ret = filemap_write_and_wait_range(inode->i_mapping, 0,
+   LLONG_MAX);
+
+   if (!ret) {
+   stat_sub_compr_blocks(inode, atomic_read(&fi->i_compr_blocks));
+   atomic_set(&fi->i_compr_blocks, 0);
+   f2fs_mark_inode_dirty_sync(inode, true);
+   } else {
+   f2fs_warn(sbi, "%s: The file might be partially decompressed "
+   "(errno=%d). Please delete the file.\n",
+   __func__, ret);
+   }
+out:
+   inode_unlock(inode);
+   file_end_write(filp);
+
+   return ret;
+}
+
+static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg)
+{
+   struct inode *inode = file_inode(filp);
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+   pgoff_t page_idx = 0, last_idx;
+   int cluster_size = F2FS_I(inode)->i_cluster_size;
+   int count, ret;
+
+   if (!f2fs_sb_has_compression(sbi))
+   return -EOPNOTSUPP;
+
+   if (!(filp->f_mode & FMODE_WRITE))
+   return -EBADF;
+
+   if (!f2fs_compressed_file(inode))
+   return -EINVAL;
+
+   f2fs_balance_fs(F2FS_I_SB(inode), true);
+

Re: [PATCH 3.2 16/52] ext4, jbd2: ensure entering into panic after recording an error in superblock

2015-11-26 Thread Daeho Jeong
It looks good. Thank you. :-)

--- Original Message ---
Sender : Ben Hutchings
Date : 2015-11-25 07:33 (GMT+09:00)
Title : [PATCH 3.2 16/52] ext4, jbd2: ensure entering into panic after 
recording an error in superblock

3.2.74-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Daeho Jeong 

commit 4327ba52afd03fc4b5afa0ee1d774c9c5b0e85c5 upstream.

If a EXT4 filesystem utilizes JBD2 journaling and an error occurs, the
journaling will be aborted first and the error number will be recorded
into JBD2 superblock and, finally, the system will enter into the
panic state in "errors=panic" option.  But, in the rare case, this
sequence is little twisted like the below figure and it will happen
that the system enters into panic state, which means the system reset
in mobile environment, before completion of recording an error in the
journal superblock. In this case, e2fsck cannot recognize that the
filesystem failure occurred in the previous run and the corruption
wouldn't be fixed.

Task ATask B
ext4_handle_error()
-> jbd2_journal_abort()
  -> __journal_abort_soft()
-> __jbd2_journal_abort_hard()
| -> journal->j_flags |= JBD2_ABORT;
|
| __ext4_abort()
| -> jbd2_journal_abort()
| | -> __journal_abort_soft()
| |   -> if (journal->j_flags & JBD2_ABORT)
| |   return;
| -> panic()
|
-> jbd2_journal_update_sb_errno()

Tested-by: Hobin Woo 
Signed-off-by: Daeho Jeong 
Signed-off-by: Theodore Ts'o 
Signed-off-by: Ben Hutchings 
---
fs/ext4/super.c  | 12 ++--
fs/jbd2/journal.c|  6 +-
include/linux/jbd2.h |  1 +
3 files changed, 16 insertions(+), 3 deletions(-)

--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -463,9 +463,13 @@ static void ext4_handle_error(struct sup
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
sb->s_flags |= MS_RDONLY;
}
- if (test_opt(sb, ERRORS_PANIC))
+ if (test_opt(sb, ERRORS_PANIC)) {
+ if (EXT4_SB(sb)->s_journal &&
+   !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
+ return;
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
+ }
}

void __ext4_error(struct super_block *sb, const char *function,
@@ -628,8 +632,12 @@ void __ext4_abort(struct super_block *sb
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
save_error_info(sb, function, line);
}
- if (test_opt(sb, ERRORS_PANIC))
+ if (test_opt(sb, ERRORS_PANIC)) {
+ if (EXT4_SB(sb)->s_journal &&
+   !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
+ return;
panic("EXT4-fs panic from previous error\n");
+ }
}

void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1956,8 +1956,12 @@ static void __journal_abort_soft (journa

__jbd2_journal_abort_hard(journal);

- if (errno)
+ if (errno) {
jbd2_journal_update_sb_errno(journal);
+ write_lock(&journal->j_state_lock);
+ journal->j_flags |= JBD2_REC_ERR;
+ write_unlock(&journal->j_state_lock);
+ }
}

/**
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -954,6 +954,7 @@ struct journal_s
#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
* data write error in ordered
* mode */
+#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */

/*
  * Function declarations for the journaling transaction and buffer

Question about percpu_free_rwsem function

2016-03-06 Thread Daeho Jeong
Hi,

I ran into trouble while developing a patch for the EXT4 filesystem.
Actually, I used a percpu rw semaphore in my patch and there was no
problem when I built EXT4 filesystem as built-in mode. However, when
kbuild auto build robot built EXT4 filesystem with my patch as a "module",
a build error occurred because the percpu_free_rwsem() function is not exported.

I noticed that other functions except percpu_free_rwsem() were exported in 2015.
Is there any reason for percpu_free_rwsem() not to be exported?
If I need the function to be exported, should I do that in my patch? :-)

Best Regards,

Re: [f2fs-dev] [PATCH] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl

2020-06-10 Thread Daeho Jeong
> > > > > +
> > > > > + if (f2fs_readonly(sbi->sb))
> > > > > + return -EROFS;
> > > >
> > > > Isn't this redundant with mnt_want_write_file()?
> > > >
> > > > Also, shouldn't write access to the file be required, i.e.
> > > > (filp->f_mode & FMODE_WRITE)?  Then the f2fs_readonly() and
> > > > mnt_want_write_file() checks would be unnecessary.
> > > >
> > >
> > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > modify the data. But I think mnt_want_write_file() is still required
> > > to prevent the filesystem from freezing or something else.
> >
> > Right, the freezing check is actually still necessary.  But getting write 
> > access
> > to the mount is not necessary.  I think you should use file_start_write() 
> > and
> > file_end_write(), like vfs_write() does.

I've checked this again.

But I think mnt_want_write_file() looks better than the combination of
checking FMODE_WRITE and file_start_write(), because
mnt_want_write_file() handles all the things we need.
It checks FMODE_WRITER, which is set in do_dentry_open() when
FMODE_WRITE is already set, and does the stuff that file_start_write()
is doing. This is why the other filesystem system calls use it.

What do you think?

2020년 6월 10일 (수) 오후 12:55, Daeho Jeong 님이 작성:
>
> > >
> > > To prevent the file data from garbage collecting, the user needs to
> > > use pinfile ioctl and fallocate system call after creating the file.
> > > The sequence is like below.
> > > 1. create an empty file
> > > 2. pinfile
> > > 3. fallocate()
> >
> > Is that persistent?  So the file will never be moved afterwards?
> >
> > Is there a place where this is (or should be) documented?
>
> Yes, this is persistent. F2FS_IOC_SET_PIN_FILE ioctl is to prevent
> file data from moving and being garbage collected, and further update
> to the file will be handled in in-place update manner.
> I don't see any document on this, but you can find the below in
> Documentation/filesystems/f2fs.rst
>
> However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
> fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addresses having
> zero or random data, which is useful to the below scenario where:
>
>  1. create(fd)
>  2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
>  3. fallocate(fd, 0, 0, size)
>  4. address = fibmap(fd, offset)
>  5. open(blkdev)
>  6. write(blkdev, address)
>
> > Right, the freezing check is actually still necessary.  But getting write 
> > access
> > to the mount is not necessary.  I think you should use file_start_write() 
> > and
> > file_end_write(), like vfs_write() does.
>
> Yes, agreed.
>
> 2020년 6월 10일 (수) 오후 12:15, Eric Biggers 님이 작성:
> >
> > On Wed, Jun 10, 2020 at 11:05:46AM +0900, Daeho Jeong wrote:
> > > > > Added a new ioctl to send discard commands or/and zero out
> > > > > to whole data area of a regular file for security reason.
> > > >
> > > > With this ioctl available, what is the exact procedure to write and 
> > > > then later
> > > > securely erase a file on f2fs?  In particular, how can the user prevent 
> > > > f2fs
> > > > from making multiple copies of file data blocks as part of garbage 
> > > > collection?
> > > >
> > >
> > > To prevent the file data from garbage collecting, the user needs to
> > > use pinfile ioctl and fallocate system call after creating the file.
> > > The sequence is like below.
> > > 1. create an empty file
> > > 2. pinfile
> > > 3. fallocate()
> >
> > Is that persistent?  So the file will never be moved afterwards?
> >
> > Is there a place where this is (or should be) documented?
> >
> > > > > +
> > > > > + if (f2fs_readonly(sbi->sb))
> > > > > + return -EROFS;
> > > >
> > > > Isn't this redundant with mnt_want_write_file()?
> > > >
> > > > Also, shouldn't write access to the file be required, i.e.
> > > > (filp->f_mode & FMODE_WRITE)?  Then the f2fs_readonly() and
> > > > mnt_want_write_file() checks would be unnecessary.
> > > >
> > >
> > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > modify the data. But I think mnt_want_write_file() is still required
> > > to prevent the filesyste

Re: [f2fs-dev] [PATCH] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl

2020-06-10 Thread Daeho Jeong
> > > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > > modify the data. But I think mnt_want_write_file() is still required
> > > > to prevent the filesystem from freezing or something else.
> > >
> > > Right, the freezing check is actually still necessary.  But getting write 
> > > access
> > > to the mount is not necessary.  I think you should use file_start_write() 
> > > and
> > > file_end_write(), like vfs_write() does.
>
> I've checked this again.
>
> But I think mnt_want_write_file() looks better than the combination of
> checking FMODE_WRITE and file_start_write(), because
> mnt_want_write_file() handles all the things we need.
> It checks FMODE_WRITER, which is set in do_dentry_open() when
> FMODE_WRITE is already set, and does the stuff that file_start_write()
> is doing. This is why the other filesystem system calls use it.
>
> What do you think?

Hmm, we still need FMODE_WRITE check.
But mnt_want_write_file() looks better, because it'll call
mnt_clone_write() internally, if the file is open for write already.

in ext4/ioctl.c
case EXT4_IOC_SWAP_BOOT:
{
int err;
if (!(filp->f_mode & FMODE_WRITE))
return -EBADF;
    err = mnt_want_write_file(filp);
if (err)
return err;2020년 6월 11일 (목) 오전 8:31, Daeho
Jeong 님이 작성:
>
> > > > > > +
> > > > > > + if (f2fs_readonly(sbi->sb))
> > > > > > + return -EROFS;
> > > > >
> > > > > Isn't this redundant with mnt_want_write_file()?
> > > > >
> > > > > Also, shouldn't write access to the file be required, i.e.
> > > > > (filp->f_mode & FMODE_WRITE)?  Then the f2fs_readonly() and
> > > > > mnt_want_write_file() checks would be unnecessary.
> > > > >
> > > >
> > > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > > modify the data. But I think mnt_want_write_file() is still required
> > > > to prevent the filesystem from freezing or something else.
> > >
> > > Right, the freezing check is actually still necessary.  But getting write 
> > > access
> > > to the mount is not necessary.  I think you should use file_start_write() 
> > > and
> > > file_end_write(), like vfs_write() does.
>
> I've checked this again.
>
> But I think mnt_want_write_file() looks better than the combination of
> checking FMODE_WRITE and file_start_write(), because
> mnt_want_write_file() handles all the things we need.
> It checks FMODE_WRITER, which is set in do_dentry_open() when
> FMODE_WRITE is already set, and does the stuff that file_start_write()
> is doing. This is why the other filesystem system calls use it.
>
> What do you think?
>
> 2020년 6월 10일 (수) 오후 12:55, Daeho Jeong 님이 작성:
> >
> > > >
> > > > To prevent the file data from garbage collecting, the user needs to
> > > > use pinfile ioctl and fallocate system call after creating the file.
> > > > The sequence is like below.
> > > > 1. create an empty file
> > > > 2. pinfile
> > > > 3. fallocate()
> > >
> > > Is that persistent?  So the file will never be moved afterwards?
> > >
> > > Is there a place where this is (or should be) documented?
> >
> > Yes, this is persistent. F2FS_IOC_SET_PIN_FILE ioctl is to prevent
> > file data from moving and being garbage collected, and further update
> > to the file will be handled in in-place update manner.
> > I don't see any document on this, but you can find the below in
> > Documentation/filesystems/f2fs.rst
> >
> > However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
> > fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addresses having
> > zero or random data, which is useful to the below scenario where:
> >
> >  1. create(fd)
> >  2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
> >  3. fallocate(fd, 0, 0, size)
> >  4. address = fibmap(fd, offset)
> >  5. open(blkdev)
> >  6. write(blkdev, address)
> >
> > > Right, the freezing check is actually still necessary.  But getting write 
> > > access
> > > to the mount is not necessary.  I think you should use file_start_write() 
> > > and
> > > file_end_write(), like vfs_write() does.
> >
> > Yes, agreed.
> >
> > 2020년 6월 10일 (수) 오후 12:15, Eric Biggers 

Re: [f2fs-dev] [PATCH] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl

2020-06-10 Thread Daeho Jeong
Yes, I saw the implementation in vfs_write().
But if we use mnt_want_write_file() here, it'll call mnt_clone_write()
internally if the file is already open in write mode.
Don't you think the below thing is needed? We can increase the counter
each of them, open and ioctl, like other filesystems such as ext4.

/*
 * mnt_clone_write - acquire an additional write reference on a mount.
 * Pasted here for discussion; appears to be quoted from the kernel's
 * fs/namespace.c — confirm against the tree before relying on details.
 * Assumes the caller already holds write access (e.g. a file opened for
 * write), so unlike mnt_want_write() it skips waiting for a pending
 * remount-ro and only rejects an already read-only mount.
 * Returns 0 on success, -EROFS if the mount is read-only.
 */
int mnt_clone_write(struct vfsmount *mnt)
{
/* superblock may be r/o */
if (__mnt_is_readonly(mnt))
return -EROFS;
/*
 * Preemption is disabled around the increment — presumably because the
 * writer count is kept in per-cpu storage; verify in real_mount()/mnt.
 */
preempt_disable();
mnt_inc_writers(real_mount(mnt));
preempt_enable();
return 0;
}

2020년 6월 11일 (목) 오전 9:00, Eric Biggers 님이 작성:
>
> On Thu, Jun 11, 2020 at 08:53:10AM +0900, Daeho Jeong wrote:
> > > > > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > > > > modify the data. But I think mnt_want_write_file() is still required
> > > > > > to prevent the filesystem from freezing or something else.
> > > > >
> > > > > Right, the freezing check is actually still necessary.  But getting 
> > > > > write access
> > > > > to the mount is not necessary.  I think you should use 
> > > > > file_start_write() and
> > > > > file_end_write(), like vfs_write() does.
> > >
> > > I've checked this again.
> > >
> > > But I think mnt_want_write_file() looks better than the combination of
> > > checking FMODE_WRITE and file_start_write(), because
> > > mnt_want_write_file() handles all the things we need.
> > > It checks FMODE_WRITER, which is set in do_dentry_open() when
> > > FMODE_WRITE is already set, and does the stuff that file_start_write()
> > > is doing. This is why the other filesystems' system calls use it.
> > >
> > > What do you think?
> >
> > Hmm, we still need FMODE_WRITE check.
> > But mnt_want_write_file() looks better, because it'll call
> > mnt_clone_write() internally, if the file is open for write already.
>
> There's no need to get write access to the mount if you already have a 
> writable
> fd.  You just need file_start_write() for the freeze protection.  Again, see
> vfs_write().
>
> - Eric


Re: [f2fs-dev] [PATCH] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl

2020-06-10 Thread Daeho Jeong
Ok, I got it. Thanks for quick response~ :)

2020년 6월 11일 (목) 오전 10:56, Eric Biggers 님이 작성:
>
> On Thu, Jun 11, 2020 at 09:23:23AM +0900, Daeho Jeong wrote:
> > Yes, I saw the implementation in vfs_write().
> > But if we use mnt_want_write_file() here, it'll call mnt_clone_write()
> > internally if the file is already open in write mode.
> > Don't you think the below thing is needed? We can increase the counter
> > each of them, open and ioctl, like other filesystems such as ext4.
> >
> > int mnt_clone_write(struct vfsmount *mnt)
> > {
> > /* superblock may be r/o */
> > if (__mnt_is_readonly(mnt))
> > return -EROFS;
> > preempt_disable();
> > mnt_inc_writers(real_mount(mnt));
> > preempt_enable();
> > return 0;
> > }
>
> No, this seems to be left over from when mnt_want_write_file() was paired with
> mnt_drop_write() instead of mnt_drop_write_file().  I sent a patch to remove 
> it:
> https://lkml.kernel.org/r/20200611014945.237210-1-ebigg...@kernel.org
>
> - Eric


[PATCH] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl

2020-06-10 Thread Daeho Jeong
From: Daeho Jeong 

Added a new ioctl to send discard commands or/and zero out
to whole data area of a regular file for security reason.

Signed-off-by: Daeho Jeong 
---
 fs/f2fs/f2fs.h |   8 +++
 fs/f2fs/file.c | 143 +
 2 files changed, 151 insertions(+)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c812fb8e2d9c..ca139fac5a73 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -434,6 +434,7 @@ static inline bool __has_cursum_space(struct f2fs_journal 
*journal,
_IOR(F2FS_IOCTL_MAGIC, 18, __u64)
 #define F2FS_IOC_RESERVE_COMPRESS_BLOCKS   \
_IOR(F2FS_IOCTL_MAGIC, 19, __u64)
+#define F2FS_IOC_SEC_TRIM_FILE _IOW(F2FS_IOCTL_MAGIC, 20, __u32)
 
 #define F2FS_IOC_GET_VOLUME_NAME   FS_IOC_GETFSLABEL
 #define F2FS_IOC_SET_VOLUME_NAME   FS_IOC_SETFSLABEL
@@ -453,6 +454,13 @@ static inline bool __has_cursum_space(struct f2fs_journal 
*journal,
 #define F2FS_GOING_DOWN_METAFLUSH  0x3 /* going down with meta flush */
 #define F2FS_GOING_DOWN_NEED_FSCK  0x4 /* going down to trigger fsck */
 
+/*
+ * Flags used by F2FS_IOC_SEC_TRIM_FILE
+ */
+#define F2FS_TRIM_FILE_DISCARD 0x1 /* send discard command */
+#define F2FS_TRIM_FILE_ZEROOUT 0x2 /* zero out */
+#define F2FS_TRIM_FILE_MASK0x3
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index dfa1ac2d751a..ba9b7ec5d6bf 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3749,6 +3749,146 @@ static int f2fs_reserve_compress_blocks(struct file 
*filp, unsigned long arg)
return ret;
 }
 
+/*
+ * Helper for F2FS_IOC_SEC_TRIM_FILE: wipe @len blocks starting at @block
+ * on @bdev according to the F2FS_TRIM_FILE_* bits in @flags.
+ * Returns 0 on success, -ENXIO if the device has no request queue, or the
+ * error reported by blkdev_issue_discard()/blkdev_issue_zeroout().
+ */
+static int f2fs_secure_erase(struct block_device *bdev, block_t block,
+   block_t len, u32 flags)
+{
+   struct request_queue *q = bdev_get_queue(bdev);
+   sector_t sector = SECTOR_FROM_BLOCK(block); /* block addr -> sector units */
+   sector_t nr_sects = SECTOR_FROM_BLOCK(len);
+   int ret = 0;
+
+   if (!q)
+   return -ENXIO; /* queue already torn down / device gone */
+
+   /* Use a secure discard when the hardware advertises support for it. */
+   if (flags & F2FS_TRIM_FILE_DISCARD)
+   ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
+   blk_queue_secure_erase(q) ?
+   BLKDEV_DISCARD_SECURE : 0);
+
+   /* Zero-out is skipped if the (optional) discard above failed. */
+   if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT))
+   ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS, 0);
+
+   return ret;
+}
+
+static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
+{
+   struct inode *inode = file_inode(filp);
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+   struct address_space *mapping = inode->i_mapping;
+   struct block_device *prev_bdev = NULL;
+   pgoff_t index, pg_start = 0, pg_end;
+   block_t prev_block = 0, len = 0;
+   u32 flags;
+   int ret = 0;
+
+   if (!(filp->f_mode & FMODE_WRITE))
+   return -EBADF;
+
+   if (get_user(flags, (u32 __user *)arg))
+   return -EFAULT;
+   if (flags == 0 || (flags & ~F2FS_TRIM_FILE_MASK))
+   return -EINVAL;
+
+   if ((flags & F2FS_TRIM_FILE_DISCARD) && !f2fs_hw_support_discard(sbi))
+   return -EOPNOTSUPP;
+
+   file_start_write(filp);
+   inode_lock(inode);
+
+   if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode) ||
+   f2fs_compressed_file(inode)) {
+   ret = -EINVAL;
+   goto err;
+   }
+
+   if (!inode->i_size)
+   goto err;
+   pg_end = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+
+   ret = f2fs_convert_inline_inode(inode);
+   if (ret)
+   goto err;
+
+   down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+   down_write(&F2FS_I(inode)->i_mmap_sem);
+
+   ret = filemap_write_and_wait(mapping);
+   if (ret)
+   goto out;
+
+   truncate_inode_pages(mapping, 0);
+
+   for (index = pg_start; index < pg_end;) {
+   struct dnode_of_data dn;
+   unsigned int end_offset;
+
+   set_new_dnode(&dn, inode, NULL, NULL, 0);
+   ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+   if (ret)
+   goto out;
+
+   end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+   if (pg_end < end_offset + index)
+   end_offset = pg_end - index;
+
+   for (; dn.ofs_in_node < end_offset;
+   dn.ofs_in_node++, index++) {
+   struct block_device *cur_bdev;
+   block_t blkaddr = f2fs_data_blkaddr(&dn);
+
+   if (__is_valid_data_blkaddr(blkaddr)) {
+   if (!f2fs_is_valid_b

  1   2   3   >