On Mon, 2025-06-16 at 20:33 +0100, Lorenzo Stoakes wrote:
> Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file
> callback"), the f_op->mmap() hook has been deprecated in favour of
> f_op->mmap_prepare().
> 
> This callback is invoked in the mmap() logic far earlier, so error handling
> can be performed more safely without complicated and bug-prone state
> unwinding required should an error arise.
> 
> This hook also avoids passing a pointer to a not-yet-correctly-established
> VMA, avoiding any issues with referencing this data structure.
> 
> Instead, it provides a pointer to the new struct vm_area_desc descriptor
> type, which contains all required state and allows the required parameters
> to be set easily, without having to consider locking or reference counts.
> 
> Note that nested filesystems like overlayfs are compatible with an
> .mmap_prepare() callback since commit bb666b7c2707 ("mm: add mmap_prepare()
> compatibility layer for nested file systems").
> 
> In this patch we apply this change to file systems with relatively simple
> mmap() hook logic - exfat, ceph, f2fs, bcachefs, zonefs, btrfs, ocfs2,
> orangefs, nilfs2, romfs, ramfs and aio.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoa...@oracle.com>
> ---
>  fs/aio.c              |  8 ++++----
>  fs/bcachefs/fs.c      |  8 ++++----
>  fs/btrfs/file.c       |  7 ++++---
>  fs/ceph/addr.c        |  5 +++--
>  fs/ceph/file.c        |  2 +-
>  fs/ceph/super.h       |  2 +-
>  fs/exfat/file.c       |  7 ++++---
>  fs/f2fs/file.c        |  7 ++++---
>  fs/nilfs2/file.c      |  8 ++++----
>  fs/ocfs2/file.c       |  4 ++--
>  fs/ocfs2/mmap.c       |  5 +++--
>  fs/ocfs2/mmap.h       |  2 +-
>  fs/orangefs/file.c    | 10 ++++++----
>  fs/ramfs/file-nommu.c | 12 ++++++------
>  fs/romfs/mmap-nommu.c |  6 +++---
>  fs/zonefs/file.c      | 10 ++++++----
>  16 files changed, 56 insertions(+), 47 deletions(-)
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 793b7b15ec4b..7fc7b6221312 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -392,15 +392,15 @@ static const struct vm_operations_struct aio_ring_vm_ops = {
>  #endif
>  };
>  
> -static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
> +static int aio_ring_mmap_prepare(struct vm_area_desc *desc)
>  {
> -     vm_flags_set(vma, VM_DONTEXPAND);
> -     vma->vm_ops = &aio_ring_vm_ops;
> +     desc->vm_flags |= VM_DONTEXPAND;
> +     desc->vm_ops = &aio_ring_vm_ops;
>       return 0;
>  }
>  
>  static const struct file_operations aio_ring_fops = {
> -     .mmap = aio_ring_mmap,
> +     .mmap_prepare = aio_ring_mmap_prepare,
>  };
>  
>  #if IS_ENABLED(CONFIG_MIGRATION)
> diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
> index 3063a8ddc2df..9c2238edc0e3 100644
> --- a/fs/bcachefs/fs.c
> +++ b/fs/bcachefs/fs.c
> @@ -1553,11 +1553,11 @@ static const struct vm_operations_struct bch_vm_ops = {
>       .page_mkwrite   = bch2_page_mkwrite,
>  };
>  
> -static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
> +static int bch2_mmap_prepare(struct vm_area_desc *desc)
>  {
> -     file_accessed(file);
> +     file_accessed(desc->file);
>  
> -     vma->vm_ops = &bch_vm_ops;
> +     desc->vm_ops = &bch_vm_ops;
>       return 0;
>  }
>  
> @@ -1740,7 +1740,7 @@ static const struct file_operations bch_file_operations = {
>       .llseek         = bch2_llseek,
>       .read_iter      = bch2_read_iter,
>       .write_iter     = bch2_write_iter,
> -     .mmap           = bch2_mmap,
> +     .mmap_prepare   = bch2_mmap_prepare,
>       .get_unmapped_area = thp_get_unmapped_area,
>       .fsync          = bch2_fsync,
>       .splice_read    = filemap_splice_read,
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 8ce6f45f45e0..06bd30b35b95 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -1978,15 +1978,16 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
>       .page_mkwrite   = btrfs_page_mkwrite,
>  };
>  
> -static int btrfs_file_mmap(struct file       *filp, struct vm_area_struct *vma)
> +static int btrfs_file_mmap_prepare(struct vm_area_desc *desc)
>  {
> +     struct file *filp = desc->file;
>       struct address_space *mapping = filp->f_mapping;
>  
>       if (!mapping->a_ops->read_folio)
>               return -ENOEXEC;
>  
>       file_accessed(filp);
> -     vma->vm_ops = &btrfs_file_vm_ops;
> +     desc->vm_ops = &btrfs_file_vm_ops;
>  
>       return 0;
>  }
> @@ -3765,7 +3766,7 @@ const struct file_operations btrfs_file_operations = {
>       .splice_read    = filemap_splice_read,
>       .write_iter     = btrfs_file_write_iter,
>       .splice_write   = iter_file_splice_write,
> -     .mmap           = btrfs_file_mmap,
> +     .mmap_prepare   = btrfs_file_mmap_prepare,
>       .open           = btrfs_file_open,
>       .release        = btrfs_release_file,
>       .get_unmapped_area = thp_get_unmapped_area,
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 60a621b00c65..37522137c380 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -2330,13 +2330,14 @@ static const struct vm_operations_struct ceph_vmops = {
>       .page_mkwrite   = ceph_page_mkwrite,
>  };
>  
> -int ceph_mmap(struct file *file, struct vm_area_struct *vma)
> +int ceph_mmap_prepare(struct vm_area_desc *desc)
>  {
> +     struct file *file = desc->file;
>       struct address_space *mapping = file->f_mapping;
>  
>       if (!mapping->a_ops->read_folio)
>               return -ENOEXEC;
> -     vma->vm_ops = &ceph_vmops;
> +     desc->vm_ops = &ceph_vmops;
>       return 0;
>  }
>  
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index d5c674d2ba8a..41b8ec33e864 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -3170,7 +3170,7 @@ const struct file_operations ceph_file_fops = {
>       .llseek = ceph_llseek,
>       .read_iter = ceph_read_iter,
>       .write_iter = ceph_write_iter,
> -     .mmap = ceph_mmap,
> +     .mmap_prepare = ceph_mmap_prepare,
>       .fsync = ceph_fsync,
>       .lock = ceph_lock,
>       .setlease = simple_nosetlease,
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index bb0db0cc8003..cf176aab0f82 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1286,7 +1286,7 @@ extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
>  /* addr.c */
>  extern const struct address_space_operations ceph_aops;
>  extern const struct netfs_request_ops ceph_netfs_ops;
> -extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
> +int ceph_mmap_prepare(struct vm_area_desc *desc);
>  extern int ceph_uninline_data(struct file *file);
>  extern int ceph_pool_perm_check(struct inode *inode, int need);
>  extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);

Looks good for CephFS.

Reviewed-by: Viacheslav Dubeyko <slava.dube...@ibm.com>

Thanks,
Slava.

> diff --git a/fs/exfat/file.c b/fs/exfat/file.c
> index 841a5b18e3df..d63213c8a823 100644
> --- a/fs/exfat/file.c
> +++ b/fs/exfat/file.c
> @@ -683,13 +683,14 @@ static const struct vm_operations_struct exfat_file_vm_ops = {
>       .page_mkwrite   = exfat_page_mkwrite,
>  };
>  
> -static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma)
> +static int exfat_file_mmap_prepare(struct vm_area_desc *desc)
>  {
> +     struct file *file = desc->file;
>       if (unlikely(exfat_forced_shutdown(file_inode(file)->i_sb)))
>               return -EIO;
>  
>       file_accessed(file);
> -     vma->vm_ops = &exfat_file_vm_ops;
> +     desc->vm_ops = &exfat_file_vm_ops;
>       return 0;
>  }
>  
> @@ -710,7 +711,7 @@ const struct file_operations exfat_file_operations = {
>  #ifdef CONFIG_COMPAT
>       .compat_ioctl = exfat_compat_ioctl,
>  #endif
> -     .mmap           = exfat_file_mmap,
> +     .mmap_prepare   = exfat_file_mmap_prepare,
>       .fsync          = exfat_file_fsync,
>       .splice_read    = exfat_splice_read,
>       .splice_write   = iter_file_splice_write,
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 6bd3de64f2a8..7af2b49b7e8a 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -532,8 +532,9 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
>       return -EINVAL;
>  }
>  
> -static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
> +static int f2fs_file_mmap_prepare(struct vm_area_desc *desc)
>  {
> +     struct file *file = desc->file;
>       struct inode *inode = file_inode(file);
>  
>       if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
> @@ -543,7 +544,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
>               return -EOPNOTSUPP;
>  
>       file_accessed(file);
> -     vma->vm_ops = &f2fs_file_vm_ops;
> +     desc->vm_ops = &f2fs_file_vm_ops;
>  
>       f2fs_down_read(&F2FS_I(inode)->i_sem);
>       set_inode_flag(inode, FI_MMAP_FILE);
> @@ -5376,7 +5377,7 @@ const struct file_operations f2fs_file_operations = {
>       .iopoll         = iocb_bio_iopoll,
>       .open           = f2fs_file_open,
>       .release        = f2fs_release_file,
> -     .mmap           = f2fs_file_mmap,
> +     .mmap_prepare   = f2fs_file_mmap_prepare,
>       .flush          = f2fs_file_flush,
>       .fsync          = f2fs_sync_file,
>       .fallocate      = f2fs_fallocate,
> diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
> index 0e3fc5ba33c7..1b8d754db44d 100644
> --- a/fs/nilfs2/file.c
> +++ b/fs/nilfs2/file.c
> @@ -125,10 +125,10 @@ static const struct vm_operations_struct nilfs_file_vm_ops = {
>       .page_mkwrite   = nilfs_page_mkwrite,
>  };
>  
> -static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
> +static int nilfs_file_mmap_prepare(struct vm_area_desc *desc)
>  {
> -     file_accessed(file);
> -     vma->vm_ops = &nilfs_file_vm_ops;
> +     file_accessed(desc->file);
> +     desc->vm_ops = &nilfs_file_vm_ops;
>       return 0;
>  }
>  
> @@ -144,7 +144,7 @@ const struct file_operations nilfs_file_operations = {
>  #ifdef CONFIG_COMPAT
>       .compat_ioctl   = nilfs_compat_ioctl,
>  #endif       /* CONFIG_COMPAT */
> -     .mmap           = nilfs_file_mmap,
> +     .mmap_prepare   = nilfs_file_mmap_prepare,
>       .open           = generic_file_open,
>       /* .release     = nilfs_release_file, */
>       .fsync          = nilfs_sync_file,
> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
> index 2056cf08ac1e..21d797ccccd0 100644
> --- a/fs/ocfs2/file.c
> +++ b/fs/ocfs2/file.c
> @@ -2800,7 +2800,7 @@ const struct inode_operations ocfs2_special_file_iops = {
>   */
>  const struct file_operations ocfs2_fops = {
>       .llseek         = ocfs2_file_llseek,
> -     .mmap           = ocfs2_mmap,
> +     .mmap_prepare   = ocfs2_mmap_prepare,
>       .fsync          = ocfs2_sync_file,
>       .release        = ocfs2_file_release,
>       .open           = ocfs2_file_open,
> @@ -2850,7 +2850,7 @@ const struct file_operations ocfs2_dops = {
>   */
>  const struct file_operations ocfs2_fops_no_plocks = {
>       .llseek         = ocfs2_file_llseek,
> -     .mmap           = ocfs2_mmap,
> +     .mmap_prepare   = ocfs2_mmap_prepare,
>       .fsync          = ocfs2_sync_file,
>       .release        = ocfs2_file_release,
>       .open           = ocfs2_file_open,
> diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
> index 6a314e9f2b49..50e2faf64c19 100644
> --- a/fs/ocfs2/mmap.c
> +++ b/fs/ocfs2/mmap.c
> @@ -159,8 +159,9 @@ static const struct vm_operations_struct ocfs2_file_vm_ops = {
>       .page_mkwrite   = ocfs2_page_mkwrite,
>  };
>  
> -int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
> +int ocfs2_mmap_prepare(struct vm_area_desc *desc)
>  {
> +     struct file *file = desc->file;
>       int ret = 0, lock_level = 0;
>  
>       ret = ocfs2_inode_lock_atime(file_inode(file),
> @@ -171,7 +172,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
>       }
>       ocfs2_inode_unlock(file_inode(file), lock_level);
>  out:
> -     vma->vm_ops = &ocfs2_file_vm_ops;
> +     desc->vm_ops = &ocfs2_file_vm_ops;
>       return 0;
>  }
>  
> diff --git a/fs/ocfs2/mmap.h b/fs/ocfs2/mmap.h
> index 1051507cc684..d21c30de6b8c 100644
> --- a/fs/ocfs2/mmap.h
> +++ b/fs/ocfs2/mmap.h
> @@ -2,6 +2,6 @@
>  #ifndef OCFS2_MMAP_H
>  #define OCFS2_MMAP_H
>  
> -int ocfs2_mmap(struct file *file, struct vm_area_struct *vma);
> +int ocfs2_mmap_prepare(struct vm_area_desc *desc);
>  
>  #endif  /* OCFS2_MMAP_H */
> diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
> index 90c49c0de243..919f99b16834 100644
> --- a/fs/orangefs/file.c
> +++ b/fs/orangefs/file.c
> @@ -398,8 +398,9 @@ static const struct vm_operations_struct orangefs_file_vm_ops = {
>  /*
>   * Memory map a region of a file.
>   */
> -static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
> +static int orangefs_file_mmap_prepare(struct vm_area_desc *desc)
>  {
> +     struct file *file = desc->file;
>       int ret;
>  
>       ret = orangefs_revalidate_mapping(file_inode(file));
> @@ -410,10 +411,11 @@ static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
>                    "orangefs_file_mmap: called on %pD\n", file);
>  
>       /* set the sequential readahead hint */
> -     vm_flags_mod(vma, VM_SEQ_READ, VM_RAND_READ);
> +     desc->vm_flags |= VM_SEQ_READ;
> +     desc->vm_flags &= ~VM_RAND_READ;
>  
>       file_accessed(file);
> -     vma->vm_ops = &orangefs_file_vm_ops;
> +     desc->vm_ops = &orangefs_file_vm_ops;
>       return 0;
>  }
>  
> @@ -574,7 +576,7 @@ const struct file_operations orangefs_file_operations = {
>       .read_iter      = orangefs_file_read_iter,
>       .write_iter     = orangefs_file_write_iter,
>       .lock           = orangefs_lock,
> -     .mmap           = orangefs_file_mmap,
> +     .mmap_prepare   = orangefs_file_mmap_prepare,
>       .open           = generic_file_open,
>       .splice_read    = orangefs_file_splice_read,
>       .splice_write   = iter_file_splice_write,
> diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
> index 7a6d980e614d..77b8ca2757e0 100644
> --- a/fs/ramfs/file-nommu.c
> +++ b/fs/ramfs/file-nommu.c
> @@ -28,7 +28,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
>                                                  unsigned long len,
>                                                  unsigned long pgoff,
>                                                  unsigned long flags);
> -static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
> +static int ramfs_nommu_mmap_prepare(struct vm_area_desc *desc);
>  
>  static unsigned ramfs_mmap_capabilities(struct file *file)
>  {
> @@ -38,7 +38,7 @@ static unsigned ramfs_mmap_capabilities(struct file *file)
>  
>  const struct file_operations ramfs_file_operations = {
>       .mmap_capabilities      = ramfs_mmap_capabilities,
> -     .mmap                   = ramfs_nommu_mmap,
> +     .mmap_prepare           = ramfs_nommu_mmap_prepare,
>       .get_unmapped_area      = ramfs_nommu_get_unmapped_area,
>       .read_iter              = generic_file_read_iter,
>       .write_iter             = generic_file_write_iter,
> @@ -262,12 +262,12 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
>  /*
>   * set up a mapping for shared memory segments
>   */
> -static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
> +static int ramfs_nommu_mmap_prepare(struct vm_area_desc *desc)
>  {
> -     if (!is_nommu_shared_mapping(vma->vm_flags))
> +     if (!is_nommu_shared_mapping(desc->vm_flags))
>               return -ENOSYS;
>  
> -     file_accessed(file);
> -     vma->vm_ops = &generic_file_vm_ops;
> +     file_accessed(desc->file);
> +     desc->vm_ops = &generic_file_vm_ops;
>       return 0;
>  }
> diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
> index 4520ca413867..4b77c6dc4418 100644
> --- a/fs/romfs/mmap-nommu.c
> +++ b/fs/romfs/mmap-nommu.c
> @@ -61,9 +61,9 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
>   * permit a R/O mapping to be made directly through onto an MTD device if
>   * possible
>   */
> -static int romfs_mmap(struct file *file, struct vm_area_struct *vma)
> +static int romfs_mmap_prepare(struct vm_area_desc *desc)
>  {
> -     return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS;
> +     return is_nommu_shared_mapping(desc->vm_flags) ? 0 : -ENOSYS;
>  }
>  
>  static unsigned romfs_mmap_capabilities(struct file *file)
> @@ -79,7 +79,7 @@ const struct file_operations romfs_ro_fops = {
>       .llseek                 = generic_file_llseek,
>       .read_iter              = generic_file_read_iter,
>       .splice_read            = filemap_splice_read,
> -     .mmap                   = romfs_mmap,
> +     .mmap_prepare           = romfs_mmap_prepare,
>       .get_unmapped_area      = romfs_get_unmapped_area,
>       .mmap_capabilities      = romfs_mmap_capabilities,
>  };
> diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
> index 42e2c0065bb3..c1848163b378 100644
> --- a/fs/zonefs/file.c
> +++ b/fs/zonefs/file.c
> @@ -312,8 +312,10 @@ static const struct vm_operations_struct zonefs_file_vm_ops = {
>       .page_mkwrite   = zonefs_filemap_page_mkwrite,
>  };
>  
> -static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma)
> +static int zonefs_file_mmap_prepare(struct vm_area_desc *desc)
>  {
> +     struct file *file = desc->file;
> +
>       /*
>        * Conventional zones accept random writes, so their files can support
>        * shared writable mappings. For sequential zone files, only read
> @@ -321,11 +323,11 @@ static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma)
>        * ordering between msync() and page cache writeback.
>        */
>       if (zonefs_inode_is_seq(file_inode(file)) &&
> -         (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
> +         (desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE))
>               return -EINVAL;
>  
>       file_accessed(file);
> -     vma->vm_ops = &zonefs_file_vm_ops;
> +     desc->vm_ops = &zonefs_file_vm_ops;
>  
>       return 0;
>  }
> @@ -850,7 +852,7 @@ const struct file_operations zonefs_file_operations = {
>       .open           = zonefs_file_open,
>       .release        = zonefs_file_release,
>       .fsync          = zonefs_file_fsync,
> -     .mmap           = zonefs_file_mmap,
> +     .mmap_prepare   = zonefs_file_mmap_prepare,
>       .llseek         = zonefs_file_llseek,
>       .read_iter      = zonefs_file_read_iter,
>       .write_iter     = zonefs_file_write_iter,

Reply via email to