* David Hildenbrand <da...@redhat.com> [250512 08:34]:
> Let's provide variants of track_pfn_remap() and untrack_pfn() that won't
> mess with VMAs, and replace the usage in mm/memremap.c.
> 
> Add some documentation.
> 
> Reviewed-by: Lorenzo Stoakes <lorenzo.stoa...@oracle.com>
> Acked-by: Ingo Molnar <mi...@kernel.org> # x86 bits
> Signed-off-by: David Hildenbrand <da...@redhat.com>

Small nit with this one, but either way:

Reviewed-by: Liam R. Howlett <liam.howl...@oracle.com>

> ---
>  arch/x86/mm/pat/memtype.c | 14 ++++++++++++++
>  include/linux/pgtable.h   | 39 +++++++++++++++++++++++++++++++++++++++
>  mm/memremap.c             |  8 ++++----
>  3 files changed, 57 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
> index fa78facc6f633..1ec8af6cad6bf 100644
> --- a/arch/x86/mm/pat/memtype.c
> +++ b/arch/x86/mm/pat/memtype.c
> @@ -1068,6 +1068,20 @@ int pfnmap_setup_cachemode(unsigned long pfn, unsigned 
> long size, pgprot_t *prot
>       return 0;
>  }
>  
> +int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot)
> +{
> +     const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;

Here, the << PAGE_SHIFT isn't really needed, because...

> +
> +     return reserve_pfn_range(paddr, size, prot, 0);
> +}
> +
> +void pfnmap_untrack(unsigned long pfn, unsigned long size)
> +{
> +     const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
> +
> +     free_pfn_range(paddr, size);
> +}
> +
>  /*
>   * untrack_pfn is called while unmapping a pfnmap for a region.
>   * untrack can be called for a specific region indicated by pfn and size or
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index be1745839871c..90f72cd358390 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -1502,6 +1502,16 @@ static inline int pfnmap_setup_cachemode(unsigned long 
> pfn, unsigned long size,
>       return 0;
>  }
>  
> +static inline int pfnmap_track(unsigned long pfn, unsigned long size,
> +             pgprot_t *prot)
> +{
> +     return 0;
> +}
> +
> +static inline void pfnmap_untrack(unsigned long pfn, unsigned long size)
> +{
> +}
> +
>  /*
>   * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
>   * tables copied during copy_page_range(). Will store the pfn to be
> @@ -1575,6 +1585,35 @@ extern int track_pfn_remap(struct vm_area_struct *vma, 
> pgprot_t *prot,
>   */
>  int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
>               pgprot_t *prot);
> +
> +/**
> + * pfnmap_track - track a pfn range
> + * @pfn: the start of the pfn range
> + * @size: the size of the pfn range in bytes
> + * @prot: the pgprot to track
> + *
> + * Requested the pfn range to be 'tracked' by a hardware implementation and
> + * setup the cachemode in @prot similar to pfnmap_setup_cachemode().
> + *
> + * This allows for fine-grained control of memory cache behaviour at page
> + * level granularity. Tracking memory this way is persisted across VMA splits
> + * (VMA merging does not apply for VM_PFNMAP).
> + *
> + * Currently, there is only one implementation for this - x86 Page Attribute
> + * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
> + *
> + * Returns 0 on success and -EINVAL on error.
> + */
> +int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot);
> +
> +/**
> + * pfnmap_untrack - untrack a pfn range
> + * @pfn: the start of the pfn range
> + * @size: the size of the pfn range in bytes
> + *
> + * Untrack a pfn range previously tracked through pfnmap_track().
> + */
> +void pfnmap_untrack(unsigned long pfn, unsigned long size);
>  extern int track_pfn_copy(struct vm_area_struct *dst_vma,
>               struct vm_area_struct *src_vma, unsigned long *pfn);
>  extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
> diff --git a/mm/memremap.c b/mm/memremap.c
> index 2aebc1b192da9..c417c843e9b1f 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -130,7 +130,7 @@ static void pageunmap_range(struct dev_pagemap *pgmap, 
> int range_id)
>       }
>       mem_hotplug_done();
>  
> -     untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
> +     pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
>       pgmap_array_delete(range);
>  }
>  
> @@ -211,8 +211,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, 
> struct mhp_params *params,
>       if (nid < 0)
>               nid = numa_mem_id();
>  
> -     error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 
> 0,
> -                     range_len(range));
> +     error = pfnmap_track(PHYS_PFN(range->start), range_len(range),

This caller (one of two) converts range->start to the pfn.

The other user is pfnmap_track_ctx_alloc() in mm/memory.c which is
called from remap_pfn_range(), which also has addr.

Couldn't we just use the address directly?

I think the same holds for untrack as well.

> +                          &params->pgprot);
>       if (error)
>               goto err_pfn_remap;
>  
> @@ -277,7 +277,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, 
> struct mhp_params *params,
>       if (!is_private)
>               kasan_remove_zero_shadow(__va(range->start), range_len(range));
>  err_kasan:
> -     untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
> +     pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
>  err_pfn_remap:
>       pgmap_array_delete(range);
>       return error;
> -- 
> 2.49.0
> 

Reply via email to