On Thu, Mar 06, 2025 at 12:25:53AM +0900, Ryosuke Yasuoka wrote:
> Some drivers could use vmap in drm_panic; however, vmap can sleep and
> takes locks. Since drm_panic needs to map pages from the panic handler,
> atomic_vmap requests pages with GFP_ATOMIC and maps KVA without taking
> locks or sleeping.
> 
> Signed-off-by: Ryosuke Yasuoka <ryasu...@redhat.com>
> ---
>  include/linux/vmalloc.h |   2 +
>  mm/internal.h           |   5 ++
>  mm/vmalloc.c            | 105 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 112 insertions(+)
> 
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index 31e9ffd936e3..c7a2a9a1976d 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -190,6 +190,8 @@ void * __must_check vrealloc_noprof(const void *p, size_t size, gfp_t flags)
>  extern void vfree(const void *addr);
>  extern void vfree_atomic(const void *addr);
>  
> +extern void *atomic_vmap(struct page **pages, unsigned int count,
> +                      unsigned long flags, pgprot_t prot);
>  extern void *vmap(struct page **pages, unsigned int count,
>                       unsigned long flags, pgprot_t prot);
>  void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot);
> diff --git a/mm/internal.h b/mm/internal.h
> index 109ef30fee11..134b332bf5b9 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1278,6 +1278,11 @@ int numa_migrate_check(struct folio *folio, struct vm_fault *vmf,
>  void free_zone_device_folio(struct folio *folio);
>  int migrate_device_coherent_folio(struct folio *folio);
>  
> +struct vm_struct *atomic_get_vm_area_node(unsigned long size, unsigned long align,
> +                                       unsigned long shift, unsigned long flags,
> +                                       unsigned long start, unsigned long end, int node,
> +                                       gfp_t gfp_mask, const void *caller);
> +
>  struct vm_struct *__get_vm_area_node(unsigned long size,
>                                    unsigned long align, unsigned long shift,
>                                    unsigned long flags, unsigned long start,
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index a6e7acebe9ad..f5c93779c60a 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1945,6 +1945,57 @@ static inline void setup_vmalloc_vm(struct vm_struct *vm,
>       va->vm = vm;
>  }
>  
> +static struct vmap_area *atomic_alloc_vmap_area(unsigned long size,
> +                                             unsigned long align,
> +                                             unsigned long vstart, unsigned long vend,
> +                                             int node, gfp_t gfp_mask,
> +                                             unsigned long va_flags, struct vm_struct *vm)
> +{
> +     struct vmap_node *vn;
> +     struct vmap_area *va;
> +     unsigned long addr;
> +
> +     if (unlikely(!size || offset_in_page(size) || !is_power_of_2(align)))
> +             return ERR_PTR(-EINVAL);
> +
> +     if (unlikely(!vmap_initialized))
> +             return ERR_PTR(-EBUSY);
> +
> +     va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
> +     if (unlikely(!va))
> +             return ERR_PTR(-ENOMEM);
> +
> +     /*
> +      * Only scan the relevant parts containing pointers to other objects
> +      * to avoid false negatives.
> +      */
> +     kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);
> +
> +     addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
> +                              size, align, vstart, vend);
> +
> +     trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend);
> +
> +     va->va_start = addr;
> +     va->va_end = addr + size;
> +     va->vm = NULL;
> +     va->flags = va_flags;
> +
> +     vm->addr = (void *)va->va_start;
> +     vm->size = va_size(va);
> +     va->vm = vm;
> +
> +     vn = addr_to_node(va->va_start);
> +
> +     insert_vmap_area(va, &vn->busy.root, &vn->busy.head);
> +
> +     BUG_ON(!IS_ALIGNED(va->va_start, align));
> +     BUG_ON(va->va_start < vstart);
> +     BUG_ON(va->va_end > vend);
> +
> +     return va;
> +}
> +
>  /*
>   * Allocate a region of KVA of the specified size and alignment, within the
>   * vstart and vend. If vm is passed in, the two will also be bound.
> @@ -3106,6 +3157,33 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm)
>       vm->flags &= ~VM_UNINITIALIZED;
>  }
>  
> +struct vm_struct *atomic_get_vm_area_node(unsigned long size, unsigned long align,
> +                                       unsigned long shift, unsigned long flags,
> +                                       unsigned long start, unsigned long end, int node,
> +                                       gfp_t gfp_mask, const void *caller)
> +{
> +     struct vmap_area *va;
> +     struct vm_struct *area;
> +
> +     size = ALIGN(size, 1ul << shift);
> +     if (unlikely(!size))
> +             return NULL;
> +
> +     area = kzalloc_node(sizeof(*area), gfp_mask, node);
> +     if (unlikely(!area))
> +             return NULL;
> +
> +     size += PAGE_SIZE;
> +     area->flags = flags;
> +     area->caller = caller;
> +
> +     va = atomic_alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area);
> +     if (IS_ERR(va))
> +             return NULL;
> +
> +     return area;
> +}
> +
>  struct vm_struct *__get_vm_area_node(unsigned long size,
>               unsigned long align, unsigned long shift, unsigned long flags,
>               unsigned long start, unsigned long end, int node,
> @@ -3418,6 +3496,33 @@ void vunmap(const void *addr)
>  }
>  EXPORT_SYMBOL(vunmap);
>  
> +void *atomic_vmap(struct page **pages, unsigned int count,
> +               unsigned long flags, pgprot_t prot)
> +{
> +     struct vm_struct *area;
> +     unsigned long addr;
> +     unsigned long size;             /* In bytes */
> +
> +     if (count > totalram_pages())
> +             return NULL;
> +
> +     size = (unsigned long)count << PAGE_SHIFT;
> +     area = atomic_get_vm_area_node(size, 1, PAGE_SHIFT, flags,
> +                                    VMALLOC_START, VMALLOC_END,
> +                                    NUMA_NO_NODE, GFP_ATOMIC,
> +                                    __builtin_return_address(0));
> +     if (!area)
> +             return NULL;
> +
> +     addr = (unsigned long)area->addr;
> +     if (vmap_pages_range(addr, addr + size, pgprot_nx(prot),
> +                          pages, PAGE_SHIFT) < 0) {
> +             return NULL;
> +     }
> +
> +     return area->addr;
> +}
> +
>  /**
>   * vmap - map an array of pages into virtually contiguous space
>   * @pages: array of page pointers
> -- 
> 2.48.1
> 
This is copy-pasted code, so it looks odd to me. The proposal is not a way
forward in my view. Unfortunately vmalloc is not compatible with GFP_ATOMIC:
there is at least one place, the allocation of page-table entries, where
GFP_KERNEL is hard-coded.

Doing this without locks and synchronization is not possible.
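For reference, a rough sketch of the page-table path I mean, based on the
generic pgalloc helpers (paraphrased, intermediate levels omitted; the exact
call chain differs per architecture):

    vmap_pages_range()
      vmap_pages_range_noflush()
        vmap_pages_pte_range()
          pte_alloc_kernel_track()          /* no gfp_t parameter at all */
            __pte_alloc_kernel()
              pte_alloc_one_kernel(&init_mm)
                pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_HIGHMEM, 0)

    /* include/asm-generic/pgalloc.h */
    #define GFP_PGTABLE_KERNEL	(GFP_KERNEL | __GFP_ZERO)

So even if the vmap_area itself is allocated with GFP_ATOMIC, populating the
page tables can still sleep.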

--
Uladzislau Rezki
