Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead

Felix Kuehling Tue, 13 Jul 2021 13:12:32 -0700

Am 2021-07-13 um 9:32 a.m. schrieb Christian König:
> For allocations larger than 48MiB, we need more than a page for the
> housekeeping in the worst case, resulting in the usual vmalloc overhead.
>
> Try to avoid this by assuming the good case and only falling back to the
> worst case if this didn't work.
>
> Signed-off-by: Christian König <christian.koe...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 80 +++++++++++++++-----
>  1 file changed, 60 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index 2fd77c36a1ff..ab8c5e28df7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -361,19 +361,23 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>   * @man: TTM memory type manager
>   * @tbo: TTM BO we need this range for
>   * @place: placement flags and restrictions
> - * @mem: the resulting mem object
> + * @num_nodes: number of page nodes to use.
> + * @pages_per_node: number of pages per node to use.
> + * @res: the resulting mem object
>   *
>   * Allocate VRAM for the given BO.
>   */
>  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>                              struct ttm_buffer_object *tbo,
>                              const struct ttm_place *place,
> +                            unsigned long num_nodes,
> +                            unsigned long pages_per_node,
>                              struct ttm_resource **res)
>  {
> -     unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>       struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>       struct amdgpu_device *adev = to_amdgpu_device(mgr);
>       uint64_t vis_usage = 0, mem_bytes, max_bytes;
> +     unsigned long lpfn, pages_left, pages;
>       struct ttm_range_mgr_node *node;
>       struct drm_mm *mm = &mgr->mm;
>       enum drm_mm_insert_mode mode;
> @@ -395,21 +399,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>               goto error_sub;
>       }
>  
> -     if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> -             pages_per_node = ~0ul;
> -             num_nodes = 1;
> -     } else {
> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> -             pages_per_node = HPAGE_PMD_NR;
> -#else
> -             /* default to 2MB */
> -             pages_per_node = 2UL << (20UL - PAGE_SHIFT);
> -#endif
> -             pages_per_node = max_t(uint32_t, pages_per_node,
> -                                    tbo->page_alignment);
> -             num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
> -     }
> -
>       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>                       GFP_KERNEL | __GFP_ZERO);
>       if (!node) {
> @@ -431,10 +420,15 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>       i = 0;
>       spin_lock(&mgr->lock);
>       while (pages_left) {
> -             uint32_t alignment = tbo->page_alignment;
> +             unsigned long alignment = tbo->page_alignment;
> +
> +             if (i >= num_nodes) {
> +                     r = -E2BIG;
> +                     goto error_free;
> +             }
>  
>               if (pages >= pages_per_node)
> -                     alignment = pages_per_node;
> +                     alignment = max(alignment, pages_per_node);


I don't understand this change. Is this an unrelated fix? pages_per_node
is already bumped up to tbo->page_alignment in amdgpu_vram_mgr_alloc. So
this "max" operation here seems redundant.

Other than that, the patch is

Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>

@JinHuiEric, can you confirm the performance improvement?

Thanks,
  Felix


>  
>               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>                                               alignment, 0, place->fpfn,
> @@ -483,6 +477,52 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>       return r;
>  }
>  
> +/**
> + * amdgpu_vram_mgr_alloc - allocate new range
> + *
> + * @man: TTM memory type manager
> + * @tbo: TTM BO we need this range for
> + * @place: placement flags and restrictions
> + * @res: the resulting mem object
> + *
> + * Allocate VRAM for the given BO.
> + */
> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
> +                              struct ttm_buffer_object *tbo,
> +                              const struct ttm_place *place,
> +                              struct ttm_resource **res)
> +{
> +     unsigned long num_nodes, pages_per_node;
> +     struct ttm_range_mgr_node *node;
> +     int r;
> +
> +     if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
> +             return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, res);
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +     pages_per_node = HPAGE_PMD_NR;
> +#else
> +     /* default to 2MB */
> +     pages_per_node = 2UL << (20UL - PAGE_SHIFT);
> +#endif
> +     pages_per_node = max_t(uint32_t, pages_per_node, tbo->page_alignment);
> +     num_nodes = DIV_ROUND_UP_ULL(PFN_UP(tbo->base.size), pages_per_node);
> +
> +     if (struct_size(node, mm_nodes, num_nodes) > PAGE_SIZE) {
> +             size_t size = PAGE_SIZE;
> +
> +             size -= sizeof(struct ttm_range_mgr_node);
> +             size /= sizeof(struct drm_mm_node);
> +             r = amdgpu_vram_mgr_new(man, tbo, place, size, pages_per_node,
> +                                     res);
> +             if (r != -E2BIG)
> +                     return r;
> +     }
> +
> +     return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
> +                                res);
> +}
> +
>  /**
>   * amdgpu_vram_mgr_del - free ranges
>   *
> @@ -680,7 +720,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
>  }
>  
>  static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
> -     .alloc  = amdgpu_vram_mgr_new,
> +     .alloc  = amdgpu_vram_mgr_alloc,
>       .free   = amdgpu_vram_mgr_del,
>       .debug  = amdgpu_vram_mgr_debug
>  };
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead

Reply via email to