TTM now takes care of moving buffers to the best possible domain.

Signed-off-by: Friedrich Vock <friedrich.v...@gmx.de>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 191 +--------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h     |   4 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   7 -
 4 files changed, 3 insertions(+), 201 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cac0ca64367b3..3004adc6fa679 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1404,8 +1404,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev);
 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev);
 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);

-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
-                                 u64 num_vis_bytes);
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
                                             const u32 *registers,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e9168677ef0a6..92a0cffc1adc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -638,196 +638,19 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
        return 0;
 }

-/* Convert microseconds to bytes. */
-static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
-{
-       if (us <= 0 || !adev->mm_stats.log2_max_MBps)
-               return 0;
-
-       /* Since accum_us is incremented by a million per second, just
-        * multiply it by the number of MB/s to get the number of bytes.
-        */
-       return us << adev->mm_stats.log2_max_MBps;
-}
-
-static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
-{
-       if (!adev->mm_stats.log2_max_MBps)
-               return 0;
-
-       return bytes >> adev->mm_stats.log2_max_MBps;
-}
-
-/* Returns how many bytes TTM can move right now. If no bytes can be moved,
- * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
- * which means it can go over the threshold once. If that happens, the driver
- * will be in debt and no other buffer migrations can be done until that debt
- * is repaid.
- *
- * This approach allows moving a buffer of any size (it's important to allow
- * that).
- *
- * The currency is simply time in microseconds and it increases as the clock
- * ticks. The accumulated microseconds (us) are converted to bytes and
- * returned.
- */
-static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
-                                             u64 *max_bytes,
-                                             u64 *max_vis_bytes)
-{
-       s64 time_us, increment_us;
-       u64 free_vram, total_vram, used_vram;
-       /* Allow a maximum of 200 accumulated ms. This is basically per-IB
-        * throttling.
-        *
-        * It means that in order to get full max MBps, at least 5 IBs per
-        * second must be submitted and not more than 200ms apart from each
-        * other.
-        */
-       const s64 us_upper_bound = 200000;
-
-       if (!adev->mm_stats.log2_max_MBps) {
-               *max_bytes = 0;
-               *max_vis_bytes = 0;
-               return;
-       }
-
-       total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
-       used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
-       free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
-
-       spin_lock(&adev->mm_stats.lock);
-
-       /* Increase the amount of accumulated us. */
-       time_us = ktime_to_us(ktime_get());
-       increment_us = time_us - adev->mm_stats.last_update_us;
-       adev->mm_stats.last_update_us = time_us;
-       adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
-                                     us_upper_bound);
-
-       /* This prevents the short period of low performance when the VRAM
-        * usage is low and the driver is in debt or doesn't have enough
-        * accumulated us to fill VRAM quickly.
-        *
-        * The situation can occur in these cases:
-        * - a lot of VRAM is freed by userspace
-        * - the presence of a big buffer causes a lot of evictions
-        *   (solution: split buffers into smaller ones)
-        *
-        * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
-        * accum_us to a positive number.
-        */
-       if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
-               s64 min_us;
-
-               /* Be more aggressive on dGPUs. Try to fill a portion of free
-                * VRAM now.
-                */
-               if (!(adev->flags & AMD_IS_APU))
-                       min_us = bytes_to_us(adev, free_vram / 4);
-               else
-                       min_us = 0; /* Reset accum_us on APUs. */
-
-               adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
-       }
-
-       /* This is set to 0 if the driver is in debt to disallow (optional)
-        * buffer moves.
-        */
-       *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
-
-       /* Do the same for visible VRAM if half of it is free */
-       if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
-               u64 total_vis_vram = adev->gmc.visible_vram_size;
-               u64 used_vis_vram =
-                 amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
-
-               if (used_vis_vram < total_vis_vram) {
-                       u64 free_vis_vram = total_vis_vram - used_vis_vram;
-
-                       adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
-                                                         increment_us, us_upper_bound);
-
-                       if (free_vis_vram >= total_vis_vram / 2)
-                               adev->mm_stats.accum_us_vis =
-                                       max(bytes_to_us(adev, free_vis_vram / 2),
-                                           adev->mm_stats.accum_us_vis);
-               }
-
-               *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
-       } else {
-               *max_vis_bytes = 0;
-       }
-
-       spin_unlock(&adev->mm_stats.lock);
-}
-
-/* Report how many bytes have really been moved for the last command
- * submission. This can result in a debt that can stop buffer migrations
- * temporarily.
- */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
-                                 u64 num_vis_bytes)
-{
-       spin_lock(&adev->mm_stats.lock);
-       adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
-       adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
-       spin_unlock(&adev->mm_stats.lock);
-}
-
 static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
 {
-       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       struct amdgpu_cs_parser *p = param;
        struct ttm_operation_ctx ctx = {
                .interruptible = true,
                .no_wait_gpu = false,
                .resv = bo->tbo.base.resv
        };
-       uint32_t domain;
-       int r;

        if (bo->tbo.pin_count)
                return 0;

-       /* Don't move this buffer if we have depleted our allowance
-        * to move it. Don't move anything if the threshold is zero.
-        */
-       if (p->bytes_moved < p->bytes_moved_threshold &&
-           (!bo->tbo.base.dma_buf ||
-           list_empty(&bo->tbo.base.dma_buf->attachments))) {
-               if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-                   (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
-                       /* And don't move a CPU_ACCESS_REQUIRED BO to limited
-                        * visible VRAM if we've depleted our allowance to do
-                        * that.
-                        */
-                       if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
-                               domain = bo->preferred_domains;
-                       else
-                               domain = bo->allowed_domains;
-               } else {
-                       domain = bo->preferred_domains;
-               }
-       } else {
-               domain = bo->allowed_domains;
-       }
-
-retry:
-       amdgpu_bo_placement_from_domain(bo, domain);
-       r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-
-       p->bytes_moved += ctx.bytes_moved;
-       if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-           amdgpu_res_cpu_visible(adev, bo->tbo.resource))
-               p->bytes_moved_vis += ctx.bytes_moved;
-
-       if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
-               domain = bo->allowed_domains;
-               goto retry;
-       }
-
-       return r;
+       amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+       return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 }

 static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
@@ -947,13 +770,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                e->user_pages = NULL;
        }

-       amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
-                                         &p->bytes_moved_vis_threshold);
-       p->bytes_moved = 0;
-       p->bytes_moved_vis = 0;
-
        r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
-                              amdgpu_cs_bo_validate, p);
+                              amdgpu_cs_bo_validate, NULL);
        if (r) {
                DRM_ERROR("amdgpu_vm_validate() failed.\n");
                goto out_free_user_pages;
@@ -973,9 +791,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
        }

-       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
-                                    p->bytes_moved_vis);
-
        for (i = 0; i < p->gang_size; ++i)
                amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
                                         p->bo_list->gws_obj,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
index 39c33ad100cb7..e3d04ac4764be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -67,10 +67,6 @@ struct amdgpu_cs_parser {
        struct amdgpu_bo_list           *bo_list;
        struct amdgpu_mn                *mn;
        struct dma_fence                *fence;
-       uint64_t                        bytes_moved_threshold;
-       uint64_t                        bytes_moved_vis_threshold;
-       uint64_t                        bytes_moved;
-       uint64_t                        bytes_moved_vis;

        /* user fence */
        struct amdgpu_bo                *uf_bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 331b9ed8062c7..5834a95d680d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -621,13 +621,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
        if (unlikely(r != 0))
                return r;

-       if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-           amdgpu_res_cpu_visible(adev, bo->tbo.resource))
-               amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
-                                            ctx.bytes_moved);
-       else
-               amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
-
        if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
            bo->tbo.resource->mem_type == TTM_PL_VRAM) {
                struct dma_fence *fence;
--
2.44.0

Reply via email to