By moving the booleans to flags and shrinking some fields we can stop
spilling job allocation into the 1k SLAB even with two appended indirect
buffers.

End result for struct amdgpu_job:

        /* size: 448, cachelines: 7, members: 24 */
        /* forced alignments: 1 */

So appending two IB buffers, which seems to be a typical case when
running a game such as Cyberpunk 2077, makes every submission perfectly
fit into the 512-byte SLAB with no wastage.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@igalia.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 19 ++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c      |  7 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c     | 29 +++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  6 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h     | 36 +++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 17 +++++-----
 11 files changed, 72 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5cc5f59e3018..181296ea9df7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -108,13 +108,15 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
                           struct drm_amdgpu_cs_chunk_ib *chunk_ib,
                           unsigned int *num_ibs)
 {
+       struct amdgpu_job *job;
        int r;
 
        r = amdgpu_cs_job_idx(p, chunk_ib);
        if (r < 0)
                return r;
 
-       if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
+       if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type) ||
+           WARN_ON_ONCE(overflows_type(num_ibs[r], typeof(job->num_ibs))))
                return -EINVAL;
 
        ++(num_ibs[r]);
@@ -296,7 +298,8 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
                                       num_ibs[i], &p->jobs[i]);
                if (ret)
                        goto free_all_kdata;
-               p->jobs[i]->enforce_isolation = 
p->adev->enforce_isolation[fpriv->xcp_id];
+               if (p->adev->enforce_isolation[fpriv->xcp_id])
+                       p->jobs[i]->flags |= AMDGPU_ENFORCE_ISOLATION;
        }
        p->gang_leader = p->jobs[p->gang_leader_idx];
 
@@ -367,7 +370,7 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
        }
 
        if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
-               job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
+               job->flags |= AMDGPU_PREAMBLE_IB_PRESENT;
 
        r =  amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
                           chunk_ib->ib_bytes : 0,
@@ -583,8 +586,8 @@ static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
                p->jobs[i]->shadow_va = shadow->shadow_va;
                p->jobs[i]->csa_va = shadow->csa_va;
                p->jobs[i]->gds_va = shadow->gds_va;
-               p->jobs[i]->init_shadow =
-                       shadow->flags & 
AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+               if (shadow->flags & 
AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
+                       p->jobs[i]->flags |= AMDGPU_INIT_SHADOW;
        }
 
        return 0;
@@ -1004,7 +1007,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
*p,
 
 static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
 {
-       int i, j;
+       unsigned int i, j;
 
        if (!trace_amdgpu_cs_enabled())
                return;
@@ -1352,9 +1355,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                                   p->fence);
        amdgpu_cs_post_dependencies(p);
 
-       if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+       if ((leader->flags & AMDGPU_PREAMBLE_IB_PRESENT) &&
            !p->ctx->preamble_presented) {
-               leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+               leader->flags |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
                p->ctx->preamble_presented = true;
        }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 49ca8c814455..447345cb94db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1904,7 +1904,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
amdgpu_ring *ring)
                job = to_amdgpu_job(s_job);
                if (preempted && (&job->hw_fence) == fence)
                        /* mark the job as preempted */
-                       job->preemption_status |= AMDGPU_IB_PREEMPTED;
+                       job->flags |= AMDGPU_IB_PREEMPTED;
        }
        spin_unlock(&sched->job_list_lock);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 784b03abb3a4..f224547b6863 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1430,7 +1430,7 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct 
amdgpu_ring *ring)
        if (r)
                goto err;
 
-       job->enforce_isolation = true;
+       job->flags |= AMDGPU_ENFORCE_ISOLATION;
 
        ib = &job->ibs[0];
        for (i = 0; i <= ring->funcs->align_mask; ++i)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 1c19a65e6553..56058f18e0ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -658,7 +658,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, 
uint32_t vmid,
                goto error_alloc;
 
        job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
-       job->vm_needs_flush = true;
+       job->flags |= AMDGPU_VM_NEEDS_FLUSH;
        job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        fence = amdgpu_job_submit(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 2ea98ec60220..c6d11311dcd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -153,7 +153,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
                shadow_va = job->shadow_va;
                csa_va = job->csa_va;
                gds_va = job->gds_va;
-               init_shadow = job->init_shadow;
+               init_shadow = job->flags & AMDGPU_INIT_SHADOW;
        } else {
                vm = NULL;
                fence_ctx = 0;
@@ -235,8 +235,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
                status |= AMDGPU_HAVE_CTX_SWITCH;
 
        if (job && ring->funcs->emit_cntxcntl) {
-               status |= job->preamble_status;
-               status |= job->preemption_status;
+               status |= job->flags & (AMDGPU_PREAMBLE_IB_PRESENT |
+                                       AMDGPU_PREAMBLE_IB_PRESENT_FIRST |
+                                       AMDGPU_IB_PREEMPTED);
                amdgpu_ring_emit_cntxcntl(ring, status);
        }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 8e712a11aba5..b9e18a806b5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -276,8 +276,8 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
        struct amdgpu_device *adev = ring->adev;
        unsigned vmhub = ring->vm_hub;
        struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+       uint32_t flags = vm->use_cpu_for_update ? AMDGPU_VM_NEEDS_FLUSH : 0;
        uint64_t fence_context = adev->fence_context + ring->idx;
-       bool needs_flush = vm->use_cpu_for_update;
        uint64_t updates = amdgpu_vm_tlb_seq(vm);
        int r;
 
@@ -320,7 +320,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
                                return 0;
                        }
                }
-               needs_flush = true;
+               flags = AMDGPU_VM_NEEDS_FLUSH;
        }
 
        /* Good we can use this VMID. Remember this submission as
@@ -330,8 +330,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
        if (r)
                return r;
 
-       job->vm_needs_flush = needs_flush;
-       job->spm_update_needed = true;
+       job->flags = AMDGPU_SPM_UPDATE_NEEDED | flags;
        return 0;
 }
 
@@ -357,7 +356,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
        uint64_t updates = amdgpu_vm_tlb_seq(vm);
        int r;
 
-       job->vm_needs_flush = vm->use_cpu_for_update;
+       if (vm->use_cpu_for_update)
+               job->flags |= AMDGPU_VM_NEEDS_FLUSH;
 
        /* Check if we can use a VMID already assigned to this VM */
        list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
@@ -389,7 +389,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
                if (r)
                        return r;
 
-               job->vm_needs_flush |= needs_flush;
+               if (needs_flush)
+                       job->flags |= AMDGPU_VM_NEEDS_FLUSH;
                return 0;
        }
 
@@ -415,6 +416,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
        struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
        struct amdgpu_vmid *idle = NULL;
        struct amdgpu_vmid *id = NULL;
+       unsigned int vmid;
        int r = 0;
 
        mutex_lock(&id_mgr->lock);
@@ -441,19 +443,26 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
                        if (r)
                                goto error;
 
-                       job->vm_needs_flush = true;
+                       job->flags |= AMDGPU_VM_NEEDS_FLUSH;
                }
 
                list_move_tail(&id->list, &id_mgr->ids_lru);
        }
 
-       job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
-       if (job->vm_needs_flush) {
+       if (amdgpu_vmid_gds_switch_needed(id, job))
+               job->flags |= AMDGPU_GDS_SWITCH_NEEDED;
+       if (job->flags & AMDGPU_VM_NEEDS_FLUSH) {
                id->flushed_updates = amdgpu_vm_tlb_seq(vm);
                dma_fence_put(id->last_flush);
                id->last_flush = NULL;
        }
-       job->vmid = id - id_mgr->ids;
+
+       vmid = id - id_mgr->ids;
+       if (WARN_ON_ONCE(overflows_type(vmid, typeof(job->vmid)))) {
+               r = -ERANGE;
+               goto error;
+       }
+       job->vmid = vmid;
        job->pasid = vm->pasid;
 
        id->gds_base = job->gds_base;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1899c601c95c..dca54a47dfff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -231,13 +231,17 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
 void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
                              struct amdgpu_bo *gws, struct amdgpu_bo *oa)
 {
+       uint32_t size;
+
        if (gds) {
                job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
                job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
        }
        if (gws) {
                job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
-               job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+               size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+               WARN_ON_ONCE(overflows_type(size, job->gws_size));
+               job->gws_size = size;
        }
        if (oa) {
                job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 7e5fe3d73a06..edb18df42816 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -28,13 +28,18 @@
 #include "amdgpu_ring.h"
 
 /* bit set means command submit involves a preamble IB */
-#define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0)
+#define AMDGPU_PREAMBLE_IB_PRESENT             (1 << 0)
 /* bit set means preamble IB is first presented in belonging context */
-#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1)
+#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST       (1 << 1)
 /* bit set means context switch occured */
-#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2)
+#define AMDGPU_HAVE_CTX_SWITCH                 (1 << 2)
 /* bit set means IB is preempted */
-#define AMDGPU_IB_PREEMPTED                 (1 << 3)
+#define AMDGPU_IB_PREEMPTED                    (1 << 3)
+#define AMDGPU_VM_NEEDS_FLUSH                  (1 << 4)
+#define AMDGPU_GDS_SWITCH_NEEDED               (1 << 5)
+#define AMDGPU_SPM_UPDATE_NEEDED               (1 << 6)
+#define AMDGPU_ENFORCE_ISOLATION               (1 << 7)
+#define AMDGPU_INIT_SHADOW                     (1 << 8)
 
 #define to_amdgpu_job(sched_job)               \
                container_of((sched_job), struct amdgpu_job, base)
@@ -50,21 +55,18 @@ struct amdgpu_job {
        struct amdgpu_sync      explicit_sync;
        struct dma_fence        hw_fence;
        struct dma_fence        *gang_submit;
-       bool                    vm_needs_flush;
-       bool                    gds_switch_needed;
-       bool                    spm_update_needed;
-       bool                    enforce_isolation;
-       bool                    init_shadow;
-       unsigned                vmid;
-       unsigned                pasid;
-       uint32_t                num_ibs;
-       uint32_t                preamble_status;
-       uint32_t                preemption_status;
+
+       uint32_t                pasid;
+       uint16_t                flags;
+       uint16_t                job_run_counter; /* >= 1 means a resubmit job */
+       uint8_t                 vmid;
+       uint8_t                 num_ibs;
+
+       uint16_t                gws_size; /* in pages so enough and prevents 
holes */
+       uint32_t                gws_base;
+
        uint32_t                gds_base, gds_size;
-       uint32_t                gws_base, gws_size;
        uint32_t                oa_base, oa_size;
-       /* job_run_counter >= 1 means a resubmit job */
-       uint32_t                job_run_counter;
        uint64_t                vm_pd_addr;
        uint64_t                generation;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 383fce40d4dd..913e142564bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -235,7 +235,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
                           __entry->vmid = job->vmid;
                           __entry->vm_hub = ring->vm_hub,
                           __entry->pd_addr = job->vm_pd_addr;
-                          __entry->needs_flush = job->vm_needs_flush;
+                          __entry->needs_flush = job->flags & 
AMDGPU_VM_NEEDS_FLUSH;
                           ),
            TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx 
needs_flush=%u",
                      __entry->pasid, __get_str(ring), __entry->vmid,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 01ae2f88dec8..1f2d252802a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2163,7 +2163,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device 
*adev,
                (*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
                                                        adev->gmc.pdb0_bo :
                                                        adev->gart.bo);
-               (*job)->vm_needs_flush = true;
+               (*job)->flags |= AMDGPU_VM_NEEDS_FLUSH;
        }
        if (!resv)
                return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5c07777d3239..04aa1c75cbc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -726,10 +726,11 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring 
*ring,
        if (job->vmid == 0)
                return false;
 
-       if (job->vm_needs_flush || ring->has_compute_vm_bug)
+       if ((job->flags & AMDGPU_VM_NEEDS_FLUSH) || ring->has_compute_vm_bug)
                return true;
 
-       if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+       if (ring->funcs->emit_gds_switch &&
+           (job->flags & AMDGPU_GDS_SWITCH_NEEDED))
                return true;
 
        if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
@@ -756,11 +757,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
        struct amdgpu_device *adev = ring->adev;
        unsigned vmhub = ring->vm_hub;
        struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-       struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
-       bool spm_update_needed = job->spm_update_needed;
        bool gds_switch_needed = ring->funcs->emit_gds_switch &&
-               job->gds_switch_needed;
-       bool vm_flush_needed = job->vm_needs_flush;
+                                (job->flags & AMDGPU_GDS_SWITCH_NEEDED);
+       bool spm_update_needed = job->flags & AMDGPU_SPM_UPDATE_NEEDED;
+       bool vm_flush_needed = job->flags & AMDGPU_VM_NEEDS_FLUSH;
+       struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
        struct dma_fence *fence = NULL;
        bool pasid_mapping_needed = false;
        unsigned int patch;
@@ -786,7 +787,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
                ring->funcs->emit_wreg;
 
        if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
-           !(job->enforce_isolation && !job->vmid))
+           !((job->flags & AMDGPU_ENFORCE_ISOLATION) && !job->vmid))
                return 0;
 
        amdgpu_ring_ib_begin(ring);
@@ -799,7 +800,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
 
        if (adev->gfx.enable_cleaner_shader &&
            ring->funcs->emit_cleaner_shader &&
-           job->enforce_isolation)
+           (job->flags & AMDGPU_ENFORCE_ISOLATION))
                ring->funcs->emit_cleaner_shader(ring);
 
        if (vm_flush_needed) {
-- 
2.48.0

Reply via email to