From: Marek Olšák <marek.ol...@amd.com> This decreases sizeof(struct amdgpu_cs_buffer) from 24 to 16 bytes by narrowing the priority_usage bitmask from uint64_t to uint32_t. To fit in 32 bits, the RADEON_PRIO_* values are renumbered so that each group of two (previously four) shares a priority, and the highest value is now 30. --- src/gallium/drivers/radeon/radeon_winsys.h | 39 ++++++++++--------- src/gallium/drivers/radeonsi/si_debug.c | 2 +- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 +-- src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 4 +- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 2 +- 6 files changed, 28 insertions(+), 27 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index bcd6831ed35..10c63ae4d82 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -108,63 +108,64 @@ enum radeon_value_id { RADEON_VRAM_USAGE, RADEON_VRAM_VIS_USAGE, RADEON_GTT_USAGE, RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */ RADEON_CURRENT_SCLK, RADEON_CURRENT_MCLK, RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */ RADEON_CS_THREAD_TIME, }; -/* Each group of four has the same priority. */ enum radeon_bo_priority { + /* Each group of two has the same priority. */ RADEON_PRIO_FENCE = 0, RADEON_PRIO_TRACE, - RADEON_PRIO_SO_FILLED_SIZE, + + RADEON_PRIO_SO_FILLED_SIZE = 2, RADEON_PRIO_QUERY, RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */ RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */ - RADEON_PRIO_DRAW_INDIRECT, + + RADEON_PRIO_DRAW_INDIRECT = 6, RADEON_PRIO_INDEX_BUFFER, - RADEON_PRIO_CP_DMA = 12, + RADEON_PRIO_CP_DMA = 8, + RADEON_PRIO_BORDER_COLORS, - RADEON_PRIO_CONST_BUFFER = 16, + RADEON_PRIO_CONST_BUFFER = 10, RADEON_PRIO_DESCRIPTORS, - RADEON_PRIO_BORDER_COLORS, - RADEON_PRIO_SAMPLER_BUFFER = 20, + RADEON_PRIO_SAMPLER_BUFFER = 12, RADEON_PRIO_VERTEX_BUFFER, - RADEON_PRIO_SHADER_RW_BUFFER = 24, + RADEON_PRIO_SHADER_RW_BUFFER = 14, RADEON_PRIO_COMPUTE_GLOBAL, - RADEON_PRIO_SAMPLER_TEXTURE = 28, + RADEON_PRIO_SAMPLER_TEXTURE = 16, RADEON_PRIO_SHADER_RW_IMAGE, - RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 32, - - RADEON_PRIO_COLOR_BUFFER = 36, + RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18, + RADEON_PRIO_COLOR_BUFFER, - RADEON_PRIO_DEPTH_BUFFER = 40, + RADEON_PRIO_DEPTH_BUFFER = 20, - RADEON_PRIO_COLOR_BUFFER_MSAA = 44, + RADEON_PRIO_COLOR_BUFFER_MSAA = 22, - RADEON_PRIO_DEPTH_BUFFER_MSAA = 48, + RADEON_PRIO_DEPTH_BUFFER_MSAA = 24, - RADEON_PRIO_SEPARATE_META = 52, + RADEON_PRIO_SEPARATE_META = 26, RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */ - RADEON_PRIO_SHADER_RINGS = 56, + 
RADEON_PRIO_SHADER_RINGS = 28, - RADEON_PRIO_SCRATCH_BUFFER = 60, + RADEON_PRIO_SCRATCH_BUFFER = 30, /* 63 is the maximum value */ }; struct winsys_handle; struct radeon_winsys_ctx; struct radeon_cmdbuf_chunk { unsigned cdw; /* Number of used dwords. */ unsigned max_dw; /* Maximum number of dwords. */ uint32_t *buf; /* The base pointer of the chunk. */ @@ -216,21 +217,21 @@ struct radeon_bo_metadata { }; enum radeon_feature_id { RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */ RADEON_FID_R300_CMASK_ACCESS, }; struct radeon_bo_list_item { uint64_t bo_size; uint64_t vm_address; - uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */ + uint32_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */ }; struct radeon_winsys { /** * The screen object this winsys was created for */ struct pipe_screen *screen; /** * Decrement the winsys reference count. diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 50375ce7cbe..d6207e68d12 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -562,21 +562,21 @@ static void si_dump_bo_list(struct si_context *sctx, (va - previous_va_end) / page_size); } } /* Print the buffer. */ fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ", size / page_size, va / page_size, (va + size) / page_size); /* Print the usage. */ for (j = 0; j < 64; j++) { - if (!(saved->bo_list[i].priority_usage & (1ull << j))) + if (!(saved->bo_list[i].priority_usage & (1u << j))) continue; fprintf(f, "%s%s", !hit ? 
"" : ", ", priority_to_string(j)); hit = true; } fprintf(f, "\n"); } fprintf(f, "\nNote: The holes represent memory not used by the IB.\n" " Other buffers can still be allocated there.\n\n"); } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index ec164175dbc..872e67a790a 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -622,21 +622,21 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs, struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; struct amdgpu_cs_buffer *buffer; int index; /* Fast exit for no-op calls. * This is very effective with suballocators and linear uploaders that * are outside of the winsys. */ if (bo == cs->last_added_bo && (usage & cs->last_added_bo_usage) == usage && - (1ull << priority) & cs->last_added_bo_priority_usage) + (1u << priority) & cs->last_added_bo_priority_usage) return cs->last_added_bo_index; if (!bo->sparse) { if (!bo->bo) { index = amdgpu_lookup_or_add_slab_buffer(acs, bo); if (index < 0) return 0; buffer = &cs->slab_buffers[index]; buffer->usage |= usage; @@ -651,21 +651,21 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs, buffer = &cs->real_buffers[index]; } else { index = amdgpu_lookup_or_add_sparse_buffer(acs, bo); if (index < 0) return 0; buffer = &cs->sparse_buffers[index]; } - buffer->u.real.priority_usage |= 1ull << priority; + buffer->u.real.priority_usage |= 1u << priority; buffer->usage |= usage; cs->last_added_bo = bo; cs->last_added_bo_index = index; cs->last_added_bo_usage = buffer->usage; cs->last_added_bo_priority_usage = buffer->u.real.priority_usage; return index; } static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib, @@ -1332,21 +1332,21 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) num_handles = 0; for (i = 0; i < cs->num_real_buffers; ++i) { struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i]; if (buffer->bo->is_local) 
continue; assert(buffer->u.real.priority_usage != 0); handles[num_handles] = buffer->bo->bo; - flags[num_handles] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4; + flags[num_handles] = (util_last_bit(buffer->u.real.priority_usage) - 1) / 2; ++num_handles; } if (num_handles) { r = amdgpu_bo_list_create(ws->dev, num_handles, handles, flags, &bo_list); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r); amdgpu_fence_signalled(cs->fence); cs->error_code = r; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 3b10cc66c21..9f5a4fd991a 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -39,21 +39,21 @@ struct amdgpu_ctx { uint64_t *user_fence_cpu_address_base; int refcount; unsigned initial_num_total_rejected_cs; unsigned num_rejected_cs; }; struct amdgpu_cs_buffer { struct amdgpu_winsys_bo *bo; union { struct { - uint64_t priority_usage; + uint32_t priority_usage; } real; struct { uint32_t real_idx; /* index of underlying real BO */ } slab; } u; enum radeon_bo_usage usage; }; enum ib_type { IB_MAIN, @@ -87,21 +87,21 @@ struct amdgpu_cs_context { unsigned num_sparse_buffers; unsigned max_sparse_buffers; struct amdgpu_cs_buffer *sparse_buffers; int buffer_indices_hashlist[4096]; struct amdgpu_winsys_bo *last_added_bo; unsigned last_added_bo_index; unsigned last_added_bo_usage; - uint64_t last_added_bo_priority_usage; + uint32_t last_added_bo_priority_usage; struct pipe_fence_handle **fence_dependencies; unsigned num_fence_dependencies; unsigned max_fence_dependencies; struct pipe_fence_handle **syncobj_to_signal; unsigned num_syncobj_to_signal; unsigned max_syncobj_to_signal; struct pipe_fence_handle *fence; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 90386027235..798be78504c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -359,21 +359,21 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs, index = cs->csc->slab_buffers[index].u.slab.real_idx; } else { index = radeon_lookup_or_add_real_buffer(cs, bo); } reloc = &cs->csc->relocs[index]; added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain); reloc->read_domains |= rd; reloc->write_domain |= wd; reloc->flags = MAX2(reloc->flags, priority); - cs->csc->relocs_bo[index].u.real.priority_usage |= 1ull << priority; + cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority; if (added_domains & RADEON_DOMAIN_VRAM) cs->base.used_vram += bo->base.size; else if (added_domains & RADEON_DOMAIN_GTT) cs->base.used_gart += bo->base.size; return index; } static int radeon_drm_cs_lookup_buffer(struct radeon_cmdbuf *rcs, diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 75fb09bd001..f4c6cbe1fa7 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -26,21 +26,21 @@ #ifndef RADEON_DRM_CS_H #define RADEON_DRM_CS_H #include "radeon_drm_bo.h" struct radeon_bo_item { struct radeon_bo *bo; union { struct { - uint64_t priority_usage; + uint32_t priority_usage; } real; struct { unsigned real_idx; } slab; } u; }; struct radeon_cs_context { uint32_t buf[16 * 1024]; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev