On Sun, May 20, 2018 at 3:27 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > > On 05/19/2018 06:22 PM, Bas Nieuwenhuizen wrote: >> >> On Fri, May 18, 2018 at 6:00 PM, Samuel Pitoiset >> <samuel.pitoi...@gmail.com> wrote: >>> >>> We still use 64-bit GPU pointers for all ring buffers because >>> llvm.amdgcn.implicit.buffer.ptr doesn't seem to support 32-bit >>> GPU pointers for now. This can be improved later anyways. >>> >>> Vega10: >>> Totals from affected shaders: >>> SGPRS: 1008722 -> 1026710 (1.78 %) >>> VGPRS: 706580 -> 707136 (0.08 %) >>> Spilled SGPRs: 22555 -> 22209 (-1.53 %) >>> Spilled VGPRs: 75 -> 75 (0.00 %) >>> Code Size: 34819208 -> 35202140 (1.10 %) bytes >>> Max Waves: 175423 -> 175086 (-0.19 %) >>> >>> Polaris10: >>> Totals from affected shaders: >>> SGPRS: 1029849 -> 1036517 (0.65 %) >>> VGPRS: 709984 -> 708872 (-0.16 %) >>> Spilled SGPRs: 22672 -> 22309 (-1.60 %) >>> Spilled VGPRs: 82 -> 66 (-19.51 %) >>> Scratch size: 76 -> 60 (-21.05 %) dwords per thread >>> Code Size: 34915336 -> 35309752 (1.13 %) bytes >>> Max Waves: 151221 -> 151677 (0.30 %) >>> >>> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> >>> --- >>> src/amd/vulkan/radv_cmd_buffer.c | 13 +++++++------ >>> src/amd/vulkan/radv_device.c | 6 ++++-- >>> src/amd/vulkan/radv_nir_to_llvm.c | 24 +++++++++++++++--------- >>> src/amd/vulkan/radv_private.h | 18 ++++++++++++++---- >>> 4 files changed, 40 insertions(+), 21 deletions(-) >>> >>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c >>> b/src/amd/vulkan/radv_cmd_buffer.c >>> index 3636b2c8d9..5ab577b4c5 100644 >>> --- a/src/amd/vulkan/radv_cmd_buffer.c >>> +++ b/src/amd/vulkan/radv_cmd_buffer.c >>> @@ -586,11 +586,12 @@ radv_emit_userdata_address(struct radv_cmd_buffer >>> *cmd_buffer, >>> uint32_t base_reg = pipeline->user_data_0[stage]; >>> if (loc->sgpr_idx == -1) >>> return; >>> - assert(loc->num_sgprs == 2); >>> + >>> + assert(loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2)); >>> assert(!loc->indirect); >>> >>> - radv_emit_shader_pointer(cmd_buffer->cs, >>> - base_reg + loc->sgpr_idx * 4, va); >>> + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, >>> + base_reg + loc->sgpr_idx * 4, va, >>> false); >>> } >>> >>> static void >>> @@ -1442,10 +1443,10 @@ emit_stage_descriptor_set_userdata(struct >>> radv_cmd_buffer *cmd_buffer, >>> return; >>> >>> assert(!desc_set_loc->indirect); >>> - assert(desc_set_loc->num_sgprs == 2); >>> + assert(desc_set_loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2)); >>> >>> - radv_emit_shader_pointer(cmd_buffer->cs, >>> - base_reg + desc_set_loc->sgpr_idx * 4, >>> va); >>> + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, >>> + base_reg + desc_set_loc->sgpr_idx * 4, >>> va, false); >>> } >>> >>> static void >>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c >>> index c52b3a591f..ee38077235 100644 >>> --- a/src/amd/vulkan/radv_device.c >>> +++ b/src/amd/vulkan/radv_device.c >>> @@ -1963,7 +1963,8 @@ radv_emit_global_shader_pointers(struct radv_queue >>> *queue, >>> >>> R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; >>> >>> for (int i = 0; i < ARRAY_SIZE(regs); ++i) { >>> - radv_emit_shader_pointer(cs, regs[i], va); >>> + radv_emit_shader_pointer(queue->device, cs, >>> regs[i], >>> + va, true); >>> } >>> } else { >>> uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, >>> @@ -1974,7 +1975,8 @@ radv_emit_global_shader_pointers(struct radv_queue >>> *queue, >>> R_00B530_SPI_SHADER_USER_DATA_LS_0}; >>> >>> for (int i = 0; i < ARRAY_SIZE(regs); ++i) { >>> - radv_emit_shader_pointer(cs, regs[i], va); >>> + radv_emit_shader_pointer(queue->device, cs, >>> regs[i], >>> + va, true); >> >> >> Any reason we don't allocate the global descriptor bo in 32-bit space? > > > Because it looks like that amdgcn.implicit.buffer.ptr doesn't seem to work > with 32-bit addr space. Or maybe I did something wrong when I tried this.
oh right. The series is Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> > > >> >>> } >>> } >>> } >>> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c >>> b/src/amd/vulkan/radv_nir_to_llvm.c >>> index d9d89ac56d..10114cbe18 100644 >>> --- a/src/amd/vulkan/radv_nir_to_llvm.c >>> +++ b/src/amd/vulkan/radv_nir_to_llvm.c >>> @@ -569,7 +569,10 @@ set_loc_shader(struct radv_shader_context *ctx, int >>> idx, uint8_t *sgpr_idx, >>> static void >>> set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t >>> *sgpr_idx) >>> { >>> - set_loc_shader(ctx, idx, sgpr_idx, 2); >>> + bool use_32bit_pointers = HAVE_32BIT_POINTERS && >>> + idx != AC_UD_SCRATCH_RING_OFFSETS; >>> + >>> + set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); >>> } >>> >>> static void >>> @@ -580,7 +583,7 @@ set_loc_desc(struct radv_shader_context *ctx, int >>> idx, uint8_t *sgpr_idx, >>> &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx]; >>> assert(ud_info); >>> >>> - set_loc(ud_info, sgpr_idx, 2, indirect_offset); >>> + set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, >>> indirect_offset); >>> } >>> >>> struct user_sgpr_info { >>> @@ -618,7 +621,8 @@ count_vs_user_sgprs(struct radv_shader_context *ctx) >>> { >>> uint8_t count = 0; >>> >>> - count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0; >>> + if (ctx->shader_info->info.vs.has_vertex_buffers) >>> + count += HAVE_32BIT_POINTERS ? 1 : 2; >>> count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2; >>> >>> return count; >>> @@ -685,13 +689,13 @@ static void allocate_user_sgprs(struct >>> radv_shader_context *ctx, >>> user_sgpr_info->sgpr_count++; >>> >>> if (ctx->shader_info->info.loads_push_constants) >>> - user_sgpr_info->sgpr_count += 2; >>> + user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : >>> 2; >>> >>> uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 >>> : 16; >>> uint32_t remaining_sgprs = available_sgprs - >>> user_sgpr_info->sgpr_count; >>> >>> if (remaining_sgprs / 2 < >>> util_bitcount(ctx->shader_info->info.desc_set_used_mask)) { >>> - user_sgpr_info->sgpr_count += 2; >>> + user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : >>> 2; >>> user_sgpr_info->indirect_all_descriptor_sets = true; >>> } else { >>> user_sgpr_info->sgpr_count += >>> util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2; >>> @@ -707,7 +711,7 @@ declare_global_input_sgprs(struct radv_shader_context >>> *ctx, >>> struct arg_info *args, >>> LLVMValueRef *desc_sets) >>> { >>> - LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8); >>> + LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8); >>> unsigned num_sets = ctx->options->layout ? >>> ctx->options->layout->num_sets : 0; >>> unsigned stage_mask = 1 << stage; >>> @@ -725,7 +729,7 @@ declare_global_input_sgprs(struct radv_shader_context >>> *ctx, >>> } >>> } >>> } else { >>> - add_array_arg(args, ac_array_in_const_addr_space(type), >>> desc_sets); >>> + add_array_arg(args, ac_array_in_const32_addr_space(type), >>> desc_sets); >>> } >>> >>> if (ctx->shader_info->info.loads_push_constants) { >>> @@ -745,7 +749,8 @@ declare_vs_specific_input_sgprs(struct >>> radv_shader_context *ctx, >>> (stage == MESA_SHADER_VERTEX || >>> (has_previous_stage && previous_stage == >>> MESA_SHADER_VERTEX))) { >>> if (ctx->shader_info->info.vs.has_vertex_buffers) { >>> - add_arg(args, ARG_SGPR, >>> ac_array_in_const_addr_space(ctx->ac.v4i32), >>> + add_arg(args, ARG_SGPR, >>> + >>> ac_array_in_const32_addr_space(ctx->ac.v4i32), >>> &ctx->vertex_buffers); >>> } >>> add_arg(args, ARG_SGPR, ctx->ac.i32, >>> &ctx->abi.base_vertex); >>> @@ -1878,7 +1883,8 @@ static LLVMValueRef radv_get_sampler_desc(struct >>> ac_shader_abi *abi, >>> index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, >>> stride / type_size, 0), ""); >>> >>> list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, >>> offset, 0)); >>> - list = LLVMBuildPointerCast(builder, list, >>> ac_array_in_const_addr_space(type), ""); >>> + list = LLVMBuildPointerCast(builder, list, >>> + ac_array_in_const32_addr_space(type), >>> ""); >>> >>> return ac_build_load_to_sgpr(&ctx->ac, list, index); >>> } >>> diff --git a/src/amd/vulkan/radv_private.h >>> b/src/amd/vulkan/radv_private.h >>> index adfd75c2a8..e2fa58d8d1 100644 >>> --- a/src/amd/vulkan/radv_private.h >>> +++ b/src/amd/vulkan/radv_private.h >>> @@ -57,6 +57,7 @@ >>> #include "ac_nir_to_llvm.h" >>> #include "ac_gpu_info.h" >>> #include "ac_surface.h" >>> +#include "ac_llvm_build.h" >>> #include "radv_descriptor_set.h" >>> #include "radv_extensions.h" >>> #include "radv_cs.h" >>> @@ -1130,12 +1131,21 @@ bool radv_get_memory_fd(struct radv_device >>> *device, >>> int *pFD); >>> >>> static inline void >>> -radv_emit_shader_pointer(struct radeon_winsys_cs *cs, >>> - uint32_t sh_offset, uint64_t va) >>> +radv_emit_shader_pointer(struct radv_device *device, >>> + struct radeon_winsys_cs *cs, >>> + uint32_t sh_offset, uint64_t va, bool global) >>> { >>> - radeon_set_sh_reg_seq(cs, sh_offset, 2); >>> + bool use_32bit_pointers = HAVE_32BIT_POINTERS && !global; >>> + >>> + radeon_set_sh_reg_seq(cs, sh_offset, use_32bit_pointers ? 1 : 2); >>> radeon_emit(cs, va); >>> - radeon_emit(cs, va >> 32); >>> + >>> + if (use_32bit_pointers) { >>> + assert(va == 0 || >>> + (va >> 32) == >>> device->physical_device->rad_info.address32_hi); >>> + } else { >>> + radeon_emit(cs, va >> 32); >>> + } >>> } >>> >>> static inline struct radv_descriptor_state * >>> -- >>> 2.17.0 >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev