On Sun, May 20, 2018 at 3:27 PM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
>
>
> On 05/19/2018 06:22 PM, Bas Nieuwenhuizen wrote:
>>
>> On Fri, May 18, 2018 at 6:00 PM, Samuel Pitoiset
>> <samuel.pitoi...@gmail.com> wrote:
>>>
>>> We still use 64-bit GPU pointers for all ring buffers because
>>> llvm.amdgcn.implicit.buffer.ptr doesn't seem to support 32-bit
>>> GPU pointers for now. This can be improved later anyways.
>>>
>>> Vega10:
>>> Totals from affected shaders:
>>> SGPRS: 1008722 -> 1026710 (1.78 %)
>>> VGPRS: 706580 -> 707136 (0.08 %)
>>> Spilled SGPRs: 22555 -> 22209 (-1.53 %)
>>> Spilled VGPRs: 75 -> 75 (0.00 %)
>>> Code Size: 34819208 -> 35202140 (1.10 %) bytes
>>> Max Waves: 175423 -> 175086 (-0.19 %)
>>>
>>> Polaris10:
>>> Totals from affected shaders:
>>> SGPRS: 1029849 -> 1036517 (0.65 %)
>>> VGPRS: 709984 -> 708872 (-0.16 %)
>>> Spilled SGPRs: 22672 -> 22309 (-1.60 %)
>>> Spilled VGPRs: 82 -> 66 (-19.51 %)
>>> Scratch size: 76 -> 60 (-21.05 %) dwords per thread
>>> Code Size: 34915336 -> 35309752 (1.13 %) bytes
>>> Max Waves: 151221 -> 151677 (0.30 %)
>>>
>>> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
>>> ---
>>>   src/amd/vulkan/radv_cmd_buffer.c  | 13 +++++++------
>>>   src/amd/vulkan/radv_device.c      |  6 ++++--
>>>   src/amd/vulkan/radv_nir_to_llvm.c | 24 +++++++++++++++---------
>>>   src/amd/vulkan/radv_private.h     | 18 ++++++++++++++----
>>>   4 files changed, 40 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c
>>> b/src/amd/vulkan/radv_cmd_buffer.c
>>> index 3636b2c8d9..5ab577b4c5 100644
>>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>>> @@ -586,11 +586,12 @@ radv_emit_userdata_address(struct radv_cmd_buffer
>>> *cmd_buffer,
>>>          uint32_t base_reg = pipeline->user_data_0[stage];
>>>          if (loc->sgpr_idx == -1)
>>>                  return;
>>> -       assert(loc->num_sgprs == 2);
>>> +
>>> +       assert(loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2));
>>>          assert(!loc->indirect);
>>>
>>> -       radv_emit_shader_pointer(cmd_buffer->cs,
>>> -                                base_reg + loc->sgpr_idx * 4, va);
>>> +       radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
>>> +                                base_reg + loc->sgpr_idx * 4, va,
>>> false);
>>>   }
>>>
>>>   static void
>>> @@ -1442,10 +1443,10 @@ emit_stage_descriptor_set_userdata(struct
>>> radv_cmd_buffer *cmd_buffer,
>>>                  return;
>>>
>>>          assert(!desc_set_loc->indirect);
>>> -       assert(desc_set_loc->num_sgprs == 2);
>>> +       assert(desc_set_loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2));
>>>
>>> -       radv_emit_shader_pointer(cmd_buffer->cs,
>>> -                                base_reg + desc_set_loc->sgpr_idx * 4,
>>> va);
>>> +       radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
>>> +                                base_reg + desc_set_loc->sgpr_idx * 4,
>>> va, false);
>>>   }
>>>
>>>   static void
>>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>>> index c52b3a591f..ee38077235 100644
>>> --- a/src/amd/vulkan/radv_device.c
>>> +++ b/src/amd/vulkan/radv_device.c
>>> @@ -1963,7 +1963,8 @@ radv_emit_global_shader_pointers(struct radv_queue
>>> *queue,
>>>
>>> R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
>>>
>>>                  for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
>>> -                       radv_emit_shader_pointer(cs, regs[i], va);
>>> +                       radv_emit_shader_pointer(queue->device, cs,
>>> regs[i],
>>> +                                                va, true);
>>>                  }
>>>          } else {
>>>                  uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
>>> @@ -1974,7 +1975,8 @@ radv_emit_global_shader_pointers(struct radv_queue
>>> *queue,
>>>                                     R_00B530_SPI_SHADER_USER_DATA_LS_0};
>>>
>>>                  for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
>>> -                       radv_emit_shader_pointer(cs, regs[i], va);
>>> +                       radv_emit_shader_pointer(queue->device, cs,
>>> regs[i],
>>> +                                                va, true);
>>
>>
>> Any reason we don't allocate the global descriptor bo  in 32-bit space?
>
>
> Because it looks like that amdgcn.implicit.buffer.ptr doesn't seem to work
> with 32-bit addr space. Or maybe I did something wrong when I tried this.

oh right.  The series is

Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>

>
>
>>
>>>                  }
>>>          }
>>>   }
>>> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c
>>> b/src/amd/vulkan/radv_nir_to_llvm.c
>>> index d9d89ac56d..10114cbe18 100644
>>> --- a/src/amd/vulkan/radv_nir_to_llvm.c
>>> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
>>> @@ -569,7 +569,10 @@ set_loc_shader(struct radv_shader_context *ctx, int
>>> idx, uint8_t *sgpr_idx,
>>>   static void
>>>   set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t
>>> *sgpr_idx)
>>>   {
>>> -       set_loc_shader(ctx, idx, sgpr_idx, 2);
>>> +       bool use_32bit_pointers = HAVE_32BIT_POINTERS &&
>>> +                                 idx != AC_UD_SCRATCH_RING_OFFSETS;
>>> +
>>> +       set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
>>>   }
>>>
>>>   static void
>>> @@ -580,7 +583,7 @@ set_loc_desc(struct radv_shader_context *ctx, int
>>> idx,  uint8_t *sgpr_idx,
>>>                  &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx];
>>>          assert(ud_info);
>>>
>>> -       set_loc(ud_info, sgpr_idx, 2, indirect_offset);
>>> +       set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2,
>>> indirect_offset);
>>>   }
>>>
>>>   struct user_sgpr_info {
>>> @@ -618,7 +621,8 @@ count_vs_user_sgprs(struct radv_shader_context *ctx)
>>>   {
>>>          uint8_t count = 0;
>>>
>>> -       count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
>>> +       if (ctx->shader_info->info.vs.has_vertex_buffers)
>>> +               count += HAVE_32BIT_POINTERS ? 1 : 2;
>>>          count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2;
>>>
>>>          return count;
>>> @@ -685,13 +689,13 @@ static void allocate_user_sgprs(struct
>>> radv_shader_context *ctx,
>>>                  user_sgpr_info->sgpr_count++;
>>>
>>>          if (ctx->shader_info->info.loads_push_constants)
>>> -               user_sgpr_info->sgpr_count += 2;
>>> +               user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 :
>>> 2;
>>>
>>>          uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32
>>> : 16;
>>>          uint32_t remaining_sgprs = available_sgprs -
>>> user_sgpr_info->sgpr_count;
>>>
>>>          if (remaining_sgprs / 2 <
>>> util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
>>> -               user_sgpr_info->sgpr_count += 2;
>>> +               user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 :
>>> 2;
>>>                  user_sgpr_info->indirect_all_descriptor_sets = true;
>>>          } else {
>>>                  user_sgpr_info->sgpr_count +=
>>> util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
>>> @@ -707,7 +711,7 @@ declare_global_input_sgprs(struct radv_shader_context
>>> *ctx,
>>>                             struct arg_info *args,
>>>                             LLVMValueRef *desc_sets)
>>>   {
>>> -       LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8);
>>> +       LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8);
>>>          unsigned num_sets = ctx->options->layout ?
>>>                              ctx->options->layout->num_sets : 0;
>>>          unsigned stage_mask = 1 << stage;
>>> @@ -725,7 +729,7 @@ declare_global_input_sgprs(struct radv_shader_context
>>> *ctx,
>>>                          }
>>>                  }
>>>          } else {
>>> -               add_array_arg(args, ac_array_in_const_addr_space(type),
>>> desc_sets);
>>> +               add_array_arg(args, ac_array_in_const32_addr_space(type),
>>> desc_sets);
>>>          }
>>>
>>>          if (ctx->shader_info->info.loads_push_constants) {
>>> @@ -745,7 +749,8 @@ declare_vs_specific_input_sgprs(struct
>>> radv_shader_context *ctx,
>>>              (stage == MESA_SHADER_VERTEX ||
>>>               (has_previous_stage && previous_stage ==
>>> MESA_SHADER_VERTEX))) {
>>>                  if (ctx->shader_info->info.vs.has_vertex_buffers) {
>>> -                       add_arg(args, ARG_SGPR,
>>> ac_array_in_const_addr_space(ctx->ac.v4i32),
>>> +                       add_arg(args, ARG_SGPR,
>>> +
>>> ac_array_in_const32_addr_space(ctx->ac.v4i32),
>>>                                  &ctx->vertex_buffers);
>>>                  }
>>>                  add_arg(args, ARG_SGPR, ctx->ac.i32,
>>> &ctx->abi.base_vertex);
>>> @@ -1878,7 +1883,8 @@ static LLVMValueRef radv_get_sampler_desc(struct
>>> ac_shader_abi *abi,
>>>          index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32,
>>> stride / type_size, 0), "");
>>>
>>>          list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32,
>>> offset, 0));
>>> -       list = LLVMBuildPointerCast(builder, list,
>>> ac_array_in_const_addr_space(type), "");
>>> +       list = LLVMBuildPointerCast(builder, list,
>>> +                                   ac_array_in_const32_addr_space(type),
>>> "");
>>>
>>>          return ac_build_load_to_sgpr(&ctx->ac, list, index);
>>>   }
>>> diff --git a/src/amd/vulkan/radv_private.h
>>> b/src/amd/vulkan/radv_private.h
>>> index adfd75c2a8..e2fa58d8d1 100644
>>> --- a/src/amd/vulkan/radv_private.h
>>> +++ b/src/amd/vulkan/radv_private.h
>>> @@ -57,6 +57,7 @@
>>>   #include "ac_nir_to_llvm.h"
>>>   #include "ac_gpu_info.h"
>>>   #include "ac_surface.h"
>>> +#include "ac_llvm_build.h"
>>>   #include "radv_descriptor_set.h"
>>>   #include "radv_extensions.h"
>>>   #include "radv_cs.h"
>>> @@ -1130,12 +1131,21 @@ bool radv_get_memory_fd(struct radv_device
>>> *device,
>>>                          int *pFD);
>>>
>>>   static inline void
>>> -radv_emit_shader_pointer(struct radeon_winsys_cs *cs,
>>> -                        uint32_t sh_offset, uint64_t va)
>>> +radv_emit_shader_pointer(struct radv_device *device,
>>> +                        struct radeon_winsys_cs *cs,
>>> +                        uint32_t sh_offset, uint64_t va, bool global)
>>>   {
>>> -       radeon_set_sh_reg_seq(cs, sh_offset, 2);
>>> +       bool use_32bit_pointers = HAVE_32BIT_POINTERS && !global;
>>> +
>>> +       radeon_set_sh_reg_seq(cs, sh_offset, use_32bit_pointers ? 1 : 2);
>>>          radeon_emit(cs, va);
>>> -       radeon_emit(cs, va >> 32);
>>> +
>>> +       if (use_32bit_pointers) {
>>> +               assert(va == 0 ||
>>> +                      (va >> 32) ==
>>> device->physical_device->rad_info.address32_hi);
>>> +       } else {
>>> +               radeon_emit(cs, va >> 32);
>>> +       }
>>>   }
>>>
>>>   static inline struct radv_descriptor_state *
>>> --
>>> 2.17.0
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to