Re: [Mesa-dev] [PATCH 4/4] radeonsi: Enable VGPR spilling for all shader types

Tom Stellard Thu, 08 Jan 2015 10:18:09 -0800

On Thu, Jan 08, 2015 at 07:00:11PM +0100, Marek Olšák wrote:
> On Wed, Jan 7, 2015 at 10:03 PM, Tom Stellard <thomas.stell...@amd.com> wrote:
> > ---
> >  src/gallium/drivers/radeonsi/si_compute.c       | 40 ++------------
> >  src/gallium/drivers/radeonsi/si_shader.c        | 69 
> > ++++++++++++++++++++++++-
> >  src/gallium/drivers/radeonsi/si_shader.h        |  7 ++-
> >  src/gallium/drivers/radeonsi/si_state_shaders.c | 36 +++++++++++--
> >  4 files changed, 108 insertions(+), 44 deletions(-)
> >
> > diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> > b/src/gallium/drivers/radeonsi/si_compute.c
> > index 37e5c42..e5f158d 100644
> > --- a/src/gallium/drivers/radeonsi/si_compute.c
> > +++ b/src/gallium/drivers/radeonsi/si_compute.c
> > @@ -42,12 +42,6 @@
> >  #define NUM_USER_SGPRS 4
> >  #endif
> >
> > -static const char *scratch_rsrc_dword0_symbol =
> > -       "SCRATCH_RSRC_DWORD0";
> > -
> > -static const char *scratch_rsrc_dword1_symbol =
> > -       "SCRATCH_RSRC_DWORD1";
> > -
> >  struct si_compute {
> >         struct si_context *ctx;
> >
> > @@ -183,35 +177,6 @@ static unsigned compute_num_waves_for_scratch(
> >         return scratch_waves;
> >  }
> >
> > -static void apply_scratch_relocs(const struct si_screen *sscreen,
> > -                       const struct radeon_shader_binary *binary,
> > -                       struct si_shader *shader, uint64_t scratch_va) {
> > -       unsigned i;
> > -       char *ptr;
> > -       uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
> > -       uint32_t scratch_rsrc_dword1 =
> > -               S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
> > -               |  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
> > -
> > -       if (!binary->reloc_count) {
> > -               return;
> > -       }
> > -
> > -       ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
> > -                                       PIPE_TRANSFER_READ_WRITE);
> > -       for (i = 0 ; i < binary->reloc_count; i++) {
> > -               const struct radeon_shader_reloc *reloc = 
> > &binary->relocs[i];
> > -               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
> > -                       util_memcpy_cpu_to_le32(ptr + reloc->offset,
> > -                               &scratch_rsrc_dword0, 4);
> > -               } else if (!strcmp(scratch_rsrc_dword1_symbol, 
> > reloc->name)) {
> > -                       util_memcpy_cpu_to_le32(ptr + reloc->offset,
> > -                               &scratch_rsrc_dword1, 4);
> > -               }
> > -       }
> > -       sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
> > -}
> > -
> >  static void si_launch_grid(
> >                 struct pipe_context *ctx,
> >                 const uint *block_layout, const uint *grid_layout,
> > @@ -256,7 +221,8 @@ static void si_launch_grid(
> >
> >  #if HAVE_LLVM >= 0x0306
> >         /* Read the config information */
> > -       si_shader_binary_read_config(&program->binary, &program->program, 
> > pc);
> > +       si_shader_binary_read_config(sctx->screen, &program->binary,
> > +                                       &program->program, pc);
> >  #endif
> >
> >         /* Upload the kernel arguments */
> > @@ -295,7 +261,7 @@ static void si_launch_grid(
> >                                 RADEON_PRIO_SHADER_RESOURCE_RW);
> >
> >                 /* Patch the shader with the scratch buffer address. */
> > -               apply_scratch_relocs(sctx->screen,
> > +               si_shader_apply_scratch_relocs(sctx->screen,
> >                         &program->binary, shader, scratch_buffer_va);
> >
> >         }
> > diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> > b/src/gallium/drivers/radeonsi/si_shader.c
> > index cf28860..d59e736 100644
> > --- a/src/gallium/drivers/radeonsi/si_shader.c
> > +++ b/src/gallium/drivers/radeonsi/si_shader.c
> > @@ -46,6 +46,12 @@
> >
> >  #include <errno.h>
> >
> > +static const char *scratch_rsrc_dword0_symbol =
> > +       "SCRATCH_RSRC_DWORD0";
> > +
> > +static const char *scratch_rsrc_dword1_symbol =
> > +       "SCRATCH_RSRC_DWORD1";
> > +
> >  struct si_shader_output_values
> >  {
> >         LLVMValueRef values[4];
> > @@ -2517,7 +2523,8 @@ static void preload_ring_buffers(struct 
> > si_shader_context *si_shader_ctx)
> >         }
> >  }
> >
> > -void si_shader_binary_read_config(const struct radeon_shader_binary 
> > *binary,
> > +void si_shader_binary_read_config(const struct si_screen *sscreen,
> > +                               const struct radeon_shader_binary *binary,
> >                                 struct si_shader *shader,
> >                                 unsigned symbol_offset)
> >  {
> > @@ -2549,6 +2556,14 @@ void si_shader_binary_read_config(const struct 
> > radeon_shader_binary *binary,
> >                 case R_0286CC_SPI_PS_INPUT_ENA:
> >                         shader->spi_ps_input_ena = value;
> >                         break;
> > +               case R_0286E8_SPI_TMPRING_SIZE:
> > +                       /* XXX: This is the maximum value allowed.  I'm not 
> > sure
> > +                        * how to compute this for non-cs shaders.
> > +                        */
> > +                       shader->scratch_waves =
> > +                               32 * sscreen->b.info.max_compute_units;
> > +                       /* Fall-through */
> > +
> >                 case R_00B860_COMPUTE_TMPRING_SIZE:
> >                         /* WAVESIZE is in units of 256 dwords. */
> >                         shader->scratch_bytes_per_wave =
> > @@ -2562,6 +2577,36 @@ void si_shader_binary_read_config(const struct 
> > radeon_shader_binary *binary,
> >         }
> >  }
> >
> > +void si_shader_apply_scratch_relocs(const struct si_screen *sscreen,
> > +                       const struct radeon_shader_binary *binary,
> > +                       struct si_shader *shader, uint64_t scratch_va) {
> > +       unsigned i;
> > +       char *ptr;
> > +       uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
> > +       uint32_t scratch_rsrc_dword1 =
> > +               S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
> > +               |  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
> > +
> > +       if (!binary->reloc_count) {
> > +               return;
> > +       }
> > +
> > +       ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
> > +                                       PIPE_TRANSFER_READ_WRITE);
> > +       for (i = 0 ; i < binary->reloc_count; i++) {
> > +               const struct radeon_shader_reloc *reloc = 
> > &binary->relocs[i];
> > +               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
> > +                       util_memcpy_cpu_to_le32(ptr + reloc->offset,
> > +                               &scratch_rsrc_dword0, 4);
> > +               } else if (!strcmp(scratch_rsrc_dword1_symbol, 
> > reloc->name)) {
> > +                       util_memcpy_cpu_to_le32(ptr + reloc->offset,
> > +                               &scratch_rsrc_dword1, 4);
> > +               }
> > +       }
> > +       sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
> > +}
> > +
> > +
> >  int si_shader_binary_read(struct si_screen *sscreen,
> >                         struct si_shader *shader,
> >                         const struct radeon_shader_binary *binary)
> > @@ -2582,7 +2627,7 @@ int si_shader_binary_read(struct si_screen *sscreen,
> >                 }
> >         }
> >
> > -       si_shader_binary_read_config(binary, shader, 0);
> > +       si_shader_binary_read_config(sscreen, binary, shader, 0);
> >
> >         /* copy new shader */
> >         code_size = binary->code_size + binary->rodata_size;
> > @@ -2601,6 +2646,7 @@ int si_shader_binary_read(struct si_screen *sscreen,
> >                 util_memcpy_cpu_to_le32(ptr, binary->rodata, 
> > binary->rodata_size);
> >         }
> >
> > +
> 
> Unintentional new line?
> 
> >         sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
> >
> >         return 0;
> > @@ -2621,6 +2667,25 @@ int si_compile_llvm(struct si_screen *sscreen, 
> > struct si_shader *shader,
> >                 return r;
> >         }
> >         r = si_shader_binary_read(sscreen, shader, &binary);
> > +
> > +       if (shader->scratch_bytes_per_wave > 0) {
> > +               uint64_t scratch_buffer_va;
> > +               unsigned scratch_bytes = shader->scratch_bytes_per_wave *
> > +                                       shader->scratch_waves;
> > +               /* It's possible for different shader variants to have
> > +                * different scratch buffer sizes.  The scratch buffer sizes
> > +                * won't vary by too much, so allocating double the scratch
> > +                * space needed for the first variant should be enough for the
> > +                * rest if they need it.
> > +                */
> > +               shader->scratch_bo = (struct r600_resource*)
> > +                               si_resource_create_custom(&sscreen->b.b,
> > +                               PIPE_USAGE_DEFAULT, scratch_bytes * 2);
> > +               scratch_buffer_va = shader->scratch_bo->gpu_address;
> > +               si_shader_apply_scratch_relocs(sscreen, &binary, shader,
> > +                       scratch_buffer_va);
> > +       }
> > +
> >         FREE(binary.code);
> >         FREE(binary.config);
> >         FREE(binary.rodata);
> > diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
> > b/src/gallium/drivers/radeonsi/si_shader.h
> > index 08e344a..c5b274e 100644
> > --- a/src/gallium/drivers/radeonsi/si_shader.h
> > +++ b/src/gallium/drivers/radeonsi/si_shader.h
> > @@ -147,6 +147,7 @@ struct si_shader {
> >         unsigned                        lds_size;
> >         unsigned                        spi_ps_input_ena;
> >         unsigned                        scratch_bytes_per_wave;
> > +       unsigned                        scratch_waves;
> >         unsigned                        spi_shader_col_format;
> >         unsigned                        spi_shader_z_format;
> >         unsigned                        db_shader_control;
> > @@ -185,7 +186,11 @@ void si_shader_destroy(struct pipe_context *ctx, 
> > struct si_shader *shader);
> >  unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned 
> > index);
> >  int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
> > *shader,
> >                 const struct radeon_shader_binary *binary);
> > -void si_shader_binary_read_config(const struct radeon_shader_binary 
> > *binary,
> > +void si_shader_apply_scratch_relocs(const struct si_screen *sscreen,
> > +                       const struct radeon_shader_binary *binary,
> > +                       struct si_shader *shader, uint64_t scratch_va);
> > +void si_shader_binary_read_config(const struct si_screen *sscreen,
> > +                               const struct radeon_shader_binary *binary,
> >                                 struct si_shader *shader,
> >                                 unsigned symbol_offset);
> >
> > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
> > b/src/gallium/drivers/radeonsi/si_state_shaders.c
> > index 817a990..b6a0f32 100644
> > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> > @@ -33,6 +33,22 @@
> >  #include "util/u_memory.h"
> >  #include "util/u_simple_shaders.h"
> >
> > +static void si_shader_setup_scratch_buffer(struct si_shader *shader,
> > +                                       struct si_pm4_state *pm4) {
> > +       if (shader->scratch_bytes_per_wave == 0) {
> > +               return;
> > +       }
> > +
> > +       assert(shader->scratch_bo);
> > +
> > +       si_pm4_set_reg(pm4, R_0286E8_SPI_TMPRING_SIZE,
> > +                       S_0286E8_WAVES(shader->scratch_waves)
> > +                       | S_0286E8_WAVESIZE(shader->scratch_bytes_per_wave 
> > >> 10));
> 
> What happens if all VS, GS, and PS spill VGPRs? Will they share the
> scratch buffer? Will the scratch buffer be large enough for all of
> them? Will the WAVES and WAVESIZE parameters be the same for all of them?
>


Each shader will have its own scratch buffer.  WAVES will always be the
same, but WAVESIZE depends on the number of VGPRs spilled.

-Tom

> Marek
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] radeonsi: Enable VGPR spilling for all shader types

Reply via email to