On Thu, Jan 08, 2015 at 07:00:11PM +0100, Marek Olšák wrote: > On Wed, Jan 7, 2015 at 10:03 PM, Tom Stellard <thomas.stell...@amd.com> wrote: > > --- > > src/gallium/drivers/radeonsi/si_compute.c | 40 ++------------ > > src/gallium/drivers/radeonsi/si_shader.c | 69 > > ++++++++++++++++++++++++- > > src/gallium/drivers/radeonsi/si_shader.h | 7 ++- > > src/gallium/drivers/radeonsi/si_state_shaders.c | 36 +++++++++++-- > > 4 files changed, 108 insertions(+), 44 deletions(-) > > > > diff --git a/src/gallium/drivers/radeonsi/si_compute.c > > b/src/gallium/drivers/radeonsi/si_compute.c > > index 37e5c42..e5f158d 100644 > > --- a/src/gallium/drivers/radeonsi/si_compute.c > > +++ b/src/gallium/drivers/radeonsi/si_compute.c > > @@ -42,12 +42,6 @@ > > #define NUM_USER_SGPRS 4 > > #endif > > > > -static const char *scratch_rsrc_dword0_symbol = > > - "SCRATCH_RSRC_DWORD0"; > > - > > -static const char *scratch_rsrc_dword1_symbol = > > - "SCRATCH_RSRC_DWORD1"; > > - > > struct si_compute { > > struct si_context *ctx; > > > > @@ -183,35 +177,6 @@ static unsigned compute_num_waves_for_scratch( > > return scratch_waves; > > } > > > > -static void apply_scratch_relocs(const struct si_screen *sscreen, > > - const struct radeon_shader_binary *binary, > > - struct si_shader *shader, uint64_t scratch_va) { > > - unsigned i; > > - char *ptr; > > - uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff; > > - uint32_t scratch_rsrc_dword1 = > > - S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) > > - | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64); > > - > > - if (!binary->reloc_count) { > > - return; > > - } > > - > > - ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, > > - PIPE_TRANSFER_READ_WRITE); > > - for (i = 0 ; i < binary->reloc_count; i++) { > > - const struct radeon_shader_reloc *reloc = > > &binary->relocs[i]; > > - if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) { > > - util_memcpy_cpu_to_le32(ptr + reloc->offset, > > - &scratch_rsrc_dword0, 4); > > - } else if (!strcmp(scratch_rsrc_dword1_symbol, > > reloc->name)) { > > - util_memcpy_cpu_to_le32(ptr + reloc->offset, > > - &scratch_rsrc_dword1, 4); > > - } > > - } > > - sscreen->b.ws->buffer_unmap(shader->bo->cs_buf); > > -} > > - > > static void si_launch_grid( > > struct pipe_context *ctx, > > const uint *block_layout, const uint *grid_layout, > > @@ -256,7 +221,8 @@ static void si_launch_grid( > > > > #if HAVE_LLVM >= 0x0306 > > /* Read the config information */ > > - si_shader_binary_read_config(&program->binary, &program->program, > > pc); > > + si_shader_binary_read_config(sctx->screen, &program->binary, > > + &program->program, pc); > > #endif > > > > /* Upload the kernel arguments */ > > @@ -295,7 +261,7 @@ static void si_launch_grid( > > RADEON_PRIO_SHADER_RESOURCE_RW); > > > > /* Patch the shader with the scratch buffer address. */ > > - apply_scratch_relocs(sctx->screen, > > + si_shader_apply_scratch_relocs(sctx->screen, > > &program->binary, shader, scratch_buffer_va); > > > > } > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > > b/src/gallium/drivers/radeonsi/si_shader.c > > index cf28860..d59e736 100644 > > --- a/src/gallium/drivers/radeonsi/si_shader.c > > +++ b/src/gallium/drivers/radeonsi/si_shader.c > > @@ -46,6 +46,12 @@ > > > > #include <errno.h> > > > > +static const char *scratch_rsrc_dword0_symbol = > > + "SCRATCH_RSRC_DWORD0"; > > + > > +static const char *scratch_rsrc_dword1_symbol = > > + "SCRATCH_RSRC_DWORD1"; > > + > > struct si_shader_output_values > > { > > LLVMValueRef values[4]; > > @@ -2517,7 +2523,8 @@ static void preload_ring_buffers(struct > > si_shader_context *si_shader_ctx) > > } > > } > > > > -void si_shader_binary_read_config(const struct radeon_shader_binary > > *binary, > > +void si_shader_binary_read_config(const struct si_screen *sscreen, > > + const struct radeon_shader_binary *binary, > > struct si_shader *shader, > > unsigned symbol_offset) > > { > > @@ -2549,6 +2556,14 @@ void si_shader_binary_read_config(const struct > > radeon_shader_binary *binary, > > case R_0286CC_SPI_PS_INPUT_ENA: > > shader->spi_ps_input_ena = value; > > break; > > + case R_0286E8_SPI_TMPRING_SIZE: > > + /* XXX: This is the maximum value allowed. I'm not > > sure > > + * how compute this for non-cs shaders. > > + */ > > + shader->scratch_waves = > > + 32 * sscreen->b.info.max_compute_units; > > + /* Fall-through */ > > + > > case R_00B860_COMPUTE_TMPRING_SIZE: > > /* WAVESIZE is in units of 256 dwords. */ > > shader->scratch_bytes_per_wave = > > @@ -2562,6 +2577,36 @@ void si_shader_binary_read_config(const struct > > radeon_shader_binary *binary, > > } > > } > > > > +void si_shader_apply_scratch_relocs(const struct si_screen *sscreen, > > + const struct radeon_shader_binary *binary, > > + struct si_shader *shader, uint64_t scratch_va) { > > + unsigned i; > > + char *ptr; > > + uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff; > > + uint32_t scratch_rsrc_dword1 = > > + S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) > > + | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64); > > + > > + if (!binary->reloc_count) { > > + return; > > + } > > + > > + ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, > > + PIPE_TRANSFER_READ_WRITE); > > + for (i = 0 ; i < binary->reloc_count; i++) { > > + const struct radeon_shader_reloc *reloc = > > &binary->relocs[i]; > > + if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) { > > + util_memcpy_cpu_to_le32(ptr + reloc->offset, > > + &scratch_rsrc_dword0, 4); > > + } else if (!strcmp(scratch_rsrc_dword1_symbol, > > reloc->name)) { > > + util_memcpy_cpu_to_le32(ptr + reloc->offset, > > + &scratch_rsrc_dword1, 4); > > + } > > + } > > + sscreen->b.ws->buffer_unmap(shader->bo->cs_buf); > > +} > > + > > + > > int si_shader_binary_read(struct si_screen *sscreen, > > struct si_shader *shader, > > const struct radeon_shader_binary *binary) > > @@ -2582,7 +2627,7 @@ int si_shader_binary_read(struct si_screen *sscreen, > > } > > } > > > > - si_shader_binary_read_config(binary, shader, 0); > > + si_shader_binary_read_config(sscreen, binary, shader, 0); > > > > /* copy new shader */ > > code_size = binary->code_size + binary->rodata_size; > > @@ -2601,6 +2646,7 @@ int si_shader_binary_read(struct si_screen *sscreen, > > util_memcpy_cpu_to_le32(ptr, binary->rodata, > > binary->rodata_size); > > } > > > > + > > Unintentional new line? > > > sscreen->b.ws->buffer_unmap(shader->bo->cs_buf); > > > > return 0; > > @@ -2621,6 +2667,25 @@ int si_compile_llvm(struct si_screen *sscreen, > > struct si_shader *shader, > > return r; > > } > > r = si_shader_binary_read(sscreen, shader, &binary); > > + > > + if (shader->scratch_bytes_per_wave > 0) { > > + uint64_t scratch_buffer_va; > > + unsigned scratch_bytes = shader->scratch_bytes_per_wave * > > + shader->scratch_waves; > > + /* It's possible for different shader variants to have > > + * different scratch buffer sizes. The scratch buffer sizes > > + * won't vary by too much, so allocating double the scratch > > + * space need for the first variant should be enough for the > > + * rest if they need it. > > + */ > > + shader->scratch_bo = (struct r600_resource*) > > + si_resource_create_custom(&sscreen->b.b, > > + PIPE_USAGE_DEFAULT, scratch_bytes * 2); > > + scratch_buffer_va = shader->scratch_bo->gpu_address; > > + si_shader_apply_scratch_relocs(sscreen, &binary, shader, > > + scratch_buffer_va); > > + } > > + > > FREE(binary.code); > > FREE(binary.config); > > FREE(binary.rodata); > > diff --git a/src/gallium/drivers/radeonsi/si_shader.h > > b/src/gallium/drivers/radeonsi/si_shader.h > > index 08e344a..c5b274e 100644 > > --- a/src/gallium/drivers/radeonsi/si_shader.h > > +++ b/src/gallium/drivers/radeonsi/si_shader.h > > @@ -147,6 +147,7 @@ struct si_shader { > > unsigned lds_size; > > unsigned spi_ps_input_ena; > > unsigned scratch_bytes_per_wave; > > + unsigned scratch_waves; > > unsigned spi_shader_col_format; > > unsigned spi_shader_z_format; > > unsigned db_shader_control; > > @@ -185,7 +186,11 @@ void si_shader_destroy(struct pipe_context *ctx, > > struct si_shader *shader); > > unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned > > index); > > int si_shader_binary_read(struct si_screen *sscreen, struct si_shader > > *shader, > > const struct radeon_shader_binary *binary); > > -void si_shader_binary_read_config(const struct radeon_shader_binary > > *binary, > > +void si_shader_apply_scratch_relocs(const struct si_screen *sscreen, > > + const struct radeon_shader_binary *binary, > > + struct si_shader *shader, uint64_t scratch_va); > > +void si_shader_binary_read_config(const struct si_screen *sscreen, > > + const struct radeon_shader_binary *binary, > > struct si_shader *shader, > > unsigned symbol_offset); > > > > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c > > b/src/gallium/drivers/radeonsi/si_state_shaders.c > > index 817a990..b6a0f32 100644 > > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > > @@ -33,6 +33,22 @@ > > #include "util/u_memory.h" > > #include "util/u_simple_shaders.h" > > > > +static void si_shader_setup_scratch_buffer(struct si_shader *shader, > > + struct si_pm4_state *pm4) { > > + if (shader->scratch_bytes_per_wave == 0) { > > + return; > > + } > > + > > + assert(shader->scratch_bo); > > + > > + si_pm4_set_reg(pm4, R_0286E8_SPI_TMPRING_SIZE, > > + S_0286E8_WAVES(shader->scratch_waves) > > + | S_0286E8_WAVESIZE(shader->scratch_bytes_per_wave > > >> 10)); > > What happens if all VS, GS, and PS spill VGPRs? Will they share the > scratch buffer? Will the scratch buffer be large enough for all of > them? Will the WAVES and WAVESIZE parameters be the same for all of them? >
Each shader will have its own scratch buffer. WAVES will always be the same, but WAVESIZE depends on the number of VGPRs spilled. -Tom > Marek > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev