Signed-off-by: Glenn Kennard <glenn.kenn...@gmail.com> --- src/gallium/drivers/r600/evergreen_state.c | 24 +++++++ src/gallium/drivers/r600/r600_pipe.c | 3 + src/gallium/drivers/r600/r600_pipe.h | 14 ++++ src/gallium/drivers/r600/r600_shader.h | 1 + src/gallium/drivers/r600/r600_state_common.c | 104 +++++++++++++++++++++++++++ 5 files changed, 146 insertions(+)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index c5dd9f7..8e984b9 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1976,6 +1976,30 @@ static void evergreen_emit_tcs_constant_buffers(struct r600_context *rctx, struc 0); } +void evergreen_setup_scratch_buffers(struct r600_context *rctx) { /* Evergreen+ path: for each HW stage whose bound shader reports scratch_space_needed != 0, hand that stage's TMP ring registers to r600_setup_scratch_area_for_shader() */ + static const struct { + unsigned ring_base; + unsigned item_size; + unsigned ring_size; + } regs[EG_NUM_HW_STAGES] = { /* ring base / item size / ring size register offsets, indexed by HW stage */ + [R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, R_028914_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE }, + [R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_028910_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE }, + [R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_02890C_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE }, + [R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_028908_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE }, + [EG_HW_STAGE_LS] = { R_008E10_SQ_LSTMP_RING_BASE, R_028830_SQ_LSTMP_RING_ITEMSIZE, R_008E14_SQ_LSTMP_RING_SIZE }, + [EG_HW_STAGE_HS] = { R_008E18_SQ_HSTMP_RING_BASE, R_028834_SQ_HSTMP_RING_ITEMSIZE, R_008E1C_SQ_HSTMP_RING_SIZE } + }; + + for (unsigned i = 0; i < EG_NUM_HW_STAGES; i++) { + struct r600_pipe_shader *stage = rctx->hw_shader_stages[i].shader; /* may be NULL, hence the check below */ + + if (stage && unlikely(stage->scratch_space_needed)) { /* only stages that actually spill get a scratch ring */ + r600_setup_scratch_area_for_shader(rctx, stage, + &rctx->scratch_buffers[i], regs[i].ring_base, regs[i].item_size, regs[i].ring_size); + } + } +} + static void evergreen_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, unsigned resource_id_base, unsigned pkt_flags) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 1803c26..fc03990 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -71,6 +71,9 @@ static void r600_destroy_context(struct pipe_context 
*context) r600_sb_context_destroy(rctx->sb_context); + for (sh = 0; sh < (rctx->b.chip_class < EVERGREEN ? R600_NUM_HW_STAGES : EG_NUM_HW_STAGES); sh++) { /* drop the context's reference on every stage's scratch buffer */ + r600_resource_reference(&rctx->scratch_buffers[sh].buffer, NULL); + } r600_resource_reference(&rctx->dummy_cmask, NULL); r600_resource_reference(&rctx->dummy_fmask, NULL); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index cf8eba3..c8cf87f 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -413,6 +413,13 @@ struct r600_shader_state { struct r600_pipe_shader *shader; }; +/* Used to spill shader temps */ +struct r600_scratch_buffer { + struct r600_resource *buffer; /* backing buffer object */ + unsigned size; /* size of buffer, in bytes */ + unsigned item_size; /* scratch_space_needed of the shader the ring was last programmed for */ +}; + struct r600_context { struct r600_common_context b; struct r600_screen *screen; @@ -522,6 +529,8 @@ struct r600_context { struct r600_pipe_shader_selector *last_tcs; unsigned last_num_tcs_input_cp; unsigned lds_alloc; + + struct r600_scratch_buffer scratch_buffers[MAX2(R600_NUM_HW_STAGES, EG_NUM_HW_STAGES)]; /* one slot per HW shader stage, same index as hw_shader_stages[] */ }; static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs, @@ -621,6 +630,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx, struct r600_surface *surf); void evergreen_update_db_shader_control(struct r600_context * rctx); bool evergreen_adjust_gprs(struct r600_context *rctx); +void evergreen_setup_scratch_buffers(struct r600_context *rctx); /* r600_blit.c */ void r600_init_blit_functions(struct r600_context *rctx); void r600_decompress_depth_textures(struct r600_context *rctx, @@ -665,6 +675,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen, unsigned sample_count, unsigned usage); void r600_update_db_shader_control(struct r600_context * rctx); +void r600_setup_scratch_buffers(struct r600_context *rctx); /* r600_hw_context.c */ void r600_context_gfx_flush(void *context, unsigned flags, @@ -730,6 +741,9 @@ void r600_sampler_states_dirty(struct r600_context *rctx, 
struct r600_sampler_states *state);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
 void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
+void r600_setup_scratch_area_for_shader(struct r600_context *rctx,
+	struct r600_pipe_shader *shader, struct r600_scratch_buffer *scratch,
+	unsigned ring_base_reg, unsigned item_size_reg, unsigned ring_size_reg);
 uint32_t r600_translate_stencil_op(int s_op);
 uint32_t r600_translate_fill(uint32_t func);
 unsigned r600_tex_wrap(unsigned wrap);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index cfdb020..e94230f 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -148,6 +148,7 @@ struct r600_pipe_shader {
 	unsigned		db_shader_control;
 	unsigned		ps_depth_export;
 	unsigned		enabled_stream_buffers_mask;
+	unsigned		scratch_space_needed; /* size of scratch space (if > 0) counted in vec4 */
 };
 
 /* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 1fbe392..1bec4f6 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1405,6 +1405,103 @@ static void r600_generate_fixed_func_tcs(struct r600_context *rctx)
 	ureg_create_shader_and_destroy(ureg, &rctx->b.b);
 }
 
+/*
+ * Set up the MEM_SCRATCH ring one HW stage spills temps to: grow the scratch
+ * buffer if it is too small, then program ring base/size per SE and item size.
+ * Emits nothing when the state is already current or the allocation fails.
+ */
+void r600_setup_scratch_area_for_shader(struct r600_context *rctx,
+	struct r600_pipe_shader *shader, struct r600_scratch_buffer *scratch,
+	unsigned ring_base_reg, unsigned item_size_reg, unsigned ring_size_reg)
+{
+	unsigned num_ses = rctx->screen->b.info.max_se;
+	unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
+	unsigned nthreads = 128;
+
+	unsigned itemsize = shader->scratch_space_needed * 4;
+	unsigned size = align(itemsize * nthreads * num_pipes * num_ses * 4, 256);
+
+	if (unlikely(shader->scratch_space_needed != scratch->item_size ||
+		size > scratch->size)) {
+		struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+		unsigned size_per_se = size / num_ses;
+
+		if (size > scratch->size) {
+			/* Drop the old, too small buffer (if any) before replacing it */
+			r600_resource_reference(&scratch->buffer, NULL);
+			scratch->buffer = (struct r600_resource *)pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM,
+				PIPE_USAGE_DEFAULT, size);
+			/* Size 0 on failure, so that the next call retries the allocation */
+			scratch->size = scratch->buffer ? size : 0;
+		}
+
+		/* No buffer: emit nothing instead of dereferencing NULL below */
+		if (!scratch->buffer)
+			return;
+
+		scratch->item_size = shader->scratch_space_needed;
+
+		radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
+
+		/* multi-SE chips need programming per SE */
+		for (unsigned se = 0; se < num_ses; se++) {
+			/* Direct to particular SE */
+			if (num_ses > 1) {
+				radeon_set_config_reg(cs, EG_0802C_GRBM_GFX_INDEX,
+					S_0802C_INSTANCE_INDEX(0) |
+					S_0802C_SE_INDEX(se) |
+					S_0802C_INSTANCE_BROADCAST_WRITES(1) |
+					S_0802C_SE_BROADCAST_WRITES(0));
+			}
+
+			radeon_set_config_reg(cs, ring_base_reg, (scratch->buffer->gpu_address + size_per_se * se) >> 8);
+			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+			radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, scratch->buffer,
+				RADEON_USAGE_READWRITE,
+				RADEON_PRIO_SCRATCH_BUFFER));
+			radeon_set_context_reg(cs, item_size_reg, itemsize);
+			radeon_set_config_reg(cs, ring_size_reg, size_per_se >> 8);
+		}
+
+		/* Restore broadcast mode */
+		if (num_ses > 1) {
+			radeon_set_config_reg(cs, EG_0802C_GRBM_GFX_INDEX,
+				S_0802C_INSTANCE_INDEX(0) |
+				S_0802C_SE_INDEX(0) |
+				S_0802C_INSTANCE_BROADCAST_WRITES(1) |
+				S_0802C_SE_BROADCAST_WRITES(1));
+		}
+
+		radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
+	}
+}
+
+/* Pre-Evergreen chips: set up the scratch ring of every bound HW stage that spills */
+void r600_setup_scratch_buffers(struct r600_context *rctx) {
+	static const struct {
+		unsigned ring_base;
+		unsigned item_size;
+		unsigned ring_size;
+	} regs[R600_NUM_HW_STAGES] = {
+		[R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE },
+		[R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE },
+		[R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE },
+		[R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE }
+	};
+
+	for (unsigned i = 0; i < R600_NUM_HW_STAGES; i++) {
+		struct r600_pipe_shader *stage = rctx->hw_shader_stages[i].shader;
+		if (stage && unlikely(stage->scratch_space_needed)) {
+			r600_setup_scratch_area_for_shader(rctx, stage,
+				&rctx->scratch_buffers[i], regs[i].ring_base, regs[i].item_size, regs[i].ring_size);
+		}
+	}
+}
+
 #define SELECT_SHADER_OR_FAIL(x) do {					\
 		r600_shader_select(ctx, rctx->x##_shader, &x##_dirty);	\
 		if (unlikely(!rctx->x##_shader->current))		\
@@ -1585,6 +1682,13 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 		r600_update_db_shader_control(rctx);
 	}
 
+	/* For each shader stage that needs to spill, set up buffer for MEM_SCRATCH */
+	if (rctx->b.chip_class >= EVERGREEN) {
+		evergreen_setup_scratch_buffers(rctx);
+	} else {
+		r600_setup_scratch_buffers(rctx);
+	}
+
 	/* on R600 we stuff masks + txq info into one constant buffer */
 	/* on evergreen we only need a txq info one */
 	if (rctx->ps_shader) {
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev