Signed-off-by: Glenn Kennard <glenn.kenn...@gmail.com>
---
 src/gallium/drivers/r600/evergreen_state.c   |  24 +++++++
 src/gallium/drivers/r600/r600_pipe.c         |   3 +
 src/gallium/drivers/r600/r600_pipe.h         |  14 ++++
 src/gallium/drivers/r600/r600_shader.h       |   1 +
 src/gallium/drivers/r600/r600_state_common.c | 104 +++++++++++++++++++++++++++
 5 files changed, 146 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index c5dd9f7..8e984b9 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1976,6 +1976,30 @@ static void evergreen_emit_tcs_constant_buffers(struct 
r600_context *rctx, struc
                                        0);
 }
 
+/*
+ * Walk every evergreen hardware shader stage and, for each stage that has a
+ * shader bound which requests scratch space, set up that stage's temp ring
+ * through r600_setup_scratch_area_for_shader().
+ */
+void evergreen_setup_scratch_buffers(struct r600_context *rctx)
+{
+       /* Temp ring registers of each stage, indexed by hardware stage. */
+       static const struct {
+               unsigned ring_base;
+               unsigned item_size;
+               unsigned ring_size;
+       } stage_regs[EG_NUM_HW_STAGES] = {
+               [R600_HW_STAGE_PS] = {
+                       R_008C68_SQ_PSTMP_RING_BASE,
+                       R_028914_SQ_PSTMP_RING_ITEMSIZE,
+                       R_008C6C_SQ_PSTMP_RING_SIZE
+               },
+               [R600_HW_STAGE_VS] = {
+                       R_008C60_SQ_VSTMP_RING_BASE,
+                       R_028910_SQ_VSTMP_RING_ITEMSIZE,
+                       R_008C64_SQ_VSTMP_RING_SIZE
+               },
+               [R600_HW_STAGE_GS] = {
+                       R_008C58_SQ_GSTMP_RING_BASE,
+                       R_02890C_SQ_GSTMP_RING_ITEMSIZE,
+                       R_008C5C_SQ_GSTMP_RING_SIZE
+               },
+               [R600_HW_STAGE_ES] = {
+                       R_008C50_SQ_ESTMP_RING_BASE,
+                       R_028908_SQ_ESTMP_RING_ITEMSIZE,
+                       R_008C54_SQ_ESTMP_RING_SIZE
+               },
+               [EG_HW_STAGE_LS] = {
+                       R_008E10_SQ_LSTMP_RING_BASE,
+                       R_028830_SQ_LSTMP_RING_ITEMSIZE,
+                       R_008E14_SQ_LSTMP_RING_SIZE
+               },
+               [EG_HW_STAGE_HS] = {
+                       R_008E18_SQ_HSTMP_RING_BASE,
+                       R_028834_SQ_HSTMP_RING_ITEMSIZE,
+                       R_008E1C_SQ_HSTMP_RING_SIZE
+               }
+       };
+
+       for (unsigned hw_stage = 0; hw_stage < EG_NUM_HW_STAGES; hw_stage++) {
+               struct r600_pipe_shader *shader =
+                       rctx->hw_shader_stages[hw_stage].shader;
+
+               /* No shader bound to this stage. */
+               if (!shader)
+                       continue;
+
+               /* Spilling is the rare case. */
+               if (unlikely(shader->scratch_space_needed)) {
+                       r600_setup_scratch_area_for_shader(rctx, shader,
+                               &rctx->scratch_buffers[hw_stage],
+                               stage_regs[hw_stage].ring_base,
+                               stage_regs[hw_stage].item_size,
+                               stage_regs[hw_stage].ring_size);
+               }
+       }
+}
+
 static void evergreen_emit_sampler_views(struct r600_context *rctx,
                                         struct r600_samplerview_state *state,
                                         unsigned resource_id_base, unsigned 
pkt_flags)
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 1803c26..fc03990 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -71,6 +71,9 @@ static void r600_destroy_context(struct pipe_context *context)
 
        r600_sb_context_destroy(rctx->sb_context);
 
+       for (sh = 0; sh < (rctx->b.chip_class < EVERGREEN ? R600_NUM_HW_STAGES 
: EG_NUM_HW_STAGES); sh++) {
+               r600_resource_reference(&rctx->scratch_buffers[sh].buffer, 
NULL);
+       }
        r600_resource_reference(&rctx->dummy_cmask, NULL);
        r600_resource_reference(&rctx->dummy_fmask, NULL);
 
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index cf8eba3..c8cf87f 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -413,6 +413,13 @@ struct r600_shader_state {
        struct r600_pipe_shader *shader;
 };
 
+/*
+ * Used to spill shader temps.
+ *
+ * One of these is kept per hardware shader stage in
+ * r600_context::scratch_buffers.
+ */
+struct r600_scratch_buffer {
+       /* Backing buffer of the scratch ring; NULL until first allocated. */
+       struct r600_resource            *buffer;
+       /* Size in bytes that was requested when `buffer` was allocated. */
+       unsigned                                size;
+       /* scratch_space_needed of the shader the ring was last set up for. */
+       unsigned                                item_size;
+};
+
 struct r600_context {
        struct r600_common_context      b;
        struct r600_screen              *screen;
@@ -522,6 +529,8 @@ struct r600_context {
        struct r600_pipe_shader_selector *last_tcs;
        unsigned last_num_tcs_input_cp;
        unsigned lds_alloc;
+
+       struct r600_scratch_buffer scratch_buffers[MAX2(R600_NUM_HW_STAGES, 
EG_NUM_HW_STAGES)];
 };
 
 static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
@@ -621,6 +630,7 @@ void evergreen_init_color_surface_rat(struct r600_context 
*rctx,
                                        struct r600_surface *surf);
 void evergreen_update_db_shader_control(struct r600_context * rctx);
 bool evergreen_adjust_gprs(struct r600_context *rctx);
+void evergreen_setup_scratch_buffers(struct r600_context *rctx);
 /* r600_blit.c */
 void r600_init_blit_functions(struct r600_context *rctx);
 void r600_decompress_depth_textures(struct r600_context *rctx,
@@ -665,6 +675,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
                                 unsigned sample_count,
                                 unsigned usage);
 void r600_update_db_shader_control(struct r600_context * rctx);
+void r600_setup_scratch_buffers(struct r600_context *rctx);
 
 /* r600_hw_context.c */
 void r600_context_gfx_flush(void *context, unsigned flags,
@@ -730,6 +741,9 @@ void r600_sampler_states_dirty(struct r600_context *rctx,
                               struct r600_sampler_states *state);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct 
r600_constbuf_state *state);
 void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
+/*
+ * Set up the scratch ring used for spilled temps of one shader stage.
+ * `scratch` is the per-stage scratch state; the three *_reg arguments are the
+ * register offsets of that stage's ring base, item size and ring size.
+ */
+void r600_setup_scratch_area_for_shader(struct r600_context *rctx,
+       struct r600_pipe_shader *shader, struct r600_scratch_buffer *scratch,
+       unsigned ring_base_reg, unsigned item_size_reg, unsigned ring_size_reg);
 uint32_t r600_translate_stencil_op(int s_op);
 uint32_t r600_translate_fill(uint32_t func);
 unsigned r600_tex_wrap(unsigned wrap);
diff --git a/src/gallium/drivers/r600/r600_shader.h 
b/src/gallium/drivers/r600/r600_shader.h
index cfdb020..e94230f 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -148,6 +148,7 @@ struct r600_pipe_shader {
        unsigned                db_shader_control;
        unsigned                ps_depth_export;
        unsigned                enabled_stream_buffers_mask;
+       unsigned                scratch_space_needed; /* size of scratch space 
(if > 0) counted in vec4 */
 };
 
 /* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 1fbe392..1bec4f6 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1405,6 +1405,103 @@ static void r600_generate_fixed_func_tcs(struct 
r600_context *rctx)
                ureg_create_shader_and_destroy(ureg, &rctx->b.b);
 }
 
+/**
+ * Update the MEM_SCRATCH ring of one hardware shader stage if needed.
+ *
+ * The ring is (re)programmed when the shader's scratch requirement differs
+ * from the one the ring was last set up for, or when the ring has to grow.
+ * The backing buffer is only reallocated when it is too small; it is never
+ * shrunk.
+ *
+ * \param rctx           context whose gfx command stream receives the writes
+ * \param shader         shader whose scratch_space_needed (counted in vec4)
+ *                       determines the item size
+ * \param scratch        per-stage scratch state, updated in place
+ * \param ring_base_reg  config register holding the ring base address
+ * \param item_size_reg  context register holding the per-item size
+ * \param ring_size_reg  config register holding the ring size
+ *
+ * If the buffer cannot be allocated, an error is printed, \p scratch is
+ * reset so that the next call retries, and no registers are written.
+ */
+void r600_setup_scratch_area_for_shader(struct r600_context *rctx,
+       struct r600_pipe_shader *shader, struct r600_scratch_buffer *scratch,
+       unsigned ring_base_reg, unsigned item_size_reg, unsigned ring_size_reg)
+{
+       unsigned num_ses = rctx->screen->b.info.max_se;
+       unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
+       /* NOTE(review): presumably the max number of threads per pipe - confirm */
+       unsigned nthreads = 128;
+
+       /* vec4 -> 4 components per item */
+       unsigned itemsize = shader->scratch_space_needed * 4;
+       /* Base and size registers are programmed in 256 byte units (>> 8). */
+       unsigned size = align(itemsize * nthreads * num_pipes * num_ses * 4, 256);
+       struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+
+       /* Common case: ring already matches this shader and is big enough. */
+       if (shader->scratch_space_needed == scratch->item_size &&
+           size <= scratch->size)
+               return;
+
+       if (size > scratch->size) {
+               /*
+                * Release the prior buffer, if any, and forget its
+                * parameters so that a failed allocation below does not
+                * leave stale state describing a buffer that is gone.
+                */
+               r600_resource_reference(&scratch->buffer, NULL);
+               scratch->size = 0;
+               scratch->item_size = 0;
+
+               scratch->buffer = (struct r600_resource *)pipe_buffer_create(
+                       rctx->b.b.screen, PIPE_BIND_CUSTOM,
+                       PIPE_USAGE_DEFAULT, size);
+               if (!scratch->buffer) {
+                       /*
+                        * Without a buffer there is no address to program;
+                        * bail out instead of dereferencing NULL below.
+                        */
+                       fprintf(stderr,
+                               "r600: failed to allocate %u byte scratch buffer\n",
+                               size);
+                       return;
+               }
+               scratch->size = size;
+       }
+
+       scratch->item_size = shader->scratch_space_needed;
+
+       /* Wait for idle and flush before touching the ring registers. */
+       radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
+
+       /* multi-SE chips need programming per SE */
+       for (unsigned se = 0; se < num_ses; se++) {
+               struct r600_resource *rbuffer = scratch->buffer;
+               /* Each SE gets its own equally sized slice of the buffer. */
+               unsigned size_per_se = size / num_ses;
+
+               /* Direct to particular SE */
+               if (num_ses > 1) {
+                       radeon_set_config_reg(cs, EG_0802C_GRBM_GFX_INDEX,
+                               S_0802C_INSTANCE_INDEX(0) |
+                               S_0802C_SE_INDEX(se) |
+                               S_0802C_INSTANCE_BROADCAST_WRITES(1) |
+                               S_0802C_SE_BROADCAST_WRITES(0));
+               }
+
+               radeon_set_config_reg(cs, ring_base_reg,
+                       (rbuffer->gpu_address + size_per_se * se) >> 8);
+               /* NOP packet carrying the relocation for the ring base. */
+               radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
+                       RADEON_USAGE_READWRITE,
+                       RADEON_PRIO_SCRATCH_BUFFER));
+               radeon_set_context_reg(cs, item_size_reg, itemsize);
+               radeon_set_config_reg(cs, ring_size_reg, size_per_se >> 8);
+       }
+
+       /* Restore broadcast mode */
+       if (num_ses > 1) {
+               radeon_set_config_reg(cs, EG_0802C_GRBM_GFX_INDEX,
+                       S_0802C_INSTANCE_INDEX(0) |
+                       S_0802C_SE_INDEX(0) |
+                       S_0802C_INSTANCE_BROADCAST_WRITES(1) |
+                       S_0802C_SE_BROADCAST_WRITES(1));
+       }
+
+       radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
+}
+
+/*
+ * R600/R700 counterpart of evergreen_setup_scratch_buffers(): for every
+ * hardware shader stage with a bound shader that requests scratch space,
+ * set up that stage's temp ring.
+ */
+void r600_setup_scratch_buffers(struct r600_context *rctx)
+{
+       /* Temp ring registers of each stage, indexed by hardware stage. */
+       static const struct {
+               unsigned ring_base;
+               unsigned item_size;
+               unsigned ring_size;
+       } stage_regs[R600_NUM_HW_STAGES] = {
+               [R600_HW_STAGE_PS] = {
+                       R_008C68_SQ_PSTMP_RING_BASE,
+                       R_0288BC_SQ_PSTMP_RING_ITEMSIZE,
+                       R_008C6C_SQ_PSTMP_RING_SIZE
+               },
+               [R600_HW_STAGE_VS] = {
+                       R_008C60_SQ_VSTMP_RING_BASE,
+                       R_0288B8_SQ_VSTMP_RING_ITEMSIZE,
+                       R_008C64_SQ_VSTMP_RING_SIZE
+               },
+               [R600_HW_STAGE_GS] = {
+                       R_008C58_SQ_GSTMP_RING_BASE,
+                       R_0288B4_SQ_GSTMP_RING_ITEMSIZE,
+                       R_008C5C_SQ_GSTMP_RING_SIZE
+               },
+               [R600_HW_STAGE_ES] = {
+                       R_008C50_SQ_ESTMP_RING_BASE,
+                       R_0288B0_SQ_ESTMP_RING_ITEMSIZE,
+                       R_008C54_SQ_ESTMP_RING_SIZE
+               }
+       };
+
+       for (unsigned hw_stage = 0; hw_stage < R600_NUM_HW_STAGES; hw_stage++) {
+               struct r600_pipe_shader *shader =
+                       rctx->hw_shader_stages[hw_stage].shader;
+
+               /* No shader bound to this stage. */
+               if (!shader)
+                       continue;
+
+               /* Spilling is the rare case. */
+               if (unlikely(shader->scratch_space_needed)) {
+                       r600_setup_scratch_area_for_shader(rctx, shader,
+                               &rctx->scratch_buffers[hw_stage],
+                               stage_regs[hw_stage].ring_base,
+                               stage_regs[hw_stage].item_size,
+                               stage_regs[hw_stage].ring_size);
+               }
+       }
+}
+
 #define SELECT_SHADER_OR_FAIL(x) do {                                  \
                r600_shader_select(ctx, rctx->x##_shader, &x##_dirty);  \
                if (unlikely(!rctx->x##_shader->current))               \
@@ -1585,6 +1682,13 @@ static bool r600_update_derived_state(struct 
r600_context *rctx)
                r600_update_db_shader_control(rctx);
        }
 
+       /* For each shader stage that needs to spill, set up buffer for 
MEM_SCRATCH */
+       if (rctx->b.chip_class >= EVERGREEN) {
+               evergreen_setup_scratch_buffers(rctx);
+       } else {
+               r600_setup_scratch_buffers(rctx);
+       }
+
        /* on R600 we stuff masks + txq info into one constant buffer */
        /* on evergreen we only need a txq info one */
        if (rctx->ps_shader) {
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to