For the series: Reviewed-by: Marek Olšák <marek.ol...@amd.com>
Marek On Tue, Jan 28, 2014 at 10:46 AM, Michel Dänzer <mic...@daenzer.net> wrote: > From: Michel Dänzer <michel.daen...@amd.com> > > Signed-off-by: Michel Dänzer <michel.daen...@amd.com> > --- > src/gallium/drivers/radeonsi/si_descriptors.c | 93 > ++++++++++++++++----------- > src/gallium/drivers/radeonsi/si_pipe.h | 6 +- > src/gallium/drivers/radeonsi/si_shader.c | 22 ++++--- > src/gallium/drivers/radeonsi/si_shader.h | 72 +++++++++++---------- > src/gallium/drivers/radeonsi/si_state.h | 6 +- > 5 files changed, 115 insertions(+), 84 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c > b/src/gallium/drivers/radeonsi/si_descriptors.c > index 2a54fcb..9078c6c 100644 > --- a/src/gallium/drivers/radeonsi/si_descriptors.c > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c > @@ -516,7 +516,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint > shader, uint slot, > unsigned element_size, unsigned index_stride) > { > struct si_context *sctx = (struct si_context *)ctx; > - struct si_buffer_resources *buffers = &sctx->const_buffers[shader]; > + struct si_buffer_resources *buffers = &sctx->rw_buffers[shader]; > > if (shader >= SI_NUM_SHADERS) > return; > @@ -608,9 +608,9 @@ static void si_set_streamout_targets(struct pipe_context > *ctx, > unsigned append_bitmask) > { > struct si_context *sctx = (struct si_context *)ctx; > - struct si_buffer_resources *buffers = &sctx->streamout_buffers; > + struct si_buffer_resources *buffers = > &sctx->rw_buffers[PIPE_SHADER_VERTEX]; > unsigned old_num_targets = sctx->b.streamout.num_targets; > - unsigned i; > + unsigned i, bufidx; > > /* Streamout buffers must be bound in 2 places: > * 1) in VGT by setting the VGT_STRMOUT registers > @@ -622,12 +622,14 @@ static void si_set_streamout_targets(struct > pipe_context *ctx, > > /* Set the shader resources.*/ > for (i = 0; i < num_targets; i++) { > + bufidx = SI_RW_SO + i; > + > if (targets[i]) { > struct pipe_resource *buffer = targets[i]->buffer; > uint64_t va = r600_resource_va(ctx->screen, buffer); > > /* Set the descriptor. */ > - uint32_t *desc = buffers->desc_data[i]; > + uint32_t *desc = buffers->desc_data[bufidx]; > desc[0] = va; > desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); > desc[2] = 0xffffffff; > @@ -637,25 +639,29 @@ static void si_set_streamout_targets(struct > pipe_context *ctx, > S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); > > /* Set the resource. */ > - pipe_resource_reference(&buffers->buffers[i], buffer); > + pipe_resource_reference(&buffers->buffers[bufidx], > + buffer); > r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, > (struct r600_resource*)buffer, > buffers->shader_usage); > - buffers->desc.enabled_mask |= 1 << i; > + buffers->desc.enabled_mask |= 1 << bufidx; > } else { > /* Clear the descriptor and unset the resource. */ > - memset(buffers->desc_data[i], 0, sizeof(uint32_t) * > 4); > - pipe_resource_reference(&buffers->buffers[i], NULL); > - buffers->desc.enabled_mask &= ~(1 << i); > + memset(buffers->desc_data[bufidx], 0, > + sizeof(uint32_t) * 4); > + pipe_resource_reference(&buffers->buffers[bufidx], > + NULL); > + buffers->desc.enabled_mask &= ~(1 << bufidx); > } > - buffers->desc.dirty_mask |= 1 << i; > + buffers->desc.dirty_mask |= 1 << bufidx; > } > for (; i < old_num_targets; i++) { > + bufidx = SI_RW_SO + i; > /* Clear the descriptor and unset the resource. */ > - memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4); > - pipe_resource_reference(&buffers->buffers[i], NULL); > - buffers->desc.enabled_mask &= ~(1 << i); > - buffers->desc.dirty_mask |= 1 << i; > + memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4); > + pipe_resource_reference(&buffers->buffers[bufidx], NULL); > + buffers->desc.enabled_mask &= ~(1 << bufidx); > + buffers->desc.dirty_mask |= 1 << bufidx; > } > > si_update_descriptors(sctx, &buffers->desc); > @@ -712,25 +718,37 @@ static void si_invalidate_buffer(struct pipe_context > *ctx, struct pipe_resource > /* Vertex buffers. */ > /* Nothing to do. Vertex buffer bindings are updated before every > draw call. */ > > - /* Streamout buffers. */ > - for (i = 0; i < sctx->streamout_buffers.num_buffers; i++) { > - if (sctx->streamout_buffers.buffers[i] == buf) { > - /* Update the descriptor. */ > - si_desc_reset_buffer_offset(ctx, > sctx->streamout_buffers.desc_data[i], > - old_va, buf); > + /* Read/Write buffers. */ > + for (shader = 0; shader < SI_NUM_SHADERS; shader++) { > + struct si_buffer_resources *buffers = > &sctx->rw_buffers[shader]; > + bool found = false; > + uint32_t mask = buffers->desc.enabled_mask; > > - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, > - (struct r600_resource*)buf, > - > sctx->streamout_buffers.shader_usage); > - sctx->streamout_buffers.desc.dirty_mask |= 1 << i; > - si_update_descriptors(sctx, > &sctx->streamout_buffers.desc); > - > - /* Update the streamout state. */ > - if (sctx->b.streamout.begin_emitted) { > - r600_emit_streamout_end(&sctx->b); > + while (mask) { > + i = u_bit_scan(&mask); > + if (buffers->buffers[i] == buf) { > + si_desc_reset_buffer_offset(ctx, > buffers->desc_data[i], > + old_va, buf); > + > + r600_context_bo_reloc(&sctx->b, > &sctx->b.rings.gfx, > + rbuffer, > buffers->shader_usage); > + > + buffers->desc.dirty_mask |= 1 << i; > + found = true; > + > + if (i >= SI_RW_SO && shader == > PIPE_SHADER_VERTEX) { > + /* Update the streamout state. */ > + if (sctx->b.streamout.begin_emitted) { > + > r600_emit_streamout_end(&sctx->b); > + } > + sctx->b.streamout.append_bitmask = > + > sctx->b.streamout.enabled_mask; > + > r600_streamout_buffers_dirty(&sctx->b); > + } > } > - sctx->b.streamout.append_bitmask = > sctx->b.streamout.enabled_mask; > - r600_streamout_buffers_dirty(&sctx->b); > + } > + if (found) { > + si_update_descriptors(sctx, &buffers->desc); > } > } > > @@ -936,17 +954,20 @@ void si_init_all_descriptors(struct si_context *sctx) > for (i = 0; i < SI_NUM_SHADERS; i++) { > si_init_buffer_resources(sctx, &sctx->const_buffers[i], > NUM_CONST_BUFFERS, i, SI_SGPR_CONST, > + RADEON_USAGE_READ); > + si_init_buffer_resources(sctx, &sctx->rw_buffers[i], > + i == PIPE_SHADER_VERTEX ? > + SI_RW_SO + 4 : SI_RW_SO, > + i, SI_SGPR_RW_BUFFERS, > RADEON_USAGE_READWRITE); > > si_init_sampler_views(sctx, &sctx->samplers[i].views, i); > > sctx->atoms.const_buffers[i] = > &sctx->const_buffers[i].desc.atom; > + sctx->atoms.rw_buffers[i] = &sctx->rw_buffers[i].desc.atom; > sctx->atoms.sampler_views[i] = > &sctx->samplers[i].views.desc.atom; > } > > - si_init_buffer_resources(sctx, &sctx->streamout_buffers, 4, > PIPE_SHADER_VERTEX, > - SI_SGPR_SO_BUFFER, RADEON_USAGE_WRITE); > - sctx->atoms.streamout_buffers = &sctx->streamout_buffers.desc.atom; > > /* Set pipe_context functions. */ > sctx->b.b.set_constant_buffer = si_set_constant_buffer; > @@ -961,9 +982,9 @@ void si_release_all_descriptors(struct si_context *sctx) > > for (i = 0; i < SI_NUM_SHADERS; i++) { > si_release_buffer_resources(&sctx->const_buffers[i]); > + si_release_buffer_resources(&sctx->rw_buffers[i]); > si_release_sampler_views(&sctx->samplers[i].views); > } > - si_release_buffer_resources(&sctx->streamout_buffers); > } > > void si_all_descriptors_begin_new_cs(struct si_context *sctx) > @@ -972,7 +993,7 @@ void si_all_descriptors_begin_new_cs(struct si_context > *sctx) > > for (i = 0; i < SI_NUM_SHADERS; i++) { > si_buffer_resources_begin_new_cs(sctx, > &sctx->const_buffers[i]); > + si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]); > si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views); > } > - si_buffer_resources_begin_new_cs(sctx, &sctx->streamout_buffers); > } > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h > b/src/gallium/drivers/radeonsi/si_pipe.h > index d63a52b..f97feb0 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.h > +++ b/src/gallium/drivers/radeonsi/si_pipe.h > @@ -78,6 +78,8 @@ struct si_surface { > > #define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1) > > +#define SI_RW_SO 2 /* Streamout buffer descriptors after ring buffers */ > + > struct si_context { > struct r600_common_context b; > struct blitter_context *blitter; > @@ -93,8 +95,8 @@ struct si_context { > struct { > /* The order matters. */ > struct r600_atom *const_buffers[SI_NUM_SHADERS]; > + struct r600_atom *rw_buffers[SI_NUM_SHADERS]; > struct r600_atom *sampler_views[SI_NUM_SHADERS]; > - struct r600_atom *streamout_buffers; > /* Caches must be flushed after resource descriptors > are > * updated in memory. */ > struct r600_atom *cache_flush; > @@ -120,7 +122,7 @@ struct si_context { > unsigned sprite_coord_enable; > unsigned export_16bpc; > struct si_buffer_resources const_buffers[SI_NUM_SHADERS]; > - struct si_buffer_resources streamout_buffers; > + struct si_buffer_resources rw_buffers[SI_NUM_SHADERS]; > struct si_textures_info samplers[SI_NUM_SHADERS]; > struct r600_resource *border_color_table; > unsigned border_color_offset; > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 5b95c11..54270cd 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -319,7 +319,8 @@ static LLVMValueRef fetch_input_gs( > 4); > > /* Load the ESGS ring resource descriptor */ > - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, > SI_PARAM_CONST); > + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, > + SI_PARAM_RW_BUFFERS); > t_list = build_indexed_load(si_shader_ctx, t_list_ptr, > lp_build_const_int32(gallivm, > SI_RING_ESGS)); > > @@ -1202,7 +1203,8 @@ static void si_llvm_emit_es_epilogue(struct > lp_build_tgsi_context * bld_base) > } > > /* Load the ESGS ring resource descriptor */ > - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, > SI_PARAM_CONST); > + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, > + SI_PARAM_RW_BUFFERS); > t_list = build_indexed_load(si_shader_ctx, t_list_ptr, > lp_build_const_int32(gallivm, > SI_RING_ESGS)); > > @@ -1910,7 +1912,8 @@ static void si_llvm_emit_vertex( > int i; > > /* Load the GSVS ring resource descriptor */ > - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, > SI_PARAM_CONST); > + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, > + SI_PARAM_RW_BUFFERS); > t_list = build_indexed_load(si_shader_ctx, t_list_ptr, > lp_build_const_int32(gallivm, > SI_RING_GSVS)); > > @@ -2038,7 +2041,7 @@ static void create_function(struct si_shader_context > *si_shader_ctx) > struct lp_build_tgsi_context *bld_base = > &si_shader_ctx->radeon_bld.soa.bld_base; > struct gallivm_state *gallivm = bld_base->base.gallivm; > struct si_pipe_shader *shader = si_shader_ctx->shader; > - LLVMTypeRef params[21], f32, i8, i32, v2i32, v3i32; > + LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32; > unsigned i, last_sgpr, num_params; > > i8 = LLVMInt8TypeInContext(gallivm->context); > @@ -2049,6 +2052,8 @@ static void create_function(struct si_shader_context > *si_shader_ctx) > > params[SI_PARAM_CONST] = LLVMPointerType( > LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), > CONST_ADDR_SPACE); > + params[SI_PARAM_RW_BUFFERS] = params[SI_PARAM_CONST]; > + > /* We assume at most 16 textures per program at the moment. > * This need probably need to be changed to support bindless textures > */ > params[SI_PARAM_SAMPLER] = LLVMPointerType( > @@ -2059,7 +2064,6 @@ static void create_function(struct si_shader_context > *si_shader_ctx) > switch (si_shader_ctx->type) { > case TGSI_PROCESSOR_VERTEX: > params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST]; > - params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST]; > params[SI_PARAM_START_INSTANCE] = i32; > num_params = SI_PARAM_START_INSTANCE+1; > if (shader->key.vs.as_es) { > @@ -2257,12 +2261,13 @@ static void preload_streamout_buffers(struct > si_shader_context *si_shader_ctx) > return; > > LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, > - SI_PARAM_SO_BUFFER); > + SI_PARAM_RW_BUFFERS); > > /* Load the resources, we rely on the code sinking to do the rest */ > for (i = 0; i < 4; ++i) { > if (si_shader_ctx->shader->selector->so.stride[i]) { > - LLVMValueRef offset = lp_build_const_int32(gallivm, > i); > + LLVMValueRef offset = lp_build_const_int32(gallivm, > + SI_RW_SO + > i); > > si_shader_ctx->so_buffers[i] = > build_indexed_load(si_shader_ctx, buf_ptr, offset); > } > @@ -2371,7 +2376,8 @@ static int si_generate_gs_copy_shader(struct si_context > *sctx, > preload_streamout_buffers(si_shader_ctx); > > /* Load the GSVS ring resource descriptor */ > - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, > SI_PARAM_CONST); > + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, > + SI_PARAM_RW_BUFFERS); > t_list = build_indexed_load(si_shader_ctx, t_list_ptr, > lp_build_const_int32(gallivm, > SI_RING_GSVS)); > > diff --git a/src/gallium/drivers/radeonsi/si_shader.h > b/src/gallium/drivers/radeonsi/si_shader.h > index 63c19ec..d667baf 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.h > +++ b/src/gallium/drivers/radeonsi/si_shader.h > @@ -34,23 +34,23 @@ > #define SI_SGPR_CONST 0 > #define SI_SGPR_SAMPLER 2 > #define SI_SGPR_RESOURCE 4 > -#define SI_SGPR_VERTEX_BUFFER 6 /* VS only */ > -#define SI_SGPR_SO_BUFFER 8 /* VS only, stream-out */ > +#define SI_SGPR_RW_BUFFERS 6 /* rings (& stream-out, VS only) */ > +#define SI_SGPR_VERTEX_BUFFER 8 /* VS only */ > #define SI_SGPR_START_INSTANCE 10 /* VS only */ > -#define SI_SGPR_ALPHA_REF 6 /* PS only */ > +#define SI_SGPR_ALPHA_REF 8 /* PS only */ > > #define SI_VS_NUM_USER_SGPR 11 > -#define SI_GS_NUM_USER_SGPR 6 > -#define SI_PS_NUM_USER_SGPR 7 > +#define SI_GS_NUM_USER_SGPR 8 > +#define SI_PS_NUM_USER_SGPR 9 > > /* LLVM function parameter indices */ > #define SI_PARAM_CONST 0 > #define SI_PARAM_SAMPLER 1 > #define SI_PARAM_RESOURCE 2 > +#define SI_PARAM_RW_BUFFERS 3 > > /* VS only parameters */ > -#define SI_PARAM_VERTEX_BUFFER 3 > -#define SI_PARAM_SO_BUFFER 4 > +#define SI_PARAM_VERTEX_BUFFER 4 > #define SI_PARAM_START_INSTANCE 5 > /* the other VS parameters are assigned dynamically */ > > @@ -58,36 +58,38 @@ > #define SI_PARAM_ES2GS_OFFSET 6 > > /* GS only parameters */ > -#define SI_PARAM_GS2VS_OFFSET 3 > -#define SI_PARAM_GS_WAVE_ID 4 > -#define SI_PARAM_VTX0_OFFSET 5 > -#define SI_PARAM_VTX1_OFFSET 6 > -#define SI_PARAM_PRIMITIVE_ID 7 > -#define SI_PARAM_VTX2_OFFSET 8 > -#define SI_PARAM_VTX3_OFFSET 9 > -#define SI_PARAM_VTX4_OFFSET 10 > -#define SI_PARAM_VTX5_OFFSET 11 > -#define SI_PARAM_GS_INSTANCE_ID 12 > +#define SI_PARAM_GS2VS_OFFSET 4 > +#define SI_PARAM_GS_WAVE_ID 5 > +#define SI_PARAM_VTX0_OFFSET 6 > +#define SI_PARAM_VTX1_OFFSET 7 > +#define SI_PARAM_PRIMITIVE_ID 8 > +#define SI_PARAM_VTX2_OFFSET 9 > +#define SI_PARAM_VTX3_OFFSET 10 > +#define SI_PARAM_VTX4_OFFSET 11 > +#define SI_PARAM_VTX5_OFFSET 12 > +#define SI_PARAM_GS_INSTANCE_ID 13 > > /* PS only parameters */ > -#define SI_PARAM_ALPHA_REF 3 > -#define SI_PARAM_PRIM_MASK 4 > -#define SI_PARAM_PERSP_SAMPLE 5 > -#define SI_PARAM_PERSP_CENTER 6 > -#define SI_PARAM_PERSP_CENTROID 7 > -#define SI_PARAM_PERSP_PULL_MODEL 8 > -#define SI_PARAM_LINEAR_SAMPLE 9 > -#define SI_PARAM_LINEAR_CENTER 10 > -#define SI_PARAM_LINEAR_CENTROID 11 > -#define SI_PARAM_LINE_STIPPLE_TEX 12 > -#define SI_PARAM_POS_X_FLOAT 13 > -#define SI_PARAM_POS_Y_FLOAT 14 > -#define SI_PARAM_POS_Z_FLOAT 15 > -#define SI_PARAM_POS_W_FLOAT 16 > -#define SI_PARAM_FRONT_FACE 17 > -#define SI_PARAM_ANCILLARY 18 > -#define SI_PARAM_SAMPLE_COVERAGE 19 > -#define SI_PARAM_POS_FIXED_PT 20 > +#define SI_PARAM_ALPHA_REF 4 > +#define SI_PARAM_PRIM_MASK 5 > +#define SI_PARAM_PERSP_SAMPLE 6 > +#define SI_PARAM_PERSP_CENTER 7 > +#define SI_PARAM_PERSP_CENTROID 8 > +#define SI_PARAM_PERSP_PULL_MODEL 9 > +#define SI_PARAM_LINEAR_SAMPLE 10 > +#define SI_PARAM_LINEAR_CENTER 11 > +#define SI_PARAM_LINEAR_CENTROID 12 > +#define SI_PARAM_LINE_STIPPLE_TEX 13 > +#define SI_PARAM_POS_X_FLOAT 14 > +#define SI_PARAM_POS_Y_FLOAT 15 > +#define SI_PARAM_POS_Z_FLOAT 16 > +#define SI_PARAM_POS_W_FLOAT 17 > +#define SI_PARAM_FRONT_FACE 18 > +#define SI_PARAM_ANCILLARY 19 > +#define SI_PARAM_SAMPLE_COVERAGE 20 > +#define SI_PARAM_POS_FIXED_PT 21 > + > +#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) > > struct si_shader_input { > unsigned name; > diff --git a/src/gallium/drivers/radeonsi/si_state.h > b/src/gallium/drivers/radeonsi/si_state.h > index f7082f5..6922c88 100644 > --- a/src/gallium/drivers/radeonsi/si_state.h > +++ b/src/gallium/drivers/radeonsi/si_state.h > @@ -115,10 +115,10 @@ union si_state { > #define NUM_SAMPLER_STATES NUM_TEX_UNITS > > #define NUM_PIPE_CONST_BUFFERS 16 > -#define SI_RING_ESGS 17 > -#define SI_RING_GSVS 18 > -#define NUM_CONST_BUFFERS (SI_RING_GSVS + 1) > +#define NUM_CONST_BUFFERS (NUM_PIPE_CONST_BUFFERS + 1) > > +#define SI_RING_ESGS 0 > +#define SI_RING_GSVS 1 > > /* This represents resource descriptors in memory, such as buffer resources, > * image resources, and sampler states. > -- > 1.8.5.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev