Timothy Arceri <t_arc...@yahoo.com.au> writes: > This is more optimal as it means we no longer have to upload the same set > of Atomic Buffer Object surfaces to all stages in the program. > > This also fixes a bug where since commit c0cd5b var->data.binding was > being used as a replacement for atomic buffer index, but they don't have > to be the same value they just happened to end up the same when binding is 0. > > Cc: Francisco Jerez <curroje...@riseup.net> > Cc: Ilia Mirkin <imir...@alum.mit.edu> > Cc: Alejandro Piñeiro <apinhe...@igalia.com> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90175 > --- > src/glsl/ir_uniform.h | 8 ++++- > src/glsl/link_atomics.cpp | 43 > ++++++++++++++++++++++-- > src/glsl/nir/glsl_to_nir.cpp | 2 -- > src/glsl/nir/nir.h | 4 +-- > src/glsl/nir/nir_lower_atomics.c | 25 +++++++++++--- > src/mesa/drivers/dri/i965/brw_context.h | 2 +- > src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 4 +-- > src/mesa/drivers/dri/i965/brw_nir.c | 6 ++-- > src/mesa/drivers/dri/i965/brw_shader.cpp | 4 +-- > src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 4 +-- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 37 ++++++++++---------- > src/mesa/main/mtypes.h | 5 ++- > 12 files changed, 103 insertions(+), 41 deletions(-) > > diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h > index 50fe76b..2265caf 100644 > --- a/src/glsl/ir_uniform.h > +++ b/src/glsl/ir_uniform.h > @@ -71,7 +71,9 @@ struct gl_uniform_driver_storage { > > struct gl_opaque_uniform_index { > /** > - * Base opaque uniform index > + * == For types other than atomics == > + * > + * This is the base opaque uniform index > * > * If \c gl_uniform_storage::base_type is an opaque type, this > * represents its uniform index. If \c > @@ -80,6 +82,10 @@ struct gl_opaque_uniform_index { > * gl_uniform_storage::array_elements - 1, inclusive. > * > * Note that the index may be different in each shader stage. > + * > + * == For atomics == > + * > + * This is the intra-stage stage index of gl_shader::AtomicBuffers[].
AFAICT this "index" field has the same semantics for atomics and non-atomics, it's an intra-stage surface index regardless of whether it's an atomic, sampler or image, the difference is just which array it indexes into so I doubt it makes sense to split the comment like this. With that and Samuel's comment taken into account this patch is: Reviewed-by: Francisco Jerez <curroje...@riseup.net> > */ > uint8_t index; > > diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp > index 100d03c..3073de0 100644 > --- a/src/glsl/link_atomics.cpp > +++ b/src/glsl/link_atomics.cpp > @@ -167,6 +167,7 @@ link_assign_atomic_counter_resources(struct gl_context > *ctx, > struct gl_shader_program *prog) > { > unsigned num_buffers; > + unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {}; > active_atomic_buffer *abs = > find_active_atomic_counters(ctx, prog, &num_buffers); > > @@ -211,13 +212,49 @@ link_assign_atomic_counter_resources(struct gl_context > *ctx, > } > > /* Assign stage-specific fields. */ > - for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) > - mab.StageReferences[j] = > - (ab.stage_references[j] ? GL_TRUE : GL_FALSE); > + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { > + if (ab.stage_references[j]) { > + mab.StageReferences[j] = GL_TRUE; > + num_atomic_buffers[j]++; > + } else { > + mab.StageReferences[j] = GL_FALSE; > + } > + } > > i++; > } > > + /* Store a list pointers to atomic buffers per stage and store the index > + * to this intra-stage buffer list along with the uniform. > + */ > + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { > + if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) { > + prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j]; > + prog->_LinkedShaders[j]->AtomicBuffers = > + rzalloc_array(prog, gl_active_atomic_buffer *, > + num_atomic_buffers[j]); > + > + unsigned intra_stage_idx = 0; > + for (unsigned i = 0; i < num_buffers; i++) { > + struct gl_active_atomic_buffer *atomic_buffer = > + &prog->AtomicBuffers[i]; > + if (atomic_buffer->StageReferences[j]) { > + prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] = > + atomic_buffer; > + > + for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) { > + > prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index = > + intra_stage_idx; > + > prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active = > + true; > + } > + > + intra_stage_idx++; > + } > + } > + } > + } > + > delete [] abs; > assert(i == num_buffers); > } > diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp > index 4dd6287..0e12a5b 100644 > --- a/src/glsl/nir/glsl_to_nir.cpp > +++ b/src/glsl/nir/glsl_to_nir.cpp > @@ -357,8 +357,6 @@ nir_visitor::visit(ir_variable *ir) > > var->data.index = ir->data.index; > var->data.binding = ir->data.binding; > - /* XXX Get rid of buffer_index */ > - var->data.atomic.buffer_index = ir->data.binding; > var->data.atomic.offset = ir->data.atomic.offset; > var->data.image.read_only = ir->data.image_read_only; > var->data.image.write_only = ir->data.image_write_only; > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index 268fbc2..aa3224d 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -308,7 +308,6 @@ typedef struct { > * Location an atomic counter is stored at. > */ > struct { > - unsigned buffer_index; > unsigned offset; > } atomic; > > @@ -1920,7 +1919,8 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned > ucp_enables); > > void nir_lower_two_sided_color(nir_shader *shader); > > -void nir_lower_atomics(nir_shader *shader); > +void nir_lower_atomics(nir_shader *shader, > + const struct gl_shader_program *shader_program); > void nir_lower_to_source_mods(nir_shader *shader); > > bool nir_lower_gs_intrinsics(nir_shader *shader); > diff --git a/src/glsl/nir/nir_lower_atomics.c > b/src/glsl/nir/nir_lower_atomics.c > index 6f9ecc0..ea237b5 100644 > --- a/src/glsl/nir/nir_lower_atomics.c > +++ b/src/glsl/nir/nir_lower_atomics.c > @@ -25,17 +25,24 @@ > * > */ > > +#include "ir_uniform.h" > #include "nir.h" > #include "main/config.h" > #include <assert.h> > > +typedef struct { > + const struct gl_shader_program *shader_program; > + nir_shader *shader; > +} lower_atomic_state; > + > /* > * replace atomic counter intrinsics that use a variable with intrinsics > * that directly store the buffer index and byte offset > */ > > static void > -lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) > +lower_instr(nir_intrinsic_instr *instr, > + lower_atomic_state *state) > { > nir_intrinsic_op op; > switch (instr->intrinsic) { > @@ -60,10 +67,11 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl > *impl) > return; /* atomics passed as function arguments can't be lowered */ > > void *mem_ctx = ralloc_parent(instr); > + unsigned uniform_loc = instr->variables[0]->var->data.location; > > nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); > new_instr->const_index[0] = > - (int) instr->variables[0]->var->data.atomic.buffer_index; > + > state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index; > > nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, > 1); > offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset; > @@ -130,18 +138,25 @@ lower_block(nir_block *block, void *state) > { > nir_foreach_instr_safe(block, instr) { > if (instr->type == nir_instr_type_intrinsic) > - lower_instr(nir_instr_as_intrinsic(instr), state); > + lower_instr(nir_instr_as_intrinsic(instr), > + (lower_atomic_state *) state); > } > > return true; > } > > void > -nir_lower_atomics(nir_shader *shader) > +nir_lower_atomics(nir_shader *shader, > + const struct gl_shader_program *shader_program) > { > + lower_atomic_state state = { > + .shader = shader, > + .shader_program = shader_program, > + }; > + > nir_foreach_overload(shader, overload) { > if (overload->impl) { > - nir_foreach_block(overload->impl, lower_block, overload->impl); > + nir_foreach_block(overload->impl, lower_block, (void *) &state); > nir_metadata_preserve(overload->impl, nir_metadata_block_index | > nir_metadata_dominance); > } > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 19a5117..404c9f5 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -1797,7 +1797,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw, > struct brw_stage_prog_data *prog_data, > bool dword_pitch); > void brw_upload_abo_surfaces(struct brw_context *brw, > - struct gl_shader_program *prog, > + struct gl_shader *shader, > struct brw_stage_state *stage_state, > struct brw_stage_prog_data *prog_data); > void brw_upload_image_surfaces(struct brw_context *brw, > diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c > b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c > index 0bb3074..b9d40cb 100644 > --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c > @@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw) > > if (prog) { > /* BRW_NEW_GS_PROG_DATA */ > - brw_upload_abo_surfaces(brw, prog, &brw->gs.base, > - &brw->gs.prog_data->base.base); > + brw_upload_abo_surfaces(brw, > prog->_LinkedShaders[MESA_SHADER_GEOMETRY], > + &brw->gs.base, &brw->gs.prog_data->base.base); > } > } > > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c > b/src/mesa/drivers/dri/i965/brw_nir.c > index 12f47ad..060e497 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir.c > +++ b/src/mesa/drivers/dri/i965/brw_nir.c > @@ -158,8 +158,10 @@ brw_create_nir(struct brw_context *brw, > nir_lower_system_values(nir); > nir_validate_shader(nir); > > - nir_lower_atomics(nir); > - nir_validate_shader(nir); > + if (shader_prog) { > + nir_lower_atomics(nir, shader_prog); > + nir_validate_shader(nir); > + } > > nir_optimize(nir, is_scalar); > > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index 3960e86..baa219c 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -1389,9 +1389,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage > stage, > stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; > } > > - if (shader_prog && shader_prog->NumAtomicBuffers) { > + if (shader && shader->NumAtomicBuffers) { > stage_prog_data->binding_table.abo_start = next_binding_table_offset; > - next_binding_table_offset += shader_prog->NumAtomicBuffers; > + next_binding_table_offset += shader->NumAtomicBuffers; > } else { > stage_prog_data->binding_table.abo_start = 0xd0d0d0d0; > } > diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c > b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c > index 9bb48eb..79547a6 100644 > --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c > @@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw) > > if (prog) { > /* BRW_NEW_VS_PROG_DATA */ > - brw_upload_abo_surfaces(brw, prog, &brw->vs.base, > - &brw->vs.prog_data->base.base); > + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX], > + &brw->vs.base, &brw->vs.prog_data->base.base); > } > } > > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index c671e23..89d67b0 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = { > > void > brw_upload_abo_surfaces(struct brw_context *brw, > - struct gl_shader_program *prog, > + struct gl_shader *shader, > struct brw_stage_state *stage_state, > struct brw_stage_prog_data *prog_data) > { > @@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw, > uint32_t *surf_offsets = > &stage_state->surf_offset[prog_data->binding_table.abo_start]; > > - for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) { > - struct gl_atomic_buffer_binding *binding = > - &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding]; > - struct intel_buffer_object *intel_bo = > - intel_buffer_object(binding->BufferObject); > - drm_intel_bo *bo = intel_bufferobj_buffer( > - brw, intel_bo, binding->Offset, intel_bo->Base.Size - > binding->Offset); > - > - brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo, > - binding->Offset, > BRW_SURFACEFORMAT_RAW, > - bo->size - binding->Offset, 1, > true); > - } > + if (shader && shader->NumAtomicBuffers) { > + for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) { > + struct gl_atomic_buffer_binding *binding = > + &ctx->AtomicBufferBindings[(*shader->AtomicBuffers[i]).Binding]; > + struct intel_buffer_object *intel_bo = > + intel_buffer_object(binding->BufferObject); > + drm_intel_bo *bo = intel_bufferobj_buffer( > + brw, intel_bo, binding->Offset, intel_bo->Base.Size - > binding->Offset); > + > + brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo, > + binding->Offset, > BRW_SURFACEFORMAT_RAW, > + bo->size - binding->Offset, 1, > true); > + } > > - if (prog->NumAtomicBuffers) > brw->ctx.NewDriverState |= BRW_NEW_SURFACES; > + } > } > > static void > @@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw) > > if (prog) { > /* BRW_NEW_FS_PROG_DATA */ > - brw_upload_abo_surfaces(brw, prog, &brw->wm.base, > - &brw->wm.prog_data->base); > + brw_upload_abo_surfaces(brw, > prog->_LinkedShaders[MESA_SHADER_FRAGMENT], > + &brw->wm.base, &brw->wm.prog_data->base); > } > } > > @@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw) > > if (prog) { > /* BRW_NEW_CS_PROG_DATA */ > - brw_upload_abo_surfaces(brw, prog, &brw->cs.base, > - &brw->cs.prog_data->base); > + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE], > + &brw->cs.base, &brw->cs.prog_data->base); > } > } > > diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h > index 288d757..6d0cf75 100644 > --- a/src/mesa/main/mtypes.h > +++ b/src/mesa/main/mtypes.h > @@ -2392,6 +2392,9 @@ struct gl_shader > */ > GLuint NumImages; > > + struct gl_active_atomic_buffer **AtomicBuffers; > + unsigned NumAtomicBuffers; > + > /** > * Whether early fragment tests are enabled as defined by > * ARB_shader_image_load_store. > @@ -4484,7 +4487,7 @@ static inline bool > _mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx) > { > return ctx->Shader._CurrentFragmentProgram != NULL && > - ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0; > + > ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers > > 0; > } > > #ifdef __cplusplus > -- > 2.4.3
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev