Timothy Arceri <t_arc...@yahoo.com.au> writes: > Cc: Francisco Jerez <curroje...@riseup.net> > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 44 > ++++++++++++++++---------- > src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 2 ++ > 2 files changed, 30 insertions(+), 16 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 0e044d0..16b5f0a 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -1037,19 +1037,27 @@ fs_visitor::get_nir_image_deref(const nir_deref_var > *deref) > { > fs_reg image(UNIFORM, deref->var->data.driver_location, > BRW_REGISTER_TYPE_UD); > - > - if (deref->deref.child) { > - const nir_deref_array *deref_array = > - nir_deref_as_array(deref->deref.child); > - assert(deref->deref.child->deref_type == nir_deref_type_array && > - deref_array->deref.child == NULL); > - const unsigned size = glsl_get_length(deref->var->type); > + fs_reg *indirect_offset = NULL;
Assign the indirect indexing temporary directly to image.reladdr so you avoid this declaration and the 'if (indirect_offset)...' conditional below. > + > + unsigned img_offset = 0; Do 'image = offset(image, bld, base * element-size)' inside the loop so you avoid this redundant temporary and the assignment at the bottom. > + const nir_deref *tail = &deref->deref; > + while (tail->child) { Looks like a for loop 'for (const nir_deref *tail = deref->deref.child; tail; tail = tail->child)'. > + const nir_deref_array *deref_array = nir_deref_as_array(tail->child); > + assert(tail->child->deref_type == nir_deref_type_array); > + tail = tail->child; > + const unsigned size = glsl_get_length(tail->type); > + const unsigned child_array_elements = tail->child != NULL ? > + glsl_get_aoa_size(tail->type) : 1; > const unsigned base = MIN2(deref_array->base_offset, size - 1); > - > - image = offset(image, bld, base * BRW_IMAGE_PARAM_SIZE); > + const unsigned aoa_size = child_array_elements * BRW_IMAGE_PARAM_SIZE; How about "element_size" instead? And wouldn't it be easier to initialize it to 'type_size_scalar(tail->type)' so you avoid the child_array_elements computation and multiplication by BRW_IMAGE_PARAM_SIZE? > + img_offset += base * aoa_size; > > if (deref_array->deref_array_type == nir_deref_array_type_indirect) { > - fs_reg *tmp = new(mem_ctx) fs_reg(vgrf(glsl_type::int_type)); > + fs_reg tmp = vgrf(glsl_type::int_type); > + if (indirect_offset == NULL) { > + indirect_offset = new(mem_ctx) fs_reg(vgrf(glsl_type::int_type)); > + bld.MOV(*indirect_offset, fs_reg(0)); > + } > > if (devinfo->gen == 7 && !devinfo->is_haswell) { > /* IVB hangs when trying to access an invalid surface index with > @@ -1060,18 +1068,22 @@ fs_visitor::get_nir_image_deref(const nir_deref_var > *deref) > * of the possible outcomes of the hang. Clamp the index to > * prevent access outside of the array bounds. > */ > - bld.emit_minmax(*tmp, retype(get_nir_src(deref_array->indirect), > - BRW_REGISTER_TYPE_UD), > + bld.emit_minmax(tmp, retype(get_nir_src(deref_array->indirect), > + BRW_REGISTER_TYPE_UD), > fs_reg(size - base - 1), BRW_CONDITIONAL_L); > } else { > - bld.MOV(*tmp, get_nir_src(deref_array->indirect)); > + bld.MOV(tmp, get_nir_src(deref_array->indirect)); > } > - > - bld.MUL(*tmp, *tmp, fs_reg(BRW_IMAGE_PARAM_SIZE)); > - image.reladdr = tmp; > + bld.MUL(tmp, tmp, fs_reg(aoa_size)); If you create the reladdr temporary down here you may be able avoid the useless zero-initialization and addition for the first AoA level, like: | if (image.reladdr) | bld.ADD(*image.reladdr, *image.reladdr, tmp); | else | image.reladdr = new(mem_ctx) fs_reg(tmp); > + bld.ADD(*indirect_offset, *indirect_offset, tmp); > } > } > > + if (indirect_offset) { > + image.reladdr = indirect_offset; > + } > + image = offset(image, bld, img_offset); > + > return image; > } > > diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > index d3326e9..87b3839 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > @@ -98,6 +98,8 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, > nir_variable *var, > if (storage->type->is_image()) { > brw_setup_image_uniform_values(stage, stage_prog_data, > uniform_index, storage); > + uniform_index += > + BRW_IMAGE_PARAM_SIZE * MAX2(storage->array_elements, 1); > } else { > gl_constant_value *components = storage->storage; > unsigned vector_count = (MAX2(storage->array_elements, 1) * > -- > 2.4.3
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev