On Monday, December 07, 2015 04:52:27 PM Jason Ekstrand wrote: > This commit pushes makes uniform offsets be terms of bytes starting with > nir_lower_io. They get converted to be in terms of vec4s or floats when we > cram them in the UNIFORM register file but reladdr remains in terms of > bytes all the way down to the point where we lower it to a pull constant > load. > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 4 +--- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 14 +++++++----- > src/mesa/drivers/dri/i965/brw_nir.c | 30 > +++++++++++++++++++++++--- > src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 6 ++---- > src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 12 +++++++---- > src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 3 +-- > 6 files changed, 48 insertions(+), 21 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index de5c17a..5e8acec 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -2052,11 +2052,9 @@ fs_visitor::demote_pull_constants() > > /* Generate a pull load into dst. */ > if (inst->src[i].reladdr) { > - fs_reg indirect = ibld.vgrf(BRW_REGISTER_TYPE_D); > - ibld.MUL(indirect, *inst->src[i].reladdr, brw_imm_d(4)); > VARYING_PULL_CONSTANT_LOAD(ibld, dst, > brw_imm_ud(index), > - indirect, > + *inst->src[i].reladdr, > pull_index * 4); > inst->src[i].reladdr = NULL; > inst->src[i].stride = 1; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index c34f856..e856e32 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -173,7 +173,7 @@ fs_visitor::nir_setup_uniforms() > if (dispatch_width != 8) > return; > > - uniforms = nir->num_uniforms; > + uniforms = nir->num_uniforms / 4; > > nir_foreach_variable(var, &nir->uniforms) { > /* UBO's and atomics don't take up space in the uniform file */ > @@ -181,7 +181,7 @@ fs_visitor::nir_setup_uniforms() > continue; > > if (type_size_scalar(var->type) > 0) > - param_size[var->data.driver_location] = type_size_scalar(var->type); > + param_size[var->data.driver_location / 4] = > type_size_scalar(var->type); > } > } > > @@ -1165,7 +1165,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var > *deref) > bld.MOV(tmp, get_nir_src(deref_array->indirect)); > } > > - bld.MUL(tmp, tmp, brw_imm_ud(element_size)); > + bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4)); > if (image.reladdr) > bld.ADD(*image.reladdr, *image.reladdr, tmp); > else > @@ -2300,8 +2300,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > has_indirect = true; > /* fallthrough */ > case nir_intrinsic_load_uniform: { > - fs_reg uniform_reg(UNIFORM, instr->const_index[0]); > - uniform_reg.reg_offset = instr->const_index[1]; > + /* Offsets are in bytes but they should always be multiples of 4 */ > + assert(instr->const_index[0] % 4 == 0); > + assert(instr->const_index[1] % 4 == 0); > + > + fs_reg uniform_reg(UNIFORM, instr->const_index[0] / 4); > + uniform_reg.reg_offset = instr->const_index[1] / 4; > > for (unsigned j = 0; j < instr->num_components; j++) { > fs_reg src = offset(retype(uniform_reg, dest.type), bld, j); > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c > b/src/mesa/drivers/dri/i965/brw_nir.c > index 5182bca..d624703 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir.c > +++ b/src/mesa/drivers/dri/i965/brw_nir.c > @@ -166,6 +166,32 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar) > } > } > > +static int > +type_size_scalar_bytes(const struct glsl_type *type) > +{ > + return type_size_scalar(type) * 4; > +} > + > +static int > +type_size_vec4_bytes(const struct glsl_type *type) > +{ > + return type_size_vec4(type) * 16; > +} > + > +static void > +brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) > +{ > + if (is_scalar) { > + nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, > + type_size_scalar_bytes); > + nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes); > + } else { > + nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, > + type_size_vec4_bytes); > + nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes); > + } > +} > + > #include "util/debug.h" > > static bool > @@ -295,9 +321,7 @@ brw_lower_nir(nir_shader *nir, > > OPT_V(brw_nir_lower_inputs, devinfo, is_scalar); > OPT_V(brw_nir_lower_outputs, is_scalar); > - nir_assign_var_locations(&nir->uniforms, > - &nir->num_uniforms, > - is_scalar ? type_size_scalar : type_size_vec4); > + OPT_V(brw_nir_lower_uniforms, is_scalar); > OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : > type_size_vec4); > > if (shader_prog) { > diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > index 155a9c6..0849ca4 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > @@ -34,8 +34,7 @@ brw_nir_setup_glsl_builtin_uniform(nir_variable *var, > const nir_state_slot *const slots = var->state_slots; > assert(var->state_slots != NULL); > > - unsigned uniform_index = is_scalar ? var->data.driver_location : > - var->data.driver_location * 4; > + unsigned uniform_index = var->data.driver_location / 4; > for (unsigned int i = 0; i < var->num_state_slots; i++) { > /* This state reference has already been setup by ir_to_mesa, but we'll > * get the same index back here. > @@ -81,8 +80,7 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, > nir_variable *var, > * order we'd walk the type, so walk the list of storage and find anything > * with our name, or the prefix of a component that starts with our name. > */ > - unsigned uniform_index = is_scalar ? var->data.driver_location : > - var->data.driver_location * 4; > + unsigned uniform_index = var->data.driver_location / 4; > for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { > struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > index d4eda4a..5089418 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > @@ -117,7 +117,7 @@ vec4_visitor::nir_setup_system_values() > void > vec4_visitor::nir_setup_uniforms() > { > - uniforms = nir->num_uniforms; > + uniforms = nir->num_uniforms / 16; > > nir_foreach_variable(var, &nir->uniforms) { > /* UBO's and atomics don't take up space in the uniform file */ > @@ -125,7 +125,7 @@ vec4_visitor::nir_setup_uniforms() > continue; > > if (type_size_vec4(var->type) > 0) > - uniform_size[var->data.driver_location] = type_size_vec4(var->type); > + uniform_size[var->data.driver_location / 16] = > type_size_vec4(var->type); > } > } > > @@ -677,10 +677,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr > *instr) > has_indirect = true; > /* fallthrough */ > case nir_intrinsic_load_uniform: { > + /* Offsets are in bytes but they should always be multiples of 16 */ > + assert(instr->const_index[0] % 4 == 0); > + assert(instr->const_index[1] % 4 == 0);
I think you want % 16 here, not % 4. This patch is: Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> but I don't think it should go to stable just yet.
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev