On Sun, May 01, 2016 at 07:42:42PM -0700, Jordan Justen wrote: > On 2016-04-29 04:29:53, Samuel Iglesias Gonsálvez wrote: > > When there is a mix of definitions of uniforms with 32-bit or 64-bit > > data type sizes, the driver ends up doing misaligned access to double > > based variables in the push constant buffer. > > > > To fix this, the driver adds padding when needed in the push constant > > buffer and takes it into account to avoid exceeding its limits and to > > update the GRF registers to access uniform variables. > > > > v2 (Iago): > > - Renamed some variables for clarity. > > - Replace the boolean array of paddings that tracked if each param was > > padded by an integer array that keeps count of accumnulated padding. > > This allows us to remove a couple of loops that had to compute that. > > - Reworked things a bit so we can get rid of the nr_paddings field in > > brw_stage_prog_data. > > - Use rzalloc_array instead or ralloc_array and memset. > > - Fixed wrong indentation. > > > > Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> > > Signed-off-by: Iago Toral Quiroga <ito...@igalia.com> > > --- > > src/mesa/drivers/dri/i965/brw_compiler.h | 8 ++++ > > src/mesa/drivers/dri/i965/brw_fs.cpp | 50 > > ++++++++++++++++++++++--- > > src/mesa/drivers/dri/i965/brw_program.c | 1 + > > src/mesa/drivers/dri/i965/brw_wm.c | 2 + > > src/mesa/drivers/dri/i965/gen6_constant_state.c | 12 ++++-- > > 5 files changed, 64 insertions(+), 9 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > > b/src/mesa/drivers/dri/i965/brw_compiler.h > > index 5807305..bc15bf3 100644 > > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > > @@ -333,6 +333,14 @@ struct brw_stage_prog_data { > > } binding_table; > > > > GLuint nr_params; /**< number of float params/constants */ > > + /** > > + * Accumulated padding (in 32-bit units) in the push constant buffer for > > + * each param. If param_padding[n] = P, it means that params 0..n > > required > > + * a total accumulated padding of P 32-bit slots. This is useful to > > compute > > + * the actual location, including padding, for each param. > > + */ > > + uint32_t *param_padding; > > + > > GLuint nr_pull_params; > > unsigned nr_image_params; > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > > b/src/mesa/drivers/dri/i965/brw_fs.cpp > > index 53f709b..5aa6ded 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > > @@ -1571,7 +1571,8 @@ fs_visitor::assign_curb_setup() > > int uniform_nr = inst->src[i].nr + inst->src[i].reg_offset; > > int constant_nr; > > if (uniform_nr >= 0 && uniform_nr < (int) uniforms) { > > - constant_nr = push_constant_loc[uniform_nr]; > > + constant_nr = push_constant_loc[uniform_nr] + > > + stage_prog_data->param_padding[uniform_nr]; > > } else { > > /* Section 5.11 of the OpenGL 4.1 spec says: > > * "Out-of-bounds reads return undefined values, which > > include > > @@ -2010,7 +2011,10 @@ fs_visitor::assign_constant_locations() > > return; > > > > bool is_live[uniforms]; > > + bool is_64bit[uniforms]; > > + > > memset(is_live, 0, sizeof(is_live)); > > + memset(is_64bit, 0, sizeof(is_64bit)); > > > > /* For each uniform slot, a value of true indicates that the given slot > > and > > * the next slot must remain contiguous. This is used to keep us from > > @@ -2047,8 +2051,11 @@ fs_visitor::assign_constant_locations() > > is_live[last] = true; > > } else { > > if (constant_nr >= 0 && constant_nr < (int) uniforms) { > > - for (int j = 0; j < inst->regs_read(i); j++) > > + for (int j = 0; j < inst->regs_read(i); j++) { > > is_live[constant_nr + j] = true; > > + if (type_sz(inst->src[i].type) == 8) > > + is_64bit[constant_nr + j] = true; > > + } > > } > > } > > } > > @@ -2072,14 +2079,18 @@ fs_visitor::assign_constant_locations() > > > > unsigned int num_push_constants = 0; > > unsigned int num_pull_constants = 0; > > + unsigned num_paddings = 0; > > + bool is_64bit_aligned = true; > > > > push_constant_loc = ralloc_array(mem_ctx, int, uniforms); > > pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); > > + stage_prog_data->param_padding = rzalloc_array(NULL, uint32_t, > > uniforms);
Related to Jordan's question, aren't we allocating "param_padding" twice for fragment shader? First in brw_codegen_wm_prog() and again here. > > > > int chunk_start = -1; > > for (unsigned u = 0; u < uniforms; u++) { > > push_constant_loc[u] = -1; > > pull_constant_loc[u] = -1; > > + stage_prog_data->param_padding[u] = num_paddings; > > > > if (!is_live[u]) > > continue; > > @@ -2094,17 +2105,44 @@ fs_visitor::assign_constant_locations() > > */ > > if (!contiguous[u]) { > > unsigned chunk_size = u - chunk_start + 1; > > + unsigned total_push_components = num_push_constants + > > num_paddings; > > + bool push_32_bits_constant = !is_64bit[u] && > > + total_push_components + chunk_size <= max_push_components; > > + /* Verify if the 64 bits constant can be saved into the push > > constant > > + * buffer. Take into account that it might need padding. > > + */ > > + bool push_64_bits_constant = is_64bit[u] && > > + (total_push_components + chunk_size + 1 + !is_64bit_aligned) <= > > + max_push_components; > > > > /* Decide whether we should push or pull this parameter. In the > > * Vulkan driver, push constants are explicitly exposed via the > > API > > * so we push everything. In GL, we only push small arrays. > > */ > > if (stage_prog_data->pull_param == NULL || > > - (num_push_constants + chunk_size <= max_push_components && > > + ((push_32_bits_constant || push_64_bits_constant) && > > chunk_size <= max_chunk_size)) { > > - assert(num_push_constants + chunk_size <= max_push_components); > > - for (unsigned j = chunk_start; j <= u; j++) > > + assert(total_push_components + chunk_size <= > > max_push_components); > > + > > + for (unsigned j = chunk_start; j <= u; j++) { > > + /* If it is a double and it is not aligned to 64 bits, add > > padding. > > + * It is already aligned, don't add padding. > > + */ > > + if (push_64_bits_constant) { > > + if (!is_64bit_aligned) { > > + num_paddings++; > > + stage_prog_data->param_padding[j] = num_paddings; > > + is_64bit_aligned = true; > > + } > > + } else { > > + /* We are pushing a 32 bits constant. Is the next push > > constant > > + * aligned 64 bits? > > + */ > > + is_64bit_aligned = !is_64bit_aligned; > > + } > > + > > push_constant_loc[j] = num_push_constants++; > > + } > > } else { > > for (unsigned j = chunk_start; j <= u; j++) > > pull_constant_loc[j] = num_pull_constants++; > > @@ -2114,7 +2152,7 @@ fs_visitor::assign_constant_locations() > > } > > } > > > > - stage_prog_data->nr_params = num_push_constants; > > + stage_prog_data->nr_params = num_push_constants + num_paddings; > > stage_prog_data->nr_pull_params = num_pull_constants; > > > > /* Up until now, the param[] array has been indexed by reg + reg_offset > > diff --git a/src/mesa/drivers/dri/i965/brw_program.c > > b/src/mesa/drivers/dri/i965/brw_program.c > > index 3112c0c..2d145cd 100644 > > --- a/src/mesa/drivers/dri/i965/brw_program.c > > +++ b/src/mesa/drivers/dri/i965/brw_program.c > > @@ -555,6 +555,7 @@ brw_stage_prog_data_free(const void *p) > > struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p; > > > > ralloc_free(prog_data->param); > > + ralloc_free(prog_data->param_padding); > > ralloc_free(prog_data->pull_param); > > ralloc_free(prog_data->image_param); > > } > > diff --git a/src/mesa/drivers/dri/i965/brw_wm.c > > b/src/mesa/drivers/dri/i965/brw_wm.c > > index dbc626c..025d484 100644 > > --- a/src/mesa/drivers/dri/i965/brw_wm.c > > +++ b/src/mesa/drivers/dri/i965/brw_wm.c > > @@ -103,6 +103,8 @@ brw_codegen_wm_prog(struct brw_context *brw, > > param_count += 2 * > > ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; > > prog_data.base.param = > > rzalloc_array(NULL, const gl_constant_value *, param_count); > > + prog_data.base.param_padding = > > + rzalloc_array(NULL, uint32_t, param_count); > > Wouldn't this need to be allocated in the various other scalar > brw_codegen_*_prog functions? > > Maybe another method might be to track the required alignment rather > that padding? Or, perhaps we could get all the 64-bit items put up > front so no padding will be required. Maybe this is something to put > on a list of 'follow-on' FP64 work to follow the first pass > implementation. > > > prog_data.base.pull_param = > > rzalloc_array(NULL, const gl_constant_value *, param_count); > > prog_data.base.image_param = > > diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c > > b/src/mesa/drivers/dri/i965/gen6_constant_state.c > > index 6c0c32b..3d0caf2 100644 > > --- a/src/mesa/drivers/dri/i965/gen6_constant_state.c > > +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c > > @@ -135,7 +135,9 @@ gen6_upload_push_constants(struct brw_context *brw, > > _mesa_load_state_parameters(ctx, prog->Parameters); > > > > gl_constant_value *param; > > - int i; > > + int i, p; > > + gl_constant_value zero; > > + zero.u = 0; > > > > param = brw_state_batch(brw, type, > > prog_data->nr_params * > > sizeof(gl_constant_value), > > @@ -149,8 +151,12 @@ gen6_upload_push_constants(struct brw_context *brw, > > * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS > > * wouldn't be set for them. > > */ > > - for (i = 0; i < prog_data->nr_params; i++) { > > - param[i] = *prog_data->param[i]; > > + for (i = 0, p = 0; i < prog_data->nr_params; i++) { > > + if (p > 0 && prog_data->param_padding && > > + prog_data->param_padding[p] > prog_data->param_padding[p - > > 1]) { > > + param[i++] = zero; > > + } > > + param[i] = *prog_data->param[p++]; > > brw_upload_cs_push_constants will need to be updated too? > > -Jordan > > > } > > > > if (0) { > > -- > > 2.5.0 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev