-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA256 Kenneth gave his R-b to this patch on IRC:
<Kayden> "i965/fs: push first double-based uniforms in push constant buffer" gets my R-b On 06/05/16 08:56, Samuel Iglesias Gonsálvez wrote: > When there is a mix of definitions of uniforms with 32-bit or > 64-bit data type sizes, the driver ends up doing misaligned access > to double based variables in the push constant buffer. > > To fix this, this patch pushes first all the 64-bit variables and > then the rest. Then, all the variables would be aligned to its data > type size. > > Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> > --- src/mesa/drivers/dri/i965/brw_fs.cpp | 113 > +++++++++++++++++++++++++---------- 1 file changed, 83 > insertions(+), 30 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp index 65aa3c7..7eaf1b4 > 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ > b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -39,6 +39,7 @@ #include > "brw_program.h" #include "brw_dead_control_flow.h" #include > "compiler/glsl_types.h" +#include "program/prog_parameter.h" > > using namespace brw; > > @@ -2004,6 +2005,45 @@ fs_visitor::compact_virtual_grfs() return > progress; } > > +static void +set_push_pull_constant_loc(unsigned uniform, int > &chunk_start, bool contiguous, + int > *push_constant_loc, int *pull_constant_loc, + > unsigned &num_push_constants, + unsigned > &num_pull_constants, + const unsigned > max_push_components, + const unsigned > max_chunk_size, + struct > brw_stage_prog_data *stage_prog_data) +{ + /* This is the first > live uniform in the chunk */ + if (chunk_start < 0) + > chunk_start = uniform; + + /* If this element does not need to be > contiguous with the next, we + * split at this point and > everthing between chunk_start and u forms a + * single chunk. + > */ + if (!contiguous) { + unsigned chunk_size = uniform - > chunk_start + 1; + + /* Decide whether we should push or pull > this parameter. In the + * Vulkan driver, push constants are > explicitly exposed via the API + * so we push everything. In > GL, we only push small arrays. + */ + if > (stage_prog_data->pull_param == NULL || + > (num_push_constants + chunk_size <= max_push_components && + > chunk_size <= max_chunk_size)) { + > assert(num_push_constants + chunk_size <= max_push_components); + > for (unsigned j = chunk_start; j <= uniform; j++) + > push_constant_loc[j] = num_push_constants++; + } else { + > for (unsigned j = chunk_start; j <= uniform; j++) + > pull_constant_loc[j] = num_pull_constants++; + } + + > chunk_start = -1; + } +} + /** * Assign UNIFORM file registers to > either push constants or pull constants. * @@ -2022,6 +2062,8 @@ > fs_visitor::assign_constant_locations() > > bool is_live[uniforms]; memset(is_live, 0, sizeof(is_live)); + > bool is_live_64bit[uniforms]; + memset(is_live_64bit, 0, > sizeof(is_live_64bit)); > > /* For each uniform slot, a value of true indicates that the given > slot and * the next slot must remain contiguous. This is used to > keep us from @@ -2054,14 +2096,21 @@ > fs_visitor::assign_constant_locations() for (unsigned j = > constant_nr; j < last; j++) { is_live[j] = true; contiguous[j] = > true; + if (type_sz(inst->src[i].type) == 8) { + > is_live_64bit[j] = true; + } } is_live[last] = true; > } else { if (constant_nr >= 0 && constant_nr < (int) uniforms) { > int regs_read = inst->components_read(i) * > type_sz(inst->src[i].type) / 4; - for (int j = 0; j < > regs_read; j++) + for (int j = 0; j < regs_read; j++) > { is_live[constant_nr + j] = true; + if > (type_sz(inst->src[i].type) == 8) { + > is_live_64bit[constant_nr + j] = true; + } + > } } } } @@ -2090,43 +2139,46 @@ > fs_visitor::assign_constant_locations() pull_constant_loc = > ralloc_array(mem_ctx, int, uniforms); > > int chunk_start = -1; + + + /* First push 64-bit uniforms */ for > (unsigned u = 0; u < uniforms; u++) { - push_constant_loc[u] = > -1; + if (!is_live[u] || !is_live_64bit[u]) + > continue; + pull_constant_loc[u] = -1; + push_constant_loc[u] > = -1; > > - if (!is_live[u]) - continue; + > set_push_pull_constant_loc(u, chunk_start, contiguous[u], + > push_constant_loc, pull_constant_loc, + > num_push_constants, num_pull_constants, + > max_push_components, max_chunk_size, + > stage_prog_data); > > - /* This is the first live uniform in the chunk */ - if > (chunk_start < 0) - chunk_start = u; + } > > - /* If this element does not need to be contiguous with the > next, we - * split at this point and everthing between > chunk_start and u forms a - * single chunk. - */ - > if (!contiguous[u]) { - unsigned chunk_size = u - > chunk_start + 1; + /* Then push the rest of uniforms */ + for > (unsigned u = 0; u < uniforms; u++) { + if (!is_live[u] || > is_live_64bit[u]) + continue; > > - /* Decide whether we should push or pull this parameter. > In the - * Vulkan driver, push constants are explicitly > exposed via the API - * so we push everything. In GL, we > only push small arrays. - */ - if > (stage_prog_data->pull_param == NULL || - > (num_push_constants + chunk_size <= max_push_components && - > chunk_size <= max_chunk_size)) { - > assert(num_push_constants + chunk_size <= max_push_components); - > for (unsigned j = chunk_start; j <= u; j++) - > push_constant_loc[j] = num_push_constants++; - } else { - > for (unsigned j = chunk_start; j <= u; j++) - > pull_constant_loc[j] = num_pull_constants++; - } + > pull_constant_loc[u] = -1; + push_constant_loc[u] = -1; > > - chunk_start = -1; - } + > set_push_pull_constant_loc(u, chunk_start, contiguous[u], + > push_constant_loc, pull_constant_loc, + > num_push_constants, num_pull_constants, + > max_push_components, max_chunk_size, + > stage_prog_data); } > > + /* As the uniforms are going to be reordered, take the data > from a temporal + * copy of the original param[]. + */ + > gl_constant_value **param = rzalloc_array(mem_ctx, > gl_constant_value*, + > stage_prog_data->nr_params); + memcpy(param, > stage_prog_data->param, + sizeof(gl_constant_value*) * > stage_prog_data->nr_params); stage_prog_data->nr_params = > num_push_constants; stage_prog_data->nr_pull_params = > num_pull_constants; > > @@ -2139,7 +2191,7 @@ fs_visitor::assign_constant_locations() * > having to make a copy. */ for (unsigned int i = 0; i < uniforms; > i++) { - const gl_constant_value *value = > stage_prog_data->param[i]; + const gl_constant_value *value = > param[i]; > > if (pull_constant_loc[i] != -1) { > stage_prog_data->pull_param[pull_constant_loc[i]] = value; @@ > -2147,6 +2199,7 @@ fs_visitor::assign_constant_locations() > stage_prog_data->param[push_constant_loc[i]] = value; } } + > ralloc_free(param); } > > /** > -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQIcBAEBCAAGBQJXLE5ZAAoJEH/0ujLxfcNDWx8P/ivUcJ+/lMPT/S/V7pD8Fbwc 4BocoIFrU4AwnO3Xn9FYIgG5r0XbbAeRP1Y0NRNRuwA/j7rbAoragCjLzsXflVAN vXnDw+ZoEO3COdoI4BOvN69N+wyK4Ei44oSIAenDDSVNQwwV2xav83M8oiirDE8N UVmHmiK0GYOgSAfBmvsMKoSd9YA5B3337plPommaOpNDHNaCmoHw7GCvc/zlDp2n KVXioGuYDouDfT5NcwSbDR2CW/u0UpzwHQkzA6acOE6qubYx76WrjnHgWvUvbdlQ S66Okz/Hs3yywhc2Ir7CRN1zYQqk4hpfJ5hZrn4AcWpjvuWdKYhW7ap3RnONSvbz 1I/Jr2F3++5nxRuelf0/BNAvwcnHyHolCXi16emdJHf6LrU7C9KwC6QpcH15dHvt 5+KaSM2fAl/y5uwqjvO++IHdmkW8z7FGSjCh203IFP+fQMld+vEPheLUpiL+3H9u pQVG0EuJFBZ5fzymgALqF/nCbGiGhSuwUo7Q3hX6YtMsrBzHDZ/ahFxCa0Jp+lYd 8kmGpGpfUYG5cWCaGj5dGFwP1YNtogWOgMmmqqod39pSe3ztYZs1uWFjJg2Y+FOc 8ycLTiwproIR1P9RuUPKjsD///oiWXYT+bOzhCW71fkgQL9LB02yp45iIlNjjm7y hDrNq6wZZbroS+bxX6Er =P1yR -----END PGP SIGNATURE----- _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev