On Thu, Oct 8, 2015 at 1:34 AM, Pohjolainen, Topi <topi.pohjolai...@intel.com> wrote: > On Wed, Oct 07, 2015 at 07:11:45AM -0700, Kristian H?gsberg Kristensen wrote: >> The initial motivation for this patch was to avoid calling >> brw_cs_prog_local_id_payload_dwords() in gen7_cs_state.c from the >> compiler. This commit ends up refactoring things a bit more so as to >> split out the logic to build the local id payload to brw_fs.cpp. This >> moves the payload building closer to the compiler code that uses the >> payload layout and makes it avaiable to other users of the compiler. > ^ > > available
Thanks, fixed. >> >> Signed-off-by: Kristian Høgsberg Kristensen <k...@bitplanet.net> >> --- >> src/mesa/drivers/dri/i965/brw_context.h | 1 + >> src/mesa/drivers/dri/i965/brw_cs.h | 5 +- >> src/mesa/drivers/dri/i965/brw_fs.cpp | 68 ++++++++++++++++++++++++-- >> src/mesa/drivers/dri/i965/gen7_cs_state.c | 80 >> ++++--------------------------- >> 4 files changed, 76 insertions(+), 78 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_context.h >> b/src/mesa/drivers/dri/i965/brw_context.h >> index 0a29a69..1869f28 100644 >> --- a/src/mesa/drivers/dri/i965/brw_context.h >> +++ b/src/mesa/drivers/dri/i965/brw_context.h >> @@ -484,6 +484,7 @@ struct brw_cs_prog_data { >> unsigned simd_size; >> bool uses_barrier; >> bool uses_num_work_groups; >> + unsigned local_invocation_id_regs; >> >> struct { >> /** @{ >> diff --git a/src/mesa/drivers/dri/i965/brw_cs.h >> b/src/mesa/drivers/dri/i965/brw_cs.h >> index 0c0ed2b..c07eb6c 100644 >> --- a/src/mesa/drivers/dri/i965/brw_cs.h >> +++ b/src/mesa/drivers/dri/i965/brw_cs.h >> @@ -48,8 +48,9 @@ brw_cs_emit(struct brw_context *brw, >> struct gl_shader_program *prog, >> unsigned *final_assembly_size); >> >> -unsigned >> -brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width); >> +void >> +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, >> + void *buffer, uint32_t threads, uint32_t >> stride); >> >> #ifdef __cplusplus >> } >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp >> b/src/mesa/drivers/dri/i965/brw_fs.cpp >> index 1dee888..b4125aa 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >> @@ -4718,20 +4718,43 @@ fs_visitor::setup_vs_payload() >> payload.num_regs = 2; >> } >> >> +/** >> + * We are building the local ID push constant data using the simplest >> possible >> + * method. We simply push the local IDs directly as they should appear in >> the >> + * registers for the uvec3 gl_LocalInvocationID variable. >> + * >> + * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6 >> + * registers worth of push constant space. >> + * >> + * Note: Any updates to brw_cs_prog_local_id_payload_dwords, >> + * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup >> need >> + * to coordinated. >> + * >> + * FINISHME: There are a few easy optimizations to consider. >> + * >> + * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is >> + * no need for using push constant space for that dimension. >> + * >> + * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can >> + * easily use 16-bit words rather than 32-bit dwords in the push constant >> + * data. >> + * >> + * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for >> + * conveying the data, and thereby reduce push constant usage. >> + * >> + */ >> void >> fs_visitor::setup_cs_payload() >> { >> assert(devinfo->gen >= 7); >> + brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data; >> >> payload.num_regs = 1; >> >> if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) { >> - const unsigned local_id_dwords = >> - brw_cs_prog_local_id_payload_dwords(dispatch_width); >> - assert((local_id_dwords & 0x7) == 0); >> - const unsigned local_id_regs = local_id_dwords / 8; >> + prog_data->local_invocation_id_regs = dispatch_width * 3 / 8; > > Subtle change in functionality here. Before this was fixed to 3 without > taking the dispatch width into account. Might be worth to note in the commit > message. brw_cs_prog_local_id_payload_dwords() used to return 3 * dispatch_width, which the code above then divided by 8. Did I miss sommething? Kristian >> payload.local_invocation_id_reg = payload.num_regs; >> - payload.num_regs += local_id_regs; >> + payload.num_regs += prog_data->local_invocation_id_regs; >> } >> } >> >> @@ -5171,6 +5194,41 @@ brw_wm_fs_emit(struct brw_context *brw, >> return g.get_assembly(final_assembly_size); >> } >> >> +void >> +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data, >> + void *buffer, uint32_t threads, uint32_t >> stride) >> +{ >> + if (prog_data->local_invocation_id_regs == 0) >> + return; >> + >> + /* stride should be an integer number of registers, that is, a multiple >> of >> + * 32 bytes. */ >> + assert(stride % 32 == 0); >> + >> + unsigned x = 0, y = 0, z = 0; >> + for (unsigned t = 0; t < threads; t++) { >> + uint32_t *param = (uint32_t *) buffer + stride * t / 4; >> + >> + for (unsigned i = 0; i < prog_data->simd_size; i++) { >> + param[0 * prog_data->simd_size + i] = x; >> + param[1 * prog_data->simd_size + i] = y; >> + param[2 * prog_data->simd_size + i] = z; >> + >> + x++; >> + if (x == prog_data->local_size[0]) { >> + x = 0; >> + y++; >> + if (y == prog_data->local_size[1]) { >> + y = 0; >> + z++; >> + if (z == prog_data->local_size[2]) >> + z = 0; >> + } >> + } >> + } >> + } >> +} >> + >> fs_reg * >> fs_visitor::emit_cs_local_invocation_id_setup() >> { >> diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c >> b/src/mesa/drivers/dri/i965/gen7_cs_state.c >> index 5edc4fc..6aeb0cb 100644 >> --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c >> +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c >> @@ -70,10 +70,8 @@ brw_upload_cs_state(struct brw_context *brw) >> >> unsigned local_id_dwords = 0; >> >> - if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { >> - local_id_dwords = >> - brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size); >> - } >> + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) >> + local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; >> >> unsigned push_constant_data_size = >> (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value); >> @@ -191,63 +189,6 @@ const struct brw_tracked_state brw_cs_state = { >> >> >> /** >> - * We are building the local ID push constant data using the simplest >> possible >> - * method. We simply push the local IDs directly as they should appear in >> the >> - * registers for the uvec3 gl_LocalInvocationID variable. >> - * >> - * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6 >> - * registers worth of push constant space. >> - * >> - * Note: Any updates to brw_cs_prog_local_id_payload_dwords, >> - * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup >> need >> - * to coordinated. >> - * >> - * FINISHME: There are a few easy optimizations to consider. >> - * >> - * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is >> - * no need for using push constant space for that dimension. >> - * >> - * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can >> - * easily use 16-bit words rather than 32-bit dwords in the push constant >> - * data. >> - * >> - * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for >> - * conveying the data, and thereby reduce push constant usage. >> - * >> - */ >> -unsigned >> -brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width) >> -{ >> - return 3 * dispatch_width; >> -} >> - >> - >> -static void >> -fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, >> - void *buffer, unsigned *x, unsigned *y, unsigned *z) >> -{ >> - uint32_t *param = (uint32_t *)buffer; >> - for (unsigned i = 0; i < cs_prog_data->simd_size; i++) { >> - param[0 * cs_prog_data->simd_size + i] = *x; >> - param[1 * cs_prog_data->simd_size + i] = *y; >> - param[2 * cs_prog_data->simd_size + i] = *z; >> - >> - (*x)++; >> - if (*x == cs_prog_data->local_size[0]) { >> - *x = 0; >> - (*y)++; >> - if (*y == cs_prog_data->local_size[1]) { >> - *y = 0; >> - (*z)++; >> - if (*z == cs_prog_data->local_size[2]) >> - *z = 0; >> - } >> - } >> - } >> -} >> - >> - >> -/** >> * Creates a region containing the push constants for the CS on gen7+. >> * >> * Push constants are constant values (such as GLSL uniforms) that are >> @@ -269,10 +210,8 @@ brw_upload_cs_push_constants(struct brw_context *brw, >> (struct brw_stage_prog_data*) cs_prog_data; >> unsigned local_id_dwords = 0; >> >> - if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { >> - local_id_dwords = >> - brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size); >> - } >> + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) >> + local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; >> >> /* Updates the ParamaterValues[i] pointers for all parameters of the >> * basic type of PROGRAM_STATE_VAR. >> @@ -302,14 +241,13 @@ brw_upload_cs_push_constants(struct brw_context *brw, >> >> STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); >> >> + brw_cs_fill_local_id_payload(cs_prog_data, param, threads, >> + reg_aligned_constant_size); >> + >> /* _NEW_PROGRAM_CONSTANTS */ >> - unsigned x = 0, y = 0, z = 0; >> for (t = 0; t < threads; t++) { >> - gl_constant_value *next_param = ¶m[t * param_aligned_count]; >> - if (local_id_dwords > 0) { >> - fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, >> &z); >> - next_param += local_id_dwords; >> - } >> + gl_constant_value *next_param = >> + ¶m[t * param_aligned_count + local_id_dwords]; >> for (i = 0; i < prog_data->nr_params; i++) { >> next_param[i] = *prog_data->param[i]; >> } >> -- >> 2.4.3 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev