Fixes intermittent GPU hangs on Broxton with an Intel internal test case. There are plenty of similar fragment shaders in piglit that do not use any varyings and any uniforms. According to the documentation special timing is needed between pipeline stages. Apparently we just don't hit that with piglit. Even with the failing test case one doesn't always get the hang.
Moreover, according to the error states the hang happens significantly later than the execution of the problematic shader. There are multiple render cycles (primitive submissions) in between. I've also seen error states where the ACTHD points outside the batch. Almost as if the hardware writes somewhere that gets used later on. That would also explain why piglit doesn't suffer from this - most tests kick off one render cycle and any corruption is left unseen. For clarity I chose to make the decision in the compiler only and mark it with a boolean. In principle, constant loaders could make the same decision by examing num_varying_inputs along with push constant details. Alternatively tweaking nr_params in compiler would allow GL driver to be kept as is if one did, for example: static const gl_constant_value zero = { 0 }; wm_prog_data->base.param[0] = &zero; wm_prog_data->base.nr_params = 1; This, however, doesn't work for Vulkan which would still need some logic to be added in anv_cmd_buffer_push_constants(). In the end I thought future debugging is probably easier when the explicit boolean tells about this corner case. CC: Jason Ekstrand <ja...@jlekstrand.net> CC: Eero Tamminen <eero.t.tammi...@intel.com> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_compiler.h | 7 ++++ src/intel/compiler/brw_fs.cpp | 46 +++++++++++++++++++++++++ src/intel/vulkan/anv_cmd_buffer.c | 22 ++++++++++-- src/intel/vulkan/genX_pipeline.c | 6 +++- src/mesa/drivers/dri/i965/gen6_constant_state.c | 17 +++++++-- src/mesa/drivers/dri/i965/genX_state_upload.c | 3 +- 6 files changed, 93 insertions(+), 8 deletions(-) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 6753a8daf0..8a1c8c85ac 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -622,6 +622,13 @@ struct brw_wm_prog_data { bool contains_noperspective_varying; /** + * Tell constant uplaoders, gen6_upload_push_constants() and + * anv_cmd_buffer_push_constants(), that workaround is needed. + * See gen9_ps_header_only_workaround(). + */ + bool needs_gen9_ps_header_only_workaround; + + /** * Mask of which interpolation modes are required by the fragment shader. * Used in hardware setup on gen6+. */ diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index eb9b4c3890..5f4271fb59 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6159,6 +6159,48 @@ fs_visitor::run_gs() return !failed; } +/* From the SKL PRM, Volume 16, Workarounds: + * + * 0877 3D Pixel Shader Hang possible when pixel shader dispatched with + * only header phases (R0-R2) + * + * WA: Enable a non-header phase (e.g. push constant) when dispatch would + * have been header only. + * + * Additionally from the SKL PRM, Volume 2a, Command Reference, + * 3DSTATE_PS and Push Constant Enable: + * + * This field must be enabled if the sum of the PS Constant Buffer [3:0] + * Read Length fields in 3DSTATE_CONSTANT_PS is nonzero, and must be + * disabled if the sum is zero. + * + * Therefore one needs to prepare register space for minimum amount of + * constants to be uploaded. + * + * Here it is assumed that assign_curb_setup() has determined the total amount + * of constants (uniforms + ubos) and therefore it is safe to examine if the + * workaround is needed. + */ +static void +gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data, + int *first_non_payload_grf) +{ + if (wm_prog_data->num_varying_inputs) + return; + + if (wm_prog_data->base.curb_read_length) + return; + + assert(wm_prog_data->base.nr_params == 0); + + wm_prog_data->needs_gen9_ps_header_only_workaround = true; + + const unsigned wa_upload_size = DIV_ROUND_UP(1, 8); + + wm_prog_data->base.curb_read_length = wa_upload_size; + *first_non_payload_grf += wa_upload_size; +} + bool fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) { @@ -6222,6 +6264,10 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) optimize(); assign_curb_setup(); + + if (devinfo->gen >= 9) + gen9_ps_header_only_workaround(wm_prog_data, &first_non_payload_grf); + assign_urb_setup(); fixup_3src_null_dest(); diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 3b59af8f6f..07d45bd5d4 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -641,9 +641,25 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, const struct brw_stage_prog_data *prog_data = cmd_buffer->state.pipeline->shaders[stage]->prog_data; - /* If we don't actually have any push constants, bail. */ - if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) - return (struct anv_state) { .offset = 0 }; + if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) { + /* If we don't actually have any push constants, bail. */ + if (stage != MESA_SHADER_FRAGMENT) + return (struct anv_state) { .offset = 0 }; + + const struct brw_wm_prog_data *wm_prog_data = + (const struct brw_wm_prog_data *)prog_data; + if (!wm_prog_data->needs_gen9_ps_header_only_workaround) + return (struct anv_state) { .offset = 0 }; + + assert(cmd_buffer->device->info.gen >= 9); + assert(wm_prog_data->num_varying_inputs == 0); + + struct anv_state workaround_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 1 * sizeof(float), + 32 /* bottom 5 bits MBZ */); + anv_state_flush(cmd_buffer->device, workaround_state); + return workaround_state; + } struct anv_state state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index c2fa9c0ff7..42854b06b3 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1506,9 +1506,13 @@ emit_3dstate_ps(struct anv_pipeline *pipeline, ps.VectorMaskEnable = true; ps.SamplerCount = get_sampler_count(fs_bin); ps.BindingTableEntryCount = get_binding_table_entry_count(fs_bin); - ps.PushConstantEnable = wm_prog_data->base.nr_params > 0; ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE: POSOFFSET_NONE; + + ps.PushConstantEnable = + wm_prog_data->base.nr_params > 0 || + wm_prog_data->needs_gen9_ps_header_only_workaround; + #if GEN_GEN < 8 ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c index 72f00d5640..4723cda778 100644 --- a/src/mesa/drivers/dri/i965/gen6_constant_state.c +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c @@ -52,9 +52,9 @@ gen6_upload_push_constants(struct brw_context *brw, const struct gen_device_info *devinfo = &brw->screen->devinfo; struct gl_context *ctx = &brw->ctx; - if (prog_data->nr_params == 0) { - stage_state->push_const_size = 0; - } else { + stage_state->push_const_size = 0; + + if (prog_data->nr_params) { /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ @@ -119,6 +119,17 @@ gen6_upload_push_constants(struct brw_context *brw, * The other shader stages all match the VS's limits. */ assert(stage_state->push_const_size <= 32); + } else if (stage_state->stage == MESA_SHADER_FRAGMENT && + devinfo->gen >= 9) { + const struct brw_wm_prog_data *wm_prog_data = + (const struct brw_wm_prog_data *)prog_data; + + if (wm_prog_data->needs_gen9_ps_header_only_workaround) { + intel_upload_space(brw, 1 * sizeof(gl_constant_value), 32, + &stage_state->push_const_bo, + &stage_state->push_const_offset); + stage_state->push_const_size = ALIGN(1, 8) / 8; + } } stage_state->push_constants_dirty = true; diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 612761601a..fdb5decf86 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -3831,7 +3831,8 @@ genX(upload_ps)(struct brw_context *brw) #endif if (prog_data->base.nr_params > 0 || - prog_data->base.ubo_ranges[0].length > 0) + prog_data->base.ubo_ranges[0].length > 0 || + prog_data->needs_gen9_ps_header_only_workaround) ps.PushConstantEnable = true; #if GEN_GEN < 8 -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev