I don't like having to flush, so this introduces the other workaround. In my experience context register writes are pretty cheap, so this should not have too much overhead.
I haven't seen any significant perf changes in benchmarks or games though. --- src/amd/vulkan/radv_cmd_buffer.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c735d201802..0ca33cc67bc 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1102,10 +1102,6 @@ radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer) { uint32_t count = cmd_buffer->state.dynamic.scissor.count; - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; - si_emit_cache_flush(cmd_buffer); - } si_write_scissors(cmd_buffer->cs, 0, count, cmd_buffer->state.dynamic.scissor.scissors, cmd_buffer->state.dynamic.viewport.viewports, @@ -1866,7 +1862,8 @@ radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool static void radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, bool indexed_draw, bool instanced_draw, bool indirect_draw, - uint32_t draw_vertex_count) + uint32_t draw_vertex_count, + bool *gfx9_context_roll) { struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; struct radv_cmd_state *state = &cmd_buffer->state; @@ -1921,6 +1918,7 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, bool indexed_draw, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, primitive_reset_index); state->last_primitive_reset_index = primitive_reset_index; + *gfx9_context_roll = true; } } } @@ -3279,6 +3277,9 @@ static void radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { + bool context_roll = cmd_buffer->state.dirty & ~RADV_CMD_DIRTY_INDEX_BUFFER; + bool scissor_emitted = cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) radv_emit_graphics_pipeline(cmd_buffer); @@ -3303,7 +3304,16 
@@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, radv_emit_draw_registers(cmd_buffer, info->indexed, info->instance_count > 1, info->indirect, - info->indirect ? 0 : info->count); + info->indirect ? 0 : info->count, + &context_roll); + + /* VEGA10 and RAVEN need a workaround for scissor registers. Either we need to + * do a PS_APRTIAL_FLUSH before writing them, or we need to always write it if + * a context roll happens. This does the lattter. */ + if (context_roll && !scissor_emitted && + (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA10 || + cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN)) + radv_emit_scissor(cmd_buffer); } static void -- 2.15.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev