On Mon, Jan 14, 2019 at 5:12 PM Rhys Perry <pendingchao...@gmail.com> wrote: > > I did and found small improvements in Rise of the Tomb Raider. I > measured framerates ~104.3% that of without the changes for the > Geothermal Valley scene, ~101.2% for Spine of the Mountain and ~102.3% > for Prophets Tomb.
My main question would be what the statistical significance is. e.g. did you do one run of each, did you do multiple, and what was your test setup? Just curious because I have tried the exact same thing before and could not find anything more than noise. > > I found no change with Dota 2 but I've heard it's cpu-bound. > > On Mon, 14 Jan 2019 at 16:05, Samuel Pitoiset <samuel.pitoi...@gmail.com> > wrote: > > > > Did you benchmark? > > > > On 1/14/19 5:01 PM, Rhys Perry wrote: > > > It's common in some applications to bind a new graphics pipeline without > > > ending up changing any context registers. > > > > > > This makes a pipeline have two command buffers: one for setting context > > > registers and one for everything else. The context register command buffer > > > is only emitted if it differs from the previous pipeline's. > > > > > > Signed-off-by: Rhys Perry <pendingchao...@gmail.com> > > > --- > > > src/amd/vulkan/radv_cmd_buffer.c | 46 +++++-- > > > src/amd/vulkan/radv_pipeline.c | 217 ++++++++++++++++--------------- > > > src/amd/vulkan/radv_private.h | 2 + > > > 3 files changed, 150 insertions(+), 115 deletions(-) > > > > > > diff --git a/src/amd/vulkan/radv_cmd_buffer.c > > > b/src/amd/vulkan/radv_cmd_buffer.c > > > index f41d6c0b3e7..59903ab64d8 100644 > > > --- a/src/amd/vulkan/radv_cmd_buffer.c > > > +++ b/src/amd/vulkan/radv_cmd_buffer.c > > > @@ -634,7 +634,7 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer > > > *cmd_buffer, > > > } > > > } > > > > > > -static void > > > +static bool > > > radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, > > > struct radv_pipeline *pipeline) > > > { > > > @@ -646,7 +646,7 @@ radv_update_multisample_state(struct radv_cmd_buffer > > > *cmd_buffer, > > > cmd_buffer->sample_positions_needed = true; > > > > > > if (old_pipeline && num_samples == > > > old_pipeline->graphics.ms.num_samples) > > > - return; > > > + return false; > > > > > > radeon_set_context_reg_seq(cmd_buffer->cs, > > > 
R_028BDC_PA_SC_LINE_CNTL, 2); > > > radeon_emit(cmd_buffer->cs, ms->pa_sc_line_cntl); > > > @@ -661,6 +661,8 @@ radv_update_multisample_state(struct radv_cmd_buffer > > > *cmd_buffer, > > > radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); > > > radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) > > > | EVENT_INDEX(0)); > > > } > > > + > > > + return true; > > > } > > > > > > static void > > > @@ -863,15 +865,15 @@ radv_emit_rbplus_state(struct radv_cmd_buffer > > > *cmd_buffer) > > > radeon_emit(cmd_buffer->cs, sx_blend_opt_control); > > > } > > > > > > -static void > > > +static bool > > > radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) > > > { > > > struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; > > > > > > if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline) > > > - return; > > > + return false; > > > > > > - radv_update_multisample_state(cmd_buffer, pipeline); > > > + bool context_roll = radv_update_multisample_state(cmd_buffer, > > > pipeline); > > > > > > cmd_buffer->scratch_size_needed = > > > MAX2(cmd_buffer->scratch_size_needed, > > > @@ -884,6 +886,15 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer > > > *cmd_buffer) > > > > > > radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, > > > pipeline->cs.cdw); > > > > > > + if (!cmd_buffer->state.emitted_pipeline || > > > + cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != > > > pipeline->ctx_cs.cdw || > > > + cmd_buffer->state.emitted_pipeline->ctx_cs_hash != > > > pipeline->ctx_cs_hash || > > > + memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf, > > > + pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw * 4)) { > > > + radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, > > > pipeline->ctx_cs.cdw); > > > + context_roll = true; > > > + } > > > + > > > for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { > > > if (!pipeline->shaders[i]) > > > continue; > > > @@ -902,6 +913,8 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer > > > 
*cmd_buffer) > > > cmd_buffer->state.emitted_pipeline = pipeline; > > > > > > cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE; > > > + > > > + return context_roll; > > > } > > > > > > static void > > > @@ -2859,6 +2872,8 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer > > > *cmd_buffer) > > > if (!pipeline || pipeline == > > > cmd_buffer->state.emitted_compute_pipeline) > > > return; > > > > > > + assert(!pipeline->ctx_cs.cdw); > > > + > > > cmd_buffer->state.emitted_compute_pipeline = pipeline; > > > > > > radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, > > > pipeline->cs.cdw); > > > @@ -3609,30 +3624,30 @@ radv_emit_draw_packets(struct radv_cmd_buffer > > > *cmd_buffer, > > > * any context registers. > > > */ > > > static bool radv_need_late_scissor_emission(struct radv_cmd_buffer > > > *cmd_buffer, > > > - bool indexed_draw) > > > + bool indexed_draw, > > > + bool pipeline_context_roll) > > > { > > > struct radv_cmd_state *state = &cmd_buffer->state; > > > > > > if (!cmd_buffer->device->physical_device->has_scissor_bug) > > > return false; > > > > > > + if (pipeline_context_roll) > > > + return true; > > > + > > > uint32_t used_states = > > > cmd_buffer->state.pipeline->graphics.needed_dynamic_state | > > > ~RADV_CMD_DIRTY_DYNAMIC_ALL; > > > > > > /* Index, vertex and streamout buffers don't change context regs, > > > and > > > - * pipeline is handled later. > > > + * pipeline is already handled. > > > */ > > > used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | > > > RADV_CMD_DIRTY_VERTEX_BUFFER | > > > RADV_CMD_DIRTY_STREAMOUT_BUFFER | > > > RADV_CMD_DIRTY_PIPELINE); > > > > > > - /* Assume all state changes except these two can imply context > > > rolls. */ > > > if (cmd_buffer->state.dirty & used_states) > > > return true; > > > > > > - if (cmd_buffer->state.emitted_pipeline != > > > cmd_buffer->state.pipeline) > > > - return true; > > > - > > > if (indexed_draw && state->pipeline->graphics.prim_restart_enable && > > > (state->index_type ? 
0xffffffffu : 0xffffu) != > > > state->last_primitive_reset_index) > > > return true; > > > @@ -3644,14 +3659,19 @@ static void > > > radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, > > > const struct radv_draw_info *info) > > > { > > > - bool late_scissor_emission = > > > radv_need_late_scissor_emission(cmd_buffer, info->indexed); > > > + bool late_scissor_emission; > > > + bool pipeline_context_roll = false; > > > > > > if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) || > > > cmd_buffer->state.emitted_pipeline != > > > cmd_buffer->state.pipeline) > > > radv_emit_rbplus_state(cmd_buffer); > > > > > > if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) > > > - radv_emit_graphics_pipeline(cmd_buffer); > > > + pipeline_context_roll = > > > radv_emit_graphics_pipeline(cmd_buffer); > > > + > > > + late_scissor_emission = > > > + radv_need_late_scissor_emission(cmd_buffer, info->indexed, > > > + pipeline_context_roll); > > > > > > if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) > > > radv_emit_framebuffer_state(cmd_buffer); > > > diff --git a/src/amd/vulkan/radv_pipeline.c > > > b/src/amd/vulkan/radv_pipeline.c > > > index 9d5da43532f..fba77584013 100644 > > > --- a/src/amd/vulkan/radv_pipeline.c > > > +++ b/src/amd/vulkan/radv_pipeline.c > > > @@ -2525,7 +2525,7 @@ radv_compute_bin_size(struct radv_pipeline > > > *pipeline, const VkGraphicsPipelineCr > > > } > > > > > > static void > > > -radv_pipeline_generate_binning_state(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs, > > > struct radv_pipeline *pipeline, > > > const VkGraphicsPipelineCreateInfo > > > *pCreateInfo) > > > { > > > @@ -2575,15 +2575,15 @@ radv_pipeline_generate_binning_state(struct > > > radeon_cmdbuf *cs, > > > S_028C44_OPTIMAL_BIN_SELECTION(1); > > > } > > > > > > - radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, > > > + radeon_set_context_reg(ctx_cs, R_028C44_PA_SC_BINNER_CNTL_0, > > > 
pa_sc_binner_cntl_0); > > > - radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, > > > + radeon_set_context_reg(ctx_cs, R_028060_DB_DFSM_CONTROL, > > > db_dfsm_control); > > > } > > > > > > > > > static void > > > -radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, > > > struct radv_pipeline > > > *pipeline, > > > const > > > VkGraphicsPipelineCreateInfo *pCreateInfo, > > > const struct > > > radv_graphics_pipeline_create_info *extra) > > > @@ -2656,35 +2656,35 @@ radv_pipeline_generate_depth_stencil_state(struct > > > radeon_cmdbuf *cs, > > > db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1); > > > } > > > > > > - radeon_set_context_reg(cs, R_028800_DB_DEPTH_CONTROL, > > > db_depth_control); > > > - radeon_set_context_reg(cs, R_02842C_DB_STENCIL_CONTROL, > > > db_stencil_control); > > > + radeon_set_context_reg(ctx_cs, R_028800_DB_DEPTH_CONTROL, > > > db_depth_control); > > > + radeon_set_context_reg(ctx_cs, R_02842C_DB_STENCIL_CONTROL, > > > db_stencil_control); > > > > > > - radeon_set_context_reg(cs, R_028000_DB_RENDER_CONTROL, > > > db_render_control); > > > - radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, > > > db_render_override); > > > - radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, > > > db_render_override2); > > > + radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, > > > db_render_control); > > > + radeon_set_context_reg(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, > > > db_render_override); > > > + radeon_set_context_reg(ctx_cs, R_028010_DB_RENDER_OVERRIDE2, > > > db_render_override2); > > > } > > > > > > static void > > > -radv_pipeline_generate_blend_state(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs, > > > struct radv_pipeline *pipeline, > > > const struct radv_blend_state *blend) > > > { > > > - radeon_set_context_reg_seq(cs, R_028780_CB_BLEND0_CONTROL, 8); > > > - 
radeon_emit_array(cs, blend->cb_blend_control, > > > + radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8); > > > + radeon_emit_array(ctx_cs, blend->cb_blend_control, > > > 8); > > > - radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, > > > blend->cb_color_control); > > > - radeon_set_context_reg(cs, R_028B70_DB_ALPHA_TO_MASK, > > > blend->db_alpha_to_mask); > > > + radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, > > > blend->cb_color_control); > > > + radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, > > > blend->db_alpha_to_mask); > > > > > > if (pipeline->device->physical_device->has_rbplus) { > > > > > > - radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, > > > 8); > > > - radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8); > > > + radeon_set_context_reg_seq(ctx_cs, > > > R_028760_SX_MRT0_BLEND_OPT, 8); > > > + radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8); > > > } > > > > > > - radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, > > > blend->spi_shader_col_format); > > > + radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, > > > blend->spi_shader_col_format); > > > > > > - radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, > > > blend->cb_target_mask); > > > - radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, > > > blend->cb_shader_mask); > > > + radeon_set_context_reg(ctx_cs, R_028238_CB_TARGET_MASK, > > > blend->cb_target_mask); > > > + radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, > > > blend->cb_shader_mask); > > > > > > pipeline->graphics.col_format = blend->spi_shader_col_format; > > > pipeline->graphics.cb_target_mask = blend->cb_target_mask; > > > @@ -2702,7 +2702,7 @@ radv_get_conservative_raster_mode(const > > > VkPipelineRasterizationStateCreateInfo * > > > } > > > > > > static void > > > -radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs, > > > struct radv_pipeline *pipeline, > > > 
const VkGraphicsPipelineCreateInfo > > > *pCreateInfo) > > > { > > > @@ -2711,14 +2711,14 @@ radv_pipeline_generate_raster_state(struct > > > radeon_cmdbuf *cs, > > > radv_get_conservative_raster_mode(vkraster); > > > uint32_t pa_sc_conservative_rast = > > > S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); > > > > > > - radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, > > > + radeon_set_context_reg(ctx_cs, R_028810_PA_CL_CLIP_CNTL, > > > S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan > > > uses DX conventions. > > > > > > S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) | > > > > > > S_028810_ZCLIP_FAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) | > > > > > > S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) > > > | > > > S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); > > > > > > - radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0, > > > + radeon_set_context_reg(ctx_cs, R_0286D4_SPI_INTERP_CONTROL_0, > > > S_0286D4_FLAT_SHADE_ENA(1) | > > > S_0286D4_PNT_SPRITE_ENA(1) | > > > > > > S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | > > > @@ -2727,12 +2727,12 @@ radv_pipeline_generate_raster_state(struct > > > radeon_cmdbuf *cs, > > > > > > S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | > > > S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is > > > top to bottom - 1.0 at bottom */ > > > > > > - radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL, > > > + radeon_set_context_reg(ctx_cs, R_028BE4_PA_SU_VTX_CNTL, > > > S_028BE4_PIX_CENTER(1) | // TODO verify > > > > > > S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) | > > > > > > S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); > > > > > > - radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, > > > + radeon_set_context_reg(ctx_cs, R_028814_PA_SU_SC_MODE_CNTL, > > > S_028814_FACE(vkraster->frontFace) | > > > S_028814_CULL_FRONT(!!(vkraster->cullMode & > > > VK_CULL_MODE_FRONT_BIT)) | > > > S_028814_CULL_BACK(!!(vkraster->cullMode & > > > VK_CULL_MODE_BACK_BIT)) | > > > 
@@ -2773,37 +2773,37 @@ radv_pipeline_generate_raster_state(struct > > > radeon_cmdbuf *cs, > > > } > > > } > > > > > > - radeon_set_context_reg(cs, > > > R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, > > > + radeon_set_context_reg(ctx_cs, > > > R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, > > > pa_sc_conservative_rast); > > > } > > > > > > > > > static void > > > -radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs, > > > struct radv_pipeline *pipeline) > > > { > > > struct radv_multisample_state *ms = &pipeline->graphics.ms; > > > > > > - radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); > > > - radeon_emit(cs, ms->pa_sc_aa_mask[0]); > > > - radeon_emit(cs, ms->pa_sc_aa_mask[1]); > > > + radeon_set_context_reg_seq(ctx_cs, > > > R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); > > > + radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]); > > > + radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]); > > > > > > - radeon_set_context_reg(cs, R_028804_DB_EQAA, ms->db_eqaa); > > > - radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, > > > ms->pa_sc_mode_cntl_1); > > > + radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa); > > > + radeon_set_context_reg(ctx_cs, R_028A4C_PA_SC_MODE_CNTL_1, > > > ms->pa_sc_mode_cntl_1); > > > > > > /* The exclusion bits can be set to improve rasterization efficiency > > > * if no sample lies on the pixel boundary (-8 sample offset). It's > > > * currently always TRUE because the driver doesn't support 16 > > > samples. 
> > > */ > > > bool exclusion = > > > pipeline->device->physical_device->rad_info.chip_class >= CIK; > > > - radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, > > > + radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, > > > S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | > > > S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); > > > } > > > > > > static void > > > -radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *cs, > > > - const struct radv_pipeline *pipeline) > > > +radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs, > > > + struct radv_pipeline *pipeline) > > > { > > > const struct radv_vs_output_info *outinfo = > > > get_vs_output_info(pipeline); > > > > > > @@ -2821,12 +2821,13 @@ radv_pipeline_generate_vgt_gs_mode(struct > > > radeon_cmdbuf *cs, > > > vgt_primitiveid_en = true; > > > } > > > > > > - radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, > > > vgt_primitiveid_en); > > > - radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, vgt_gs_mode); > > > + radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, > > > vgt_primitiveid_en); > > > + radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode); > > > } > > > > > > static void > > > -radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, > > > + struct radeon_cmdbuf *cs, > > > struct radv_pipeline *pipeline, > > > struct radv_shader_variant *shader) > > > { > > > @@ -2847,10 +2848,10 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf > > > *cs, > > > outinfo->writes_layer || > > > outinfo->writes_viewport_index; > > > > > > - radeon_set_context_reg(cs, R_0286C4_SPI_VS_OUT_CONFIG, > > > + radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, > > > S_0286C4_VS_EXPORT_COUNT(MAX2(1, > > > outinfo->param_exports) - 1)); > > > > > > - radeon_set_context_reg(cs, R_02870C_SPI_SHADER_POS_FORMAT, > > > + radeon_set_context_reg(ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT, > > > > > > 
S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | > > > > > > S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? > > > > > > V_02870C_SPI_SHADER_4COMP : > > > @@ -2862,13 +2863,13 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf > > > *cs, > > > > > > V_02870C_SPI_SHADER_4COMP : > > > > > > V_02870C_SPI_SHADER_NONE)); > > > > > > - radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL, > > > + radeon_set_context_reg(ctx_cs, R_028818_PA_CL_VTE_CNTL, > > > S_028818_VTX_W0_FMT(1) | > > > S_028818_VPORT_X_SCALE_ENA(1) | > > > S_028818_VPORT_X_OFFSET_ENA(1) | > > > S_028818_VPORT_Y_SCALE_ENA(1) | > > > S_028818_VPORT_Y_OFFSET_ENA(1) | > > > S_028818_VPORT_Z_SCALE_ENA(1) | > > > S_028818_VPORT_Z_OFFSET_ENA(1)); > > > > > > - radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, > > > + radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL, > > > > > > S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | > > > > > > S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | > > > > > > S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | > > > @@ -2880,7 +2881,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf > > > *cs, > > > clip_dist_mask); > > > > > > if (pipeline->device->physical_device->rad_info.chip_class <= VI) > > > - radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, > > > + radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, > > > outinfo->writes_viewport_index); > > > } > > > > > > @@ -2948,7 +2949,8 @@ radv_pipeline_generate_hw_hs(struct radeon_cmdbuf > > > *cs, > > > } > > > > > > static void > > > -radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs, > > > + struct radeon_cmdbuf *cs, > > > struct radv_pipeline *pipeline, > > > const struct radv_tessellation_state > > > *tess) > > > { > > > @@ -2964,11 +2966,12 @@ radv_pipeline_generate_vertex_shader(struct > > > radeon_cmdbuf *cs, > > > else if (vs->info.vs.as_es) > > > 
radv_pipeline_generate_hw_es(cs, pipeline, vs); > > > else > > > - radv_pipeline_generate_hw_vs(cs, pipeline, vs); > > > + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, vs); > > > } > > > > > > static void > > > -radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs, > > > + struct radeon_cmdbuf *cs, > > > struct radv_pipeline *pipeline, > > > const struct radv_tessellation_state > > > *tess) > > > { > > > @@ -2984,24 +2987,25 @@ radv_pipeline_generate_tess_shaders(struct > > > radeon_cmdbuf *cs, > > > if (tes->info.tes.as_es) > > > radv_pipeline_generate_hw_es(cs, pipeline, tes); > > > else > > > - radv_pipeline_generate_hw_vs(cs, pipeline, tes); > > > + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, > > > tes); > > > } > > > > > > radv_pipeline_generate_hw_hs(cs, pipeline, tcs, tess); > > > > > > - radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM, > > > + radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM, > > > tess->tf_param); > > > > > > if (pipeline->device->physical_device->rad_info.chip_class >= CIK) > > > - radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, > > > + radeon_set_context_reg_idx(ctx_cs, > > > R_028B58_VGT_LS_HS_CONFIG, 2, > > > tess->ls_hs_config); > > > else > > > - radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, > > > + radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, > > > tess->ls_hs_config); > > > } > > > > > > static void > > > -radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, > > > + struct radeon_cmdbuf *cs, > > > struct radv_pipeline *pipeline, > > > const struct radv_gs_state *gs_state) > > > { > > > @@ -3022,32 +3026,32 @@ radv_pipeline_generate_geometry_shader(struct > > > radeon_cmdbuf *cs, > > > > > > offset = num_components[0] * gs_max_out_vertices; > > > > > > - radeon_set_context_reg_seq(cs, 
R_028A60_VGT_GSVS_RING_OFFSET_1, 3); > > > - radeon_emit(cs, offset); > > > + radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, > > > 3); > > > + radeon_emit(ctx_cs, offset); > > > if (max_stream >= 1) > > > offset += num_components[1] * gs_max_out_vertices; > > > - radeon_emit(cs, offset); > > > + radeon_emit(ctx_cs, offset); > > > if (max_stream >= 2) > > > offset += num_components[2] * gs_max_out_vertices; > > > - radeon_emit(cs, offset); > > > + radeon_emit(ctx_cs, offset); > > > if (max_stream >= 3) > > > offset += num_components[3] * gs_max_out_vertices; > > > - radeon_set_context_reg(cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset); > > > + radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, > > > offset); > > > > > > - radeon_set_context_reg(cs, R_028B38_VGT_GS_MAX_VERT_OUT, > > > gs->info.gs.vertices_out); > > > + radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, > > > gs->info.gs.vertices_out); > > > > > > - radeon_set_context_reg_seq(cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); > > > - radeon_emit(cs, num_components[0]); > > > - radeon_emit(cs, (max_stream >= 1) ? num_components[1] : 0); > > > - radeon_emit(cs, (max_stream >= 2) ? num_components[2] : 0); > > > - radeon_emit(cs, (max_stream >= 3) ? num_components[3] : 0); > > > + radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, > > > 4); > > > + radeon_emit(ctx_cs, num_components[0]); > > > + radeon_emit(ctx_cs, (max_stream >= 1) ? num_components[1] : 0); > > > + radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0); > > > + radeon_emit(ctx_cs, (max_stream >= 3) ? 
num_components[3] : 0); > > > > > > uint32_t gs_num_invocations = gs->info.gs.invocations; > > > - radeon_set_context_reg(cs, R_028B90_VGT_GS_INSTANCE_CNT, > > > + radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, > > > S_028B90_CNT(MIN2(gs_num_invocations, 127)) | > > > S_028B90_ENABLE(gs_num_invocations > 0)); > > > > > > - radeon_set_context_reg(cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, > > > + radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, > > > gs_state->vgt_esgs_ring_itemsize); > > > > > > va = radv_buffer_get_va(gs->bo) + gs->bo_offset; > > > @@ -3061,8 +3065,8 @@ radv_pipeline_generate_geometry_shader(struct > > > radeon_cmdbuf *cs, > > > radeon_emit(cs, gs->rsrc1); > > > radeon_emit(cs, gs->rsrc2 | > > > S_00B22C_LDS_SIZE(gs_state->lds_size)); > > > > > > - radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL, > > > gs_state->vgt_gs_onchip_cntl); > > > - radeon_set_context_reg(cs, > > > R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, > > > gs_state->vgt_gs_max_prims_per_subgroup); > > > + radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, > > > gs_state->vgt_gs_onchip_cntl); > > > + radeon_set_context_reg(ctx_cs, > > > R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, > > > gs_state->vgt_gs_max_prims_per_subgroup); > > > } else { > > > radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); > > > radeon_emit(cs, va >> 8); > > > @@ -3071,7 +3075,7 @@ radv_pipeline_generate_geometry_shader(struct > > > radeon_cmdbuf *cs, > > > radeon_emit(cs, gs->rsrc2); > > > } > > > > > > - radv_pipeline_generate_hw_vs(cs, pipeline, > > > pipeline->gs_copy_shader); > > > + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, > > > pipeline->gs_copy_shader); > > > } > > > > > > static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade) > > > @@ -3093,8 +3097,8 @@ static uint32_t offset_to_ps_input(uint32_t offset, > > > bool flat_shade) > > > } > > > > > > static void > > > -radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs, > > > - 
struct radv_pipeline *pipeline) > > > +radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, > > > + struct radv_pipeline *pipeline) > > > { > > > struct radv_shader_variant *ps = > > > pipeline->shaders[MESA_SHADER_FRAGMENT]; > > > const struct radv_vs_output_info *outinfo = > > > get_vs_output_info(pipeline); > > > @@ -3165,9 +3169,9 @@ radv_pipeline_generate_ps_inputs(struct > > > radeon_cmdbuf *cs, > > > } > > > > > > if (ps_offset) { > > > - radeon_set_context_reg_seq(cs, > > > R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); > > > + radeon_set_context_reg_seq(ctx_cs, > > > R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); > > > for (unsigned i = 0; i < ps_offset; i++) { > > > - radeon_emit(cs, ps_input_cntl[i]); > > > + radeon_emit(ctx_cs, ps_input_cntl[i]); > > > } > > > } > > > } > > > @@ -3205,7 +3209,8 @@ radv_compute_db_shader_control(const struct > > > radv_device *device, > > > } > > > > > > static void > > > -radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, > > > + struct radeon_cmdbuf *cs, > > > struct radv_pipeline *pipeline) > > > { > > > struct radv_shader_variant *ps; > > > @@ -3221,22 +3226,22 @@ radv_pipeline_generate_fragment_shader(struct > > > radeon_cmdbuf *cs, > > > radeon_emit(cs, ps->rsrc1); > > > radeon_emit(cs, ps->rsrc2); > > > > > > - radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, > > > + radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL, > > > > > > radv_compute_db_shader_control(pipeline->device, > > > pipeline, > > > ps)); > > > > > > - radeon_set_context_reg(cs, R_0286CC_SPI_PS_INPUT_ENA, > > > + radeon_set_context_reg(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA, > > > ps->config.spi_ps_input_ena); > > > > > > - radeon_set_context_reg(cs, R_0286D0_SPI_PS_INPUT_ADDR, > > > + radeon_set_context_reg(ctx_cs, R_0286D0_SPI_PS_INPUT_ADDR, > > > ps->config.spi_ps_input_addr); > > > > > > - radeon_set_context_reg(cs, R_0286D8_SPI_PS_IN_CONTROL, > > 
> + radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, > > > S_0286D8_NUM_INTERP(ps->info.fs.num_interp)); > > > > > > - radeon_set_context_reg(cs, R_0286E0_SPI_BARYC_CNTL, > > > pipeline->graphics.spi_baryc_cntl); > > > + radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, > > > pipeline->graphics.spi_baryc_cntl); > > > > > > - radeon_set_context_reg(cs, R_028710_SPI_SHADER_Z_FORMAT, > > > + radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, > > > > > > ac_get_spi_shader_z_format(ps->info.info.ps.writes_z, > > > > > > ps->info.info.ps.writes_stencil, > > > > > > ps->info.info.ps.writes_sample_mask)); > > > @@ -3249,7 +3254,7 @@ radv_pipeline_generate_fragment_shader(struct > > > radeon_cmdbuf *cs, > > > } > > > > > > static void > > > -radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *cs, > > > +radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs, > > > struct radv_pipeline *pipeline) > > > { > > > if (pipeline->device->physical_device->rad_info.family < > > > CHIP_POLARIS10) > > > @@ -3260,7 +3265,7 @@ radv_pipeline_generate_vgt_vertex_reuse(struct > > > radeon_cmdbuf *cs, > > > radv_get_shader(pipeline, > > > MESA_SHADER_TESS_EVAL)->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) { > > > vtx_reuse_depth = 14; > > > } > > > - radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, > > > + radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, > > > S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth)); > > > } > > > > > > @@ -3330,38 +3335,46 @@ radv_pipeline_generate_pm4(struct radv_pipeline > > > *pipeline, > > > const struct radv_gs_state *gs, > > > unsigned prim, unsigned gs_out) > > > { > > > - pipeline->cs.buf = malloc(4 * 256); > > > - pipeline->cs.max_dw = 256; > > > - > > > - radv_pipeline_generate_depth_stencil_state(&pipeline->cs, pipeline, > > > pCreateInfo, extra); > > > - radv_pipeline_generate_blend_state(&pipeline->cs, pipeline, blend); > > > - 
radv_pipeline_generate_raster_state(&pipeline->cs, pipeline, > > > pCreateInfo); > > > - radv_pipeline_generate_multisample_state(&pipeline->cs, pipeline); > > > - radv_pipeline_generate_vgt_gs_mode(&pipeline->cs, pipeline); > > > - radv_pipeline_generate_vertex_shader(&pipeline->cs, pipeline, tess); > > > - radv_pipeline_generate_tess_shaders(&pipeline->cs, pipeline, tess); > > > - radv_pipeline_generate_geometry_shader(&pipeline->cs, pipeline, gs); > > > - radv_pipeline_generate_fragment_shader(&pipeline->cs, pipeline); > > > - radv_pipeline_generate_ps_inputs(&pipeline->cs, pipeline); > > > - radv_pipeline_generate_vgt_vertex_reuse(&pipeline->cs, pipeline); > > > - radv_pipeline_generate_binning_state(&pipeline->cs, pipeline, > > > pCreateInfo); > > > - > > > - radeon_set_context_reg(&pipeline->cs, R_0286E8_SPI_TMPRING_SIZE, > > > + struct radeon_cmdbuf *ctx_cs = &pipeline->ctx_cs; > > > + struct radeon_cmdbuf *cs = &pipeline->cs; > > > + > > > + cs->max_dw = 64; > > > + ctx_cs->max_dw = 256; > > > + cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw)); > > > + ctx_cs->buf = cs->buf + cs->max_dw; > > > + > > > + radv_pipeline_generate_depth_stencil_state(ctx_cs, pipeline, > > > pCreateInfo, extra); > > > + radv_pipeline_generate_blend_state(ctx_cs, pipeline, blend); > > > + radv_pipeline_generate_raster_state(ctx_cs, pipeline, pCreateInfo); > > > + radv_pipeline_generate_multisample_state(ctx_cs, pipeline); > > > + radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline); > > > + radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline, tess); > > > + radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline, tess); > > > + radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline, gs); > > > + radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline); > > > + radv_pipeline_generate_ps_inputs(ctx_cs, pipeline); > > > + radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline); > > > + radv_pipeline_generate_binning_state(ctx_cs, pipeline, pCreateInfo); > > > + > 
> > + radeon_set_context_reg(ctx_cs, R_0286E8_SPI_TMPRING_SIZE, > > > S_0286E8_WAVES(pipeline->max_waves) | > > > > > > S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); > > > > > > - radeon_set_context_reg(&pipeline->cs, > > > R_028B54_VGT_SHADER_STAGES_EN, > > > radv_compute_vgt_shader_stages_en(pipeline)); > > > + radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, > > > radv_compute_vgt_shader_stages_en(pipeline)); > > > > > > if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { > > > - radeon_set_uconfig_reg_idx(&pipeline->cs, > > > R_030908_VGT_PRIMITIVE_TYPE, 1, prim); > > > + radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, > > > 1, prim); > > > } else { > > > - radeon_set_config_reg(&pipeline->cs, > > > R_008958_VGT_PRIMITIVE_TYPE, prim); > > > + radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, > > > prim); > > > } > > > - radeon_set_context_reg(&pipeline->cs, > > > R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out); > > > + radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, > > > gs_out); > > > > > > - radeon_set_context_reg(&pipeline->cs, R_02820C_PA_SC_CLIPRECT_RULE, > > > radv_compute_cliprect_rule(pCreateInfo)); > > > + radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, > > > radv_compute_cliprect_rule(pCreateInfo)); > > > > > > - assert(pipeline->cs.cdw <= pipeline->cs.max_dw); > > > + pipeline->ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * > > > 4); > > > + > > > + assert(ctx_cs->cdw <= ctx_cs->max_dw); > > > + assert(cs->cdw <= cs->max_dw); > > > } > > > > > > static struct radv_ia_multi_vgt_param_helpers > > > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h > > > index 6089ee6a607..f6534ae3309 100644 > > > --- a/src/amd/vulkan/radv_private.h > > > +++ b/src/amd/vulkan/radv_private.h > > > @@ -1365,6 +1365,8 @@ struct radv_pipeline { > > > VkShaderStageFlags active_stages; > > > > > > struct radeon_cmdbuf cs; > > > + uint32_t ctx_cs_hash; > > > + 
struct radeon_cmdbuf ctx_cs; > > > > > > struct radv_vertex_elements_info vertex_elements; > > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev