From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 8e6a94d..df2f130 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -329,20 +329,21 @@ struct si_context { unsigned scratch_waves; unsigned spi_tmpring_size; struct r600_resource *compute_scratch_buffer; /* Emitted derived tessellation state. */ struct si_shader *last_ls; /* local shader (VS) */ struct si_shader_selector *last_tcs; int last_num_tcs_input_cp; int last_tes_sh_base; + unsigned last_num_patches; /* Debug state. */ bool is_debug; struct radeon_saved_cs last_gfx; struct r600_resource *last_trace_buf; struct r600_resource *trace_buf; unsigned trace_id; uint64_t dmesg_timestamp; unsigned apitrace_call_number; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index affc156..e904164 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -101,20 +101,33 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL]; unsigned num_tcs_input_cp = info->vertices_per_patch; unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs; unsigned num_tcs_patch_outputs; unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size; unsigned input_patch_size, output_patch_size, output_patch0_offset; unsigned perpatch_output_offset, lds_size, ls_rsrc2; unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets; unsigned offchip_layout, hardware_lds_size, ls_hs_config; + if (sctx->last_ls == ls->current && + sctx->last_tcs == tcs && + sctx->last_tes_sh_base == tes_sh_base && + sctx->last_num_tcs_input_cp == num_tcs_input_cp) { + *num_patches = sctx->last_num_patches; + return; + } + + sctx->last_ls = ls->current; + sctx->last_tcs = tcs; + sctx->last_tes_sh_base = tes_sh_base; + sctx->last_num_tcs_input_cp = num_tcs_input_cp; + /* This calculates how shader inputs and outputs among VS, TCS, and TES * are laid out in LDS. */ num_tcs_inputs = util_last_bit64(ls->cso->outputs_written); if (sctx->tcs_shader.cso) { num_tcs_outputs = util_last_bit64(tcs->outputs_written); num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written); } else { /* No TCS. Route varyings from LS to TES. */ @@ -146,46 +159,36 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Make sure the output data fits in the offchip buffer */ *num_patches = MIN2(*num_patches, (sctx->screen->tess_offchip_block_dw_size * 4) / output_patch_size); /* Not necessary for correctness, but improves performance. The * specific value is taken from the proprietary driver. */ *num_patches = MIN2(*num_patches, 40); + sctx->last_num_patches = *num_patches; output_patch0_offset = input_patch_size * *num_patches; perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; lds_size = output_patch0_offset + output_patch_size * *num_patches; ls_rsrc2 = ls->current->config.rsrc2; if (sctx->b.chip_class >= CIK) { assert(lds_size <= 65536); ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512); } else { assert(lds_size <= 32768); ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256); } - if (sctx->last_ls == ls->current && - sctx->last_tcs == tcs && - sctx->last_tes_sh_base == tes_sh_base && - sctx->last_num_tcs_input_cp == num_tcs_input_cp) - return; - - sctx->last_ls = ls->current; - sctx->last_tcs = tcs; - sctx->last_tes_sh_base = tes_sh_base; - sctx->last_num_tcs_input_cp = num_tcs_input_cp; - /* Due to a hw bug, RSRC2_LS must be written twice with another * LS register written in between. */ if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII) radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); radeon_emit(cs, ls->current->config.rsrc1); radeon_emit(cs, ls_rsrc2); /* Compute userdata SGPRs. */ assert(((input_vertex_size / 4) & ~0xff) == 0); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev