From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_pipe.h       |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c | 25 ++++++++++++++-----------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 8e6a94d..df2f130 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -329,20 +329,21 @@ struct si_context {
        unsigned                scratch_waves;
        unsigned                spi_tmpring_size;
 
        struct r600_resource    *compute_scratch_buffer;
 
        /* Emitted derived tessellation state. */
        struct si_shader        *last_ls; /* local shader (VS) */
        struct si_shader_selector *last_tcs;
        int                     last_num_tcs_input_cp;
        int                     last_tes_sh_base;
+       unsigned                last_num_patches;
 
        /* Debug state. */
        bool                    is_debug;
        struct radeon_saved_cs  last_gfx;
        struct r600_resource    *last_trace_buf;
        struct r600_resource    *trace_buf;
        unsigned                trace_id;
        uint64_t                dmesg_timestamp;
        unsigned                apitrace_call_number;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index affc156..e904164 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -101,20 +101,33 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
        unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
        unsigned num_tcs_input_cp = info->vertices_per_patch;
        unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
        unsigned num_tcs_patch_outputs;
        unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
        unsigned input_patch_size, output_patch_size, output_patch0_offset;
        unsigned perpatch_output_offset, lds_size, ls_rsrc2;
        unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
        unsigned offchip_layout, hardware_lds_size, ls_hs_config;
 
+       if (sctx->last_ls == ls->current &&
+           sctx->last_tcs == tcs &&
+           sctx->last_tes_sh_base == tes_sh_base &&
+           sctx->last_num_tcs_input_cp == num_tcs_input_cp) {
+               *num_patches = sctx->last_num_patches;
+               return;
+       }
+
+       sctx->last_ls = ls->current;
+       sctx->last_tcs = tcs;
+       sctx->last_tes_sh_base = tes_sh_base;
+       sctx->last_num_tcs_input_cp = num_tcs_input_cp;
+
        /* This calculates how shader inputs and outputs among VS, TCS, and TES
         * are laid out in LDS. */
        num_tcs_inputs = util_last_bit64(ls->cso->outputs_written);
 
        if (sctx->tcs_shader.cso) {
                num_tcs_outputs = util_last_bit64(tcs->outputs_written);
                num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
                num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
        } else {
                /* No TCS. Route varyings from LS to TES. */
@@ -146,46 +159,36 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 
        /* Make sure the output data fits in the offchip buffer */
        *num_patches = MIN2(*num_patches,
                            (sctx->screen->tess_offchip_block_dw_size * 4) /
                            output_patch_size);
 
        /* Not necessary for correctness, but improves performance. The
         * specific value is taken from the proprietary driver.
         */
        *num_patches = MIN2(*num_patches, 40);
+       sctx->last_num_patches = *num_patches;
 
        output_patch0_offset = input_patch_size * *num_patches;
        perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
 
        lds_size = output_patch0_offset + output_patch_size * *num_patches;
        ls_rsrc2 = ls->current->config.rsrc2;
 
        if (sctx->b.chip_class >= CIK) {
                assert(lds_size <= 65536);
                ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512);
        } else {
                assert(lds_size <= 32768);
                ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
        }
 
-       if (sctx->last_ls == ls->current &&
-           sctx->last_tcs == tcs &&
-           sctx->last_tes_sh_base == tes_sh_base &&
-           sctx->last_num_tcs_input_cp == num_tcs_input_cp)
-               return;
-
-       sctx->last_ls = ls->current;
-       sctx->last_tcs = tcs;
-       sctx->last_tes_sh_base = tes_sh_base;
-       sctx->last_num_tcs_input_cp = num_tcs_input_cp;
-
        /* Due to a hw bug, RSRC2_LS must be written twice with another
         * LS register written in between. */
        if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
                radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
        radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
        radeon_emit(cs, ls->current->config.rsrc1);
        radeon_emit(cs, ls_rsrc2);
 
        /* Compute userdata SGPRs. */
        assert(((input_vertex_size / 4) & ~0xff) == 0);
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to