From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_state_shaders.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 0696582..0087eeb 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -20,20 +20,21 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Christian König <christian.koe...@amd.com> * Marek Olšák <mar...@gmail.com> */ #include "si_pipe.h" #include "sid.h" +#include "gfx9d.h" #include "radeon/r600_cs.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" #include "util/hash_table.h" #include "util/crc32.h" #include "util/u_memory.h" #include "util/u_prim.h" #include "util/disk_cache.h" @@ -456,42 +457,42 @@ static void si_shader_ls(struct si_shader *shader) shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B528_DX10_CLAMP(1) | S_00B528_FLOAT_MODE(shader->config.float_mode); shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) | S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } -static void si_shader_hs(struct si_shader *shader) +static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) { struct si_pm4_state *pm4; uint64_t va; pm4 = si_get_shader_pm4_state(shader); if (!pm4) return; va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40); si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS, S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B428_DX10_CLAMP(1) | S_00B428_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) | - S_00B42C_OC_LDS_EN(1) | + S_00B42C_OC_LDS_EN(sscreen->b.chip_class <= VI) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) { struct si_pm4_state *pm4; unsigned num_user_sgprs; unsigned vgpr_comp_cnt; uint64_t va; unsigned oc_lds_en; @@ -923,21 +924,21 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, switch (shader->selector->type) { case PIPE_SHADER_VERTEX: if (shader->key.as_ls) si_shader_ls(shader); else if (shader->key.as_es) si_shader_es(sscreen, shader); else si_shader_vs(sscreen, shader, NULL); break; case PIPE_SHADER_TESS_CTRL: - si_shader_hs(shader); + si_shader_hs(sscreen, shader); break; case PIPE_SHADER_TESS_EVAL: if (shader->key.as_es) si_shader_es(sscreen, shader); else si_shader_vs(sscreen, shader, NULL); break; case PIPE_SHADER_GEOMETRY: si_shader_gs(shader); break; @@ -2117,22 +2118,25 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) min_esgs_ring_size = align(min_esgs_ring_size, alignment); esgs_ring_size = align(esgs_ring_size, alignment); gsvs_ring_size = align(gsvs_ring_size, alignment); esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size); gsvs_ring_size = MIN2(gsvs_ring_size, max_size); /* Some rings don't have to be allocated if shaders don't use them. * (e.g. no varyings between ES and GS or GS and VS) + * + * GFX9 doesn't have the ESGS ring. */ - bool update_esgs = esgs_ring_size && + bool update_esgs = sctx->b.chip_class <= VI && + esgs_ring_size && (!sctx->esgs_ring || sctx->esgs_ring->width0 < esgs_ring_size); bool update_gsvs = gsvs_ring_size && (!sctx->gsvs_ring || sctx->gsvs_ring->width0 < gsvs_ring_size); if (!update_esgs && !update_gsvs) return true; if (update_esgs) { @@ -2156,23 +2160,25 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) if (!sctx->gsvs_ring) return false; } /* Create the "init_config_gs_rings" state. */ pm4 = CALLOC_STRUCT(si_pm4_state); if (!pm4) return false; if (sctx->b.chip_class >= CIK) { - if (sctx->esgs_ring) + if (sctx->esgs_ring) { + assert(sctx->b.chip_class <= VI); si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE, sctx->esgs_ring->width0 / 256); + } if (sctx->gsvs_ring) si_pm4_set_reg(pm4, R_030904_VGT_GSVS_RING_SIZE, sctx->gsvs_ring->width0 / 256); } else { if (sctx->esgs_ring) si_pm4_set_reg(pm4, R_0088C8_VGT_ESGS_RING_SIZE, sctx->esgs_ring->width0 / 256); if (sctx->gsvs_ring) si_pm4_set_reg(pm4, R_0088CC_VGT_GSVS_RING_SIZE, sctx->gsvs_ring->width0 / 256); @@ -2187,20 +2193,21 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) si_init_config_add_vgt_flush(sctx); si_pm4_upload_indirect_buffer(sctx, sctx->init_config); } /* Flush the context to re-emit both init_config states. */ sctx->b.initial_gfx_cs_size = 0; /* force flush */ si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL); /* Set ring bindings. */ if (sctx->esgs_ring) { + assert(sctx->b.chip_class <= VI); si_set_ring_buffer(&sctx->b.b, SI_ES_RING_ESGS, sctx->esgs_ring, 0, sctx->esgs_ring->width0, true, true, 4, 64, 0); si_set_ring_buffer(&sctx->b.b, SI_GS_RING_ESGS, sctx->esgs_ring, 0, sctx->esgs_ring->width0, false, false, 0, 0, 0); } if (sctx->gsvs_ring) { si_set_ring_buffer(&sctx->b.b, SI_RING_GSVS, sctx->gsvs_ring, 0, sctx->gsvs_ring->width0, @@ -2423,20 +2430,23 @@ static void si_init_tess_factor_ring(struct si_context *sctx) /* Append these registers to the init config state. */ if (sctx->b.chip_class >= CIK) { if (sctx->b.chip_class >= VI) --max_offchip_buffers; si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(sctx->tf_ring->width0 / 4)); si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE, r600_resource(sctx->tf_ring)->gpu_address >> 8); + if (sctx->b.chip_class >= GFX9) + si_pm4_set_reg(sctx->init_config, R_030944_VGT_TF_MEMORY_BASE_HI, + r600_resource(sctx->tf_ring)->gpu_address >> 40); si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM, S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | S_03093C_OFFCHIP_GRANULARITY(offchip_granularity)); } else { assert(offchip_granularity == V_03093C_X_8K_DWORDS); si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(sctx->tf_ring->width0 / 4)); si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE, r600_resource(sctx->tf_ring)->gpu_address >> 8); si_pm4_set_reg(sctx->init_config, R_0089B0_VGT_HS_OFFCHIP_PARAM, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev