From: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeonsi/si_descriptors.c | 6 +++++- src/gallium/drivers/radeonsi/si_shader.c | 15 ++++++--------- src/gallium/drivers/radeonsi/si_shader.h | 13 +------------ src/gallium/drivers/radeonsi/si_state_draw.c | 9 +++++---- src/gallium/drivers/radeonsi/si_state_shaders.c | 4 ++-- 5 files changed, 19 insertions(+), 28 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 8f5a16b..4cb6d86 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1823,22 +1823,26 @@ static void si_shader_userdata_begin_new_cs(struct si_context *sctx) * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*. */ static void si_set_user_data_base(struct si_context *sctx, unsigned shader, uint32_t new_base) { uint32_t *base = &sctx->shader_userdata.sh_base[shader]; if (*base != new_base) { *base = new_base; - if (new_base) + if (new_base) { si_mark_shader_pointers_dirty(sctx, shader); + + if (shader == PIPE_SHADER_VERTEX) + sctx->last_vs_state = ~0; + } } } /* This must be called when these shaders are changed from non-NULL to NULL * and vice versa: * - geometry shader * - tessellation control shader * - tessellation evaluation shader */ void si_shader_change_notify(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 0f080cf..02447dd 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -220,33 +220,33 @@ static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx) * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) * - ... * * All three shaders VS(LS), TCS, TES share the same LDS space. */ static LLVMValueRef get_tcs_in_patch_stride(struct si_shader_context *ctx) { if (ctx->type == PIPE_SHADER_VERTEX) - return unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 8, 13); + return unpack_param(ctx, SI_PARAM_VS_STATE_BITS, 8, 13); else if (ctx->type == PIPE_SHADER_TESS_CTRL) return unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 8, 13); else { assert(0); return NULL; } } static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx) { - return unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13); + return unpack_param(ctx, SI_PARAM_VS_STATE_BITS, 0, 13); } static LLVMValueRef get_tcs_out_patch0_offset(struct si_shader_context *ctx) { return lp_build_mul_imm(&ctx->bld_base.uint_bld, unpack_param(ctx, SI_PARAM_TCS_OUT_OFFSETS, 0, 16), 4); @@ -2656,21 +2656,21 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); struct si_shader *shader = ctx->shader; struct tgsi_shader_info *info = &shader->selector->info; struct gallivm_state *gallivm = &ctx->gallivm; unsigned i, chan; LLVMValueRef vertex_id = LLVMGetParam(ctx->main_fn, ctx->param_rel_auto_id); LLVMValueRef vertex_dw_stride = - unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 24, 8); + unpack_param(ctx, SI_PARAM_VS_STATE_BITS, 24, 8); LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id, vertex_dw_stride, ""); /* Write outputs to LDS. The next shader (TCS aka HS) will read * its inputs from it. */ for (i = 0; i < info->num_outputs; i++) { LLVMValueRef *out_ptr = ctx->outputs[i]; unsigned name = info->output_semantic_name[i]; unsigned index = info->output_semantic_index[i]; int param = si_shader_io_get_unique_index(name, index); @@ -5602,33 +5602,30 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS); params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES); params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS); switch (ctx->type) { case PIPE_SHADER_VERTEX: params[SI_PARAM_VERTEX_BUFFERS] = const_array(ctx->v16i8, SI_MAX_ATTRIBS); params[SI_PARAM_BASE_VERTEX] = ctx->i32; params[SI_PARAM_START_INSTANCE] = ctx->i32; params[SI_PARAM_DRAWID] = ctx->i32; - num_params = SI_PARAM_DRAWID+1; + params[SI_PARAM_VS_STATE_BITS] = ctx->i32; + num_params = SI_PARAM_VS_STATE_BITS+1; if (shader->key.as_es) { params[ctx->param_es2gs_offset = num_params++] = ctx->i32; } else if (shader->key.as_ls) { - params[SI_PARAM_LS_OUT_LAYOUT] = ctx->i32; - num_params = SI_PARAM_LS_OUT_LAYOUT+1; + /* no extra parameters */ } else { if (shader->is_gs_copy_shader) { num_params = SI_PARAM_RW_BUFFERS+1; - } else { - params[SI_PARAM_VS_STATE_BITS] = ctx->i32; - num_params = SI_PARAM_VS_STATE_BITS+1; } /* The locations of the other parameters are assigned dynamically. */ declare_streamout_params(ctx, &shader->selector->so, params, ctx->i32, &num_params); } last_sgpr = num_params-1; /* VGPRs */ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 6ce2b26..fdb0dd4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -91,30 +91,23 @@ enum { SI_SGPR_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS_HI, SI_NUM_RESOURCE_SGPRS, /* all VS variants */ SI_SGPR_VERTEX_BUFFERS = SI_NUM_RESOURCE_SGPRS, SI_SGPR_VERTEX_BUFFERS_HI, SI_SGPR_BASE_VERTEX, SI_SGPR_START_INSTANCE, SI_SGPR_DRAWID, - SI_ES_NUM_USER_SGPR, - - /* hw VS only */ - SI_SGPR_VS_STATE_BITS = SI_ES_NUM_USER_SGPR, + SI_SGPR_VS_STATE_BITS, SI_VS_NUM_USER_SGPR, - /* hw LS only */ - SI_SGPR_LS_OUT_LAYOUT = SI_ES_NUM_USER_SGPR, - SI_LS_NUM_USER_SGPR, - /* both TCS and TES */ SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, SI_TES_NUM_USER_SGPR, /* TCS only */ SI_SGPR_TCS_OUT_OFFSETS = SI_TES_NUM_USER_SGPR, SI_SGPR_TCS_OUT_LAYOUT, SI_SGPR_TCS_IN_LAYOUT, SI_TCS_NUM_USER_SGPR, @@ -139,25 +132,21 @@ enum { SI_PARAM_SAMPLERS, SI_PARAM_IMAGES, SI_PARAM_SHADER_BUFFERS, SI_NUM_RESOURCE_PARAMS, /* VS only parameters */ SI_PARAM_VERTEX_BUFFERS = SI_NUM_RESOURCE_PARAMS, SI_PARAM_BASE_VERTEX, SI_PARAM_START_INSTANCE, SI_PARAM_DRAWID, - /* [0] = clamp vertex color, VS as VS only */ SI_PARAM_VS_STATE_BITS, - /* same value as TCS_IN_LAYOUT, VS as LS only */ - SI_PARAM_LS_OUT_LAYOUT = SI_PARAM_DRAWID + 1, - /* the other VS parameters are assigned dynamically */ /* Layout of TCS outputs in the offchip buffer * [0:8] = the number of patches per threadgroup. * [9:15] = the number of output vertices per patch. * [16:31] = the offset of per patch attributes in the buffer in bytes. */ SI_PARAM_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */ /* TCS only parameters. */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 7bf4f4d..0d70ea9 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -215,23 +215,23 @@ static void si_emit_derived_tess_state(struct si_context *sctx, tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) | S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4); tcs_out_layout = (output_patch_size / 4) | ((output_vertex_size / 4) << 13); tcs_out_offsets = (output_patch0_offset / 16) | ((perpatch_output_offset / 16) << 16); offchip_layout = (pervertex_output_patch_size * *num_patches << 16) | (num_tcs_output_cp << 9) | *num_patches; /* Set them for LS. */ - radeon_set_sh_reg(cs, - R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4, - tcs_in_layout); + sctx->current_vs_state &= C_VS_STATE_LS_OUT_PATCH_SIZE & + C_VS_STATE_LS_OUT_VERTEX_SIZE; + sctx->current_vs_state |= tcs_in_layout; /* Set them for TCS. */ radeon_set_sh_reg_seq(cs, R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4); radeon_emit(cs, offchip_layout); radeon_emit(cs, tcs_out_offsets); radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26)); radeon_emit(cs, tcs_in_layout); /* Set them for TES. */ @@ -493,21 +493,22 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx) sctx->last_rast_prim = rast_prim; sctx->last_sc_line_stipple = rs->pa_sc_line_stipple; } static void si_emit_vs_state(struct si_context *sctx) { if (sctx->current_vs_state != sctx->last_vs_state) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; radeon_set_sh_reg(cs, - R_00B130_SPI_SHADER_USER_DATA_VS_0 + SI_SGPR_VS_STATE_BITS * 4, + sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX] + + SI_SGPR_VS_STATE_BITS * 4, sctx->current_vs_state); sctx->last_vs_state = sctx->current_vs_state; } } static void si_emit_draw_registers(struct si_context *sctx, const struct pipe_draw_info *info, unsigned num_patches) { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index ff4ff01..c52ffd9 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -458,21 +458,21 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40); shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B528_DX10_CLAMP(1) | S_00B528_FLOAT_MODE(shader->config.float_mode); - shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) | + shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_VS_NUM_USER_SGPR) | S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) { struct si_pm4_state *pm4; uint64_t va; pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -506,21 +506,21 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) pm4 = si_get_shader_pm4_state(shader); if (!pm4) return; va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); if (shader->selector->type == PIPE_SHADER_VERTEX) { vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0; - num_user_sgprs = SI_ES_NUM_USER_SGPR; + num_user_sgprs = SI_VS_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = 3; /* all components are needed for TES */ num_user_sgprs = SI_TES_NUM_USER_SGPR; } else unreachable("invalid shader selector type"); oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0; si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, shader->selector->esgs_itemsize / 4); -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev