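From: Marek Olšák <marek.ol...@amd.com> This is the first user of optimized monolithic shader variants: when the rasterizer state enables no clip planes, the hardware vertex shader variant skips the clipvertex code and no longer emits clip distances as position exports.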
Cull distances can't be disabled by states. --- src/gallium/drivers/radeonsi/si_shader.c | 6 ++++++ src/gallium/drivers/radeonsi/si_shader.h | 5 +++-- src/gallium/drivers/radeonsi/si_state.c | 10 +++++++++- src/gallium/drivers/radeonsi/si_state_shaders.c | 21 +++++++++++++++++++-- 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index cd436f7..514c23b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2287,23 +2287,29 @@ handle_semantic: target = V_008DFC_SQ_EXP_POS; break; case TGSI_SEMANTIC_COLOR: case TGSI_SEMANTIC_BCOLOR: target = V_008DFC_SQ_EXP_PARAM + param_count; assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset)); shader->info.vs_output_param_offset[i] = param_count; param_count++; break; case TGSI_SEMANTIC_CLIPDIST: + if (shader->key.opt.hw_vs.clip_disable) { + semantic_name = TGSI_SEMANTIC_GENERIC; + goto handle_semantic; + } target = V_008DFC_SQ_EXP_POS + 2 + semantic_index; break; case TGSI_SEMANTIC_CLIPVERTEX: + if (shader->key.opt.hw_vs.clip_disable) + continue; si_llvm_emit_clipvertex(bld_base, pos_args, outputs[i].values); continue; case TGSI_SEMANTIC_PRIMID: case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_TEXCOORD: case TGSI_SEMANTIC_GENERIC: target = V_008DFC_SQ_EXP_PARAM + param_count; assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset)); shader->info.vs_output_param_offset[i] = param_count; param_count++; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 38aa361..4cbd1c2 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -313,22 +313,20 @@ struct si_shader_selector { /* Common VS bits between the shader key and the prolog key. */ struct si_vs_prolog_bits { unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; }; /* Common VS bits between the shader key and the epilog key. 
*/ struct si_vs_epilog_bits { unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ /* TODO: - * - skip clipdist, culldist (including clipvertex code) exports based - * on which clip_plane_enable bits are set * - skip layer, viewport, clipdist, and culldist parameter exports * if PS doesn't read them */ }; /* Common TCS bits between the shader key and the epilog key. */ struct si_tcs_epilog_bits { unsigned prim_mode:3; }; @@ -431,20 +429,23 @@ struct si_shader_key { /* One pair of bits for every input: SI_FIX_FETCH_* enums. */ uint32_t fix_fetch; } vs; struct { uint64_t inputs_to_copy; /* for fixed-func TCS */ } tcs; } mono; /* Optimization flags for asynchronous compilation only. */ union { + struct { + unsigned clip_disable:1; + } hw_vs; /* HW VS (it can be VS, TES, GS) */ } opt; }; struct si_shader_config { unsigned num_sgprs; unsigned num_vgprs; unsigned spilled_sgprs; unsigned spilled_vgprs; unsigned lds_size; unsigned spi_ps_input_ena; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 7d118b0..b30bec1 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -637,29 +637,37 @@ static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom) radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4); radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4); } #define SIX_BITS 0x3F static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + struct si_shader *vs = si_get_vs_state(sctx); struct tgsi_shader_info *info = si_get_vs_info(sctx); struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned window_space = info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; unsigned clipdist_mask = info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask; unsigned ucp_mask = clipdist_mask ? 
0 : rs->clip_plane_enable & SIX_BITS; unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance; - unsigned total_mask = clipdist_mask | culldist_mask; + unsigned total_mask; + + if (vs->key.opt.hw_vs.clip_disable) { + assert(!info->culldist_writemask); + clipdist_mask = 0; + culldist_mask = 0; + } + total_mask = clipdist_mask | culldist_mask; radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) | S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize || info->writes_edgeflag || diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 00ccbbd..1d116f6 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -847,20 +847,31 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, static unsigned si_get_alpha_test_func(struct si_context *sctx) { /* Alpha-test should be disabled if colorbuffer 0 is integer. 
*/ if (sctx->queued.named.dsa) return sctx->queued.named.dsa->alpha_func; return PIPE_FUNC_ALWAYS; } +static void si_shader_selector_key_hw_vs(struct si_context *sctx, + struct si_shader_selector *vs, + struct si_shader_key *key) +{ + key->opt.hw_vs.clip_disable = + sctx->queued.named.rasterizer->clip_plane_enable == 0 && + (vs->info.clipdist_writemask || + vs->info.writes_clipvertex) && + !vs->info.culldist_writemask; +} + /* Compute the key for the hw shader variant */ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_shader_selector *sel, struct si_shader_key *key) { struct si_context *sctx = (struct si_context *)ctx; unsigned i; memset(key, 0, sizeof(*key)); @@ -875,36 +886,42 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, key->mono.vs.fix_fetch = sctx->vertex_elements->fix_fetch & u_bit_consecutive(0, 2 * count); } if (sctx->tes_shader.cso) key->as_ls = 1; else if (sctx->gs_shader.cso) key->as_es = 1; else { + si_shader_selector_key_hw_vs(sctx, sel, key); + if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) key->part.vs.epilog.export_prim_id = 1; } break; case PIPE_SHADER_TESS_CTRL: key->part.tcs.epilog.prim_mode = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; if (sel == sctx->fixed_func_tcs_shader.cso) key->mono.tcs.inputs_to_copy = sctx->vs_shader.cso->outputs_written; break; case PIPE_SHADER_TESS_EVAL: if (sctx->gs_shader.cso) key->as_es = 1; - else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) - key->part.tes.epilog.export_prim_id = 1; + else { + si_shader_selector_key_hw_vs(sctx, sel, key); + + if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) + key->part.tes.epilog.export_prim_id = 1; + } break; case PIPE_SHADER_GEOMETRY: key->part.gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix; break; case PIPE_SHADER_FRAGMENT: { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct si_state_blend *blend = 
sctx->queued.named.blend; if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && sel->info.colors_written == 0x1) -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev