From: Nicolai Hähnle <nicolai.haeh...@amd.com>

We only advertise a maximum of 32 inputs and outputs in each shader stage,
so everything fits into 64 bits.
---
 src/gallium/drivers/radeonsi/si_shader.c        | 35 +++++++---------------
 src/gallium/drivers/radeonsi/si_shader.h        |  6 +---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 40 ++++---------------------
 3 files changed, 17 insertions(+), 64 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a48a552..67d62c3 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -135,48 +135,41 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index) { switch (semantic_name) { case TGSI_SEMANTIC_POSITION: return 0; case TGSI_SEMANTIC_PSIZE: return 1; case TGSI_SEMANTIC_CLIPDIST: assert(index <= 1); return 2 + index; case TGSI_SEMANTIC_GENERIC: - if (index <= 63-4) + if (index < 32) return 4 + index; assert(!"invalid generic index"); return 0; - default: - assert(!"invalid semantic name"); - return 0; - } -} - -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index) -{ - switch (name) { case TGSI_SEMANTIC_FOG: - return 0; + return 36; case TGSI_SEMANTIC_LAYER: - return 1; + return 37; case TGSI_SEMANTIC_VIEWPORT_INDEX: - return 2; + return 38; case TGSI_SEMANTIC_PRIMID: - return 3; + return 39; case TGSI_SEMANTIC_COLOR: /* these alias */ case TGSI_SEMANTIC_BCOLOR: - return 4 + index; + assert(index < 2); + return 40 + index; case TGSI_SEMANTIC_TEXCOORD: - return 6 + index; + assert(index < 8); + return 42 + index; default: assert(!"invalid semantic name"); return 0; } } /** * Get the value of a shader input parameter and extract a bitfield. 
*/ static LLVMValueRef unpack_param(struct si_shader_context *ctx, @@ -2297,31 +2290,24 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, semantic_name = outputs[i].semantic_name; semantic_index = outputs[i].semantic_index; bool export_param = true; switch (semantic_name) { case TGSI_SEMANTIC_POSITION: /* ignore these */ case TGSI_SEMANTIC_PSIZE: case TGSI_SEMANTIC_CLIPVERTEX: case TGSI_SEMANTIC_EDGEFLAG: break; - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_CLIPDIST: + default: if (shader->key.opt.hw_vs.kill_outputs & (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index))) export_param = false; - break; - default: - if (shader->key.opt.hw_vs.kill_outputs2 & - (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index))) - export_param = false; - break; } if (outputs[i].vertex_stream[0] != 0 && outputs[i].vertex_stream[1] != 0 && outputs[i].vertex_stream[2] != 0 && outputs[i].vertex_stream[3] != 0) export_param = false; handle_semantic: /* Select the correct target */ @@ -7154,21 +7140,20 @@ static void si_dump_shader_key(unsigned processor, struct si_shader *shader, default: assert(0); } if ((processor == PIPE_SHADER_GEOMETRY || processor == PIPE_SHADER_TESS_EVAL || processor == PIPE_SHADER_VERTEX) && !key->as_es && !key->as_ls) { fprintf(f, " opt.hw_vs.kill_outputs = 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs); - fprintf(f, " opt.hw_vs.kill_outputs2 = 0x%x\n", key->opt.hw_vs.kill_outputs2); fprintf(f, " opt.hw_vs.clip_disable = %u\n", key->opt.hw_vs.clip_disable); } } static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_screen *sscreen, LLVMTargetMachineRef tm) { struct lp_build_tgsi_context *bld_base; struct lp_build_tgsi_action tmpl = {}; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index cb8a902..5e43b4c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -349,25 +349,23 @@ struct 
si_shader_selector { unsigned db_shader_control; /* Set 0xf or 0x0 (4 bits) per each written output. * ANDed with spi_shader_col_format. */ unsigned colors_written_4bit; /* CS parameters */ unsigned local_size; uint64_t outputs_written; /* "get_unique_index" bits */ - uint32_t patch_outputs_written; /* "get_unique_index" bits */ - uint32_t outputs_written2; /* "get_unique_index2" bits */ + uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */ uint64_t inputs_read; /* "get_unique_index" bits */ - uint32_t inputs_read2; /* "get_unique_index2" bits */ }; /* Valid shader configurations: * * API shaders VS | TCS | TES | GS |pass| PS * are compiled as: | | | |thru| * | | | | | * Only VS & PS: VS | | | | | PS * GFX6 - with GS: ES | | | GS | VS | PS * - with tess: LS | HS | VS | | | PS @@ -493,21 +491,20 @@ struct si_shader_key { uint8_t vs_fix_fetch[SI_MAX_ATTRIBS]; uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */ /* When PS needs PrimID and GS is disabled. */ unsigned vs_export_prim_id:1; } mono; /* Optimization flags for asynchronous compilation only. */ struct { struct { uint64_t kill_outputs; /* "get_unique_index" bits */ - uint32_t kill_outputs2; /* "get_unique_index2" bits */ unsigned clip_disable:1; } hw_vs; /* HW VS (it can be VS, TES, GS) */ /* For shaders where monolithic variants have better code. * * This is a flag that has no effect on code generation, * but forces monolithic shaders to be used as soon as * possible, because it's in the "opt" group. 
*/ unsigned prefer_mono:1; @@ -600,21 +597,20 @@ int si_compile_llvm(struct si_screen *sscreen, struct ac_shader_binary *binary, struct si_shader_config *conf, LLVMTargetMachineRef tm, LLVMModuleRef mod, struct pipe_debug_callback *debug, unsigned processor, const char *name); void si_shader_destroy(struct si_shader *shader); unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor, FILE *f, bool check_debug_option); void si_multiwave_lds_size_workaround(struct si_screen *sscreen, unsigned *lds_size); void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, struct si_shader_config *config, uint64_t scratch_va); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 68f4d21..cf0c11f 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1225,36 +1225,31 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx, ps_colormask &= ps->colors_written_4bit; ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard || (!ps_colormask && !ps_modifies_zs && !ps->info.writes_memory); } /* Find out which VS outputs aren't used by the PS. 
*/ uint64_t outputs_written = vs->outputs_written; - uint32_t outputs_written2 = vs->outputs_written2; uint64_t inputs_read = 0; - uint32_t inputs_read2 = 0; outputs_written &= ~0x3; /* ignore POSITION, PSIZE */ if (!ps_disabled) { inputs_read = ps->inputs_read; - inputs_read2 = ps->inputs_read2; } uint64_t linked = outputs_written & inputs_read; - uint32_t linked2 = outputs_written2 & inputs_read2; key->opt.hw_vs.kill_outputs = ~linked & outputs_written; - key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2; } /* Compute the key for the hw shader variant */ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_shader_selector *sel, struct si_shader_key *key) { struct si_context *sctx = (struct si_context *)ctx; memset(key, 0, sizeof(*key)); @@ -1835,37 +1830,29 @@ void si_init_shader_selector_async(void *job, int thread_index) unsigned offset = shader->info.vs_output_param_offset[i]; if (offset <= AC_EXP_PARAM_OFFSET_31) continue; unsigned name = sel->info.output_semantic_name[i]; unsigned index = sel->info.output_semantic_index[i]; unsigned id; switch (name) { - case TGSI_SEMANTIC_GENERIC: - /* don't process indices the function can't handle */ - if (index >= 60) - break; - /* fall through */ - case TGSI_SEMANTIC_CLIPDIST: - id = si_shader_io_get_unique_index(name, index); - sel->outputs_written &= ~(1ull << id); - break; case TGSI_SEMANTIC_POSITION: /* ignore these */ case TGSI_SEMANTIC_PSIZE: case TGSI_SEMANTIC_CLIPVERTEX: case TGSI_SEMANTIC_EDGEFLAG: break; default: - id = si_shader_io_get_unique_index2(name, index); - sel->outputs_written2 &= ~(1u << id); + id = si_shader_io_get_unique_index(name, index); + sel->outputs_written &= ~(1ull << id); + break; } } } } /* Pre-compilation. 
*/ if (sscreen->b.debug_flags & DBG_PRECOMPILE) { struct si_shader_ctx_state state = {sel}; struct si_shader_key key; @@ -1992,64 +1979,49 @@ static void *si_create_shader_selector(struct pipe_context *ctx, unsigned index = sel->info.output_semantic_index[i]; switch (name) { case TGSI_SEMANTIC_TESSINNER: case TGSI_SEMANTIC_TESSOUTER: case TGSI_SEMANTIC_PATCH: sel->patch_outputs_written |= 1llu << si_shader_io_get_unique_index_patch(name, index); break; - case TGSI_SEMANTIC_GENERIC: - /* don't process indices the function can't handle */ - if (index >= 60) - break; - /* fall through */ - case TGSI_SEMANTIC_POSITION: - case TGSI_SEMANTIC_PSIZE: - case TGSI_SEMANTIC_CLIPDIST: - sel->outputs_written |= - 1llu << si_shader_io_get_unique_index(name, index); - break; case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */ case TGSI_SEMANTIC_EDGEFLAG: break; default: - sel->outputs_written2 |= - 1u << si_shader_io_get_unique_index2(name, index); + sel->outputs_written |= + 1llu << si_shader_io_get_unique_index(name, index); } } sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16; /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank * conflicts, i.e. each vertex will start at a different bank. 
*/ if (sctx->b.chip_class >= GFX9) sel->esgs_itemsize += 4; break; case PIPE_SHADER_FRAGMENT: for (i = 0; i < sel->info.num_inputs; i++) { unsigned name = sel->info.input_semantic_name[i]; unsigned index = sel->info.input_semantic_index[i]; switch (name) { - case TGSI_SEMANTIC_CLIPDIST: - case TGSI_SEMANTIC_GENERIC: + default: sel->inputs_read |= 1llu << si_shader_io_get_unique_index(name, index); break; case TGSI_SEMANTIC_PCOORD: /* ignore this */ break; - default: - sel->inputs_read2 |= - 1u << si_shader_io_get_unique_index2(name, index); } } for (i = 0; i < 8; i++) if (sel->info.colors_written & (1 << i)) sel->colors_written_4bit |= 0xf << (4 * i); for (i = 0; i < sel->info.num_inputs; i++) { if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) { int index = sel->info.input_semantic_index[i]; -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev