For the series:

Reviewed-by: Marek Olšák <marek.ol...@amd.com>
Marek

On Wed, May 10, 2017 at 7:30 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haeh...@amd.com>
>
> By keeping track of fewer generics, everything can fit into 64 bits.
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 32
> +++++++------------------
> src/gallium/drivers/radeonsi/si_shader.h | 6 +----
> src/gallium/drivers/radeonsi/si_state_shaders.c | 22 +++--------------
> 3 files changed, 13 insertions(+), 47 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
> b/src/gallium/drivers/radeonsi/si_shader.c
> index c12c8ea..837cc1c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -141,43 +141,36 @@ unsigned si_shader_io_get_unique_index(unsigned
> semantic_name, unsigned index)
> case TGSI_SEMANTIC_CLIPDIST:
> assert(index <= 1);
> return 2 + index;
> case TGSI_SEMANTIC_GENERIC:
> if (index < SI_MAX_IO_GENERIC)
> return 4 + index;
>
> assert(!"invalid generic index");
> return 0;
>
> - default:
> - assert(!"invalid semantic name");
> - return 0;
> - }
> -}
> -
> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
> -{
> - switch (name) {
> case TGSI_SEMANTIC_FOG:
> - return 0;
> + return SI_MAX_IO_GENERIC + 4;
> case TGSI_SEMANTIC_LAYER:
> - return 1;
> + return SI_MAX_IO_GENERIC + 5;
> case TGSI_SEMANTIC_VIEWPORT_INDEX:
> - return 2;
> + return SI_MAX_IO_GENERIC + 6;
> case TGSI_SEMANTIC_PRIMID:
> - return 3;
> + return SI_MAX_IO_GENERIC + 7;
> case TGSI_SEMANTIC_COLOR: /* these alias */
> case TGSI_SEMANTIC_BCOLOR:
> - return 4 + index;
> + assert(index < 2);
> + return SI_MAX_IO_GENERIC + 8 + index;
> case TGSI_SEMANTIC_TEXCOORD:
> assert(index < 8);
> - return 6 + index;
> + assert(SI_MAX_IO_GENERIC + 10 + index < 64);
> + return SI_MAX_IO_GENERIC + 10 + index;
> default:
> assert(!"invalid semantic name");
> return 0;
> }
> }
>
> /**
> * Get the value of a shader input parameter and extract a bitfield.
> */
> static LLVMValueRef unpack_param(struct si_shader_context *ctx,
> @@ -2291,30 +2284,24 @@ static void si_llvm_export_vs(struct
> lp_build_tgsi_context *bld_base,
> case TGSI_SEMANTIC_POSITION: /* ignore these */
> case TGSI_SEMANTIC_PSIZE:
> case TGSI_SEMANTIC_CLIPVERTEX:
> case TGSI_SEMANTIC_EDGEFLAG:
> break;
> case TGSI_SEMANTIC_GENERIC:
> /* don't process indices the function can't handle */
> if (semantic_index >= SI_MAX_IO_GENERIC)
> break;
> /* fall through */
> - case TGSI_SEMANTIC_CLIPDIST:
> + default:
> if (shader->key.opt.hw_vs.kill_outputs &
> (1ull <<
> si_shader_io_get_unique_index(semantic_name, semantic_index)))
> export_param = false;
> - break;
> - default:
> - if (shader->key.opt.hw_vs.kill_outputs2 &
> - (1u <<
> si_shader_io_get_unique_index2(semantic_name, semantic_index)))
> export_param = false;
> - break;
> }
>
> if (outputs[i].vertex_stream[0] != 0 &&
> outputs[i].vertex_stream[1] != 0 &&
> outputs[i].vertex_stream[2] != 0 &&
> outputs[i].vertex_stream[3] != 0)
> export_param = false;
>
> handle_semantic:
> /* Select the correct target */
> @@ -7152,21 +7139,20 @@ static void si_dump_shader_key(unsigned processor,
> const struct si_shader *shade
>
> default:
> assert(0);
> }
>
> if ((processor == PIPE_SHADER_GEOMETRY ||
> processor == PIPE_SHADER_TESS_EVAL ||
> processor == PIPE_SHADER_VERTEX) &&
> !key->as_es && !key->as_ls) {
> fprintf(f, " opt.hw_vs.kill_outputs = 0x%"PRIx64"\n",
> key->opt.hw_vs.kill_outputs);
> - fprintf(f, " opt.hw_vs.kill_outputs2 = 0x%x\n",
> key->opt.hw_vs.kill_outputs2);
> fprintf(f, " opt.hw_vs.clip_disable = %u\n",
> key->opt.hw_vs.clip_disable);
> }
> }
>
> static void si_init_shader_ctx(struct si_shader_context *ctx,
> struct si_screen *sscreen,
> LLVMTargetMachineRef tm)
> {
> struct lp_build_tgsi_context *bld_base;
> struct lp_build_tgsi_action tmpl = {};
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
> b/src/gallium/drivers/radeonsi/si_shader.h
> index 3075900..1627de3 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -354,25 +354,23 @@ struct si_shader_selector {
> unsigned db_shader_control;
> /* Set 0xf or 0x0 (4 bits) per each written output.
> * ANDed with spi_shader_col_format.
> */
> unsigned colors_written_4bit;
>
> /* CS parameters */
> unsigned local_size;
>
> uint64_t outputs_written; /* "get_unique_index" bits */
> - uint32_t patch_outputs_written; /* "get_unique_index" bits */
> - uint32_t outputs_written2; /* "get_unique_index2" bits */
> + uint32_t patch_outputs_written; /* "get_unique_index_patch"
> bits */
>
> uint64_t inputs_read; /* "get_unique_index" bits */
> - uint32_t inputs_read2; /* "get_unique_index2" bits */
> };
>
> /* Valid shader configurations:
> *
> * API shaders VS | TCS | TES | GS |pass| PS
> * are compiled as: | | | |thru|
> * | | | | |
> * Only VS & PS: VS | | | | | PS
> * GFX6 - with GS: ES | | | GS | VS | PS
> * - with tess: LS | HS | VS | | | PS
> @@ -498,21 +496,20 @@ struct si_shader_key {
> uint8_t vs_fix_fetch[SI_MAX_ATTRIBS];
> uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS
> */
> /* When PS needs PrimID and GS is disabled. */
> unsigned vs_export_prim_id:1;
> } mono;
>
> /* Optimization flags for asynchronous compilation only. */
> struct {
> struct {
> uint64_t kill_outputs; /* "get_unique_index"
> bits */
> - uint32_t kill_outputs2; /* "get_unique_index2"
> bits */
> unsigned clip_disable:1;
> } hw_vs; /* HW VS (it can be VS, TES, GS) */
>
> /* For shaders where monolithic variants have better code.
> *
> * This is a flag that has no effect on code generation,
> * but forces monolithic shaders to be used as soon as
> * possible, because it's in the "opt" group.
> */
> unsigned prefer_mono:1;
> @@ -597,21 +594,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
> LLVMTargetMachineRef tm,
> struct si_shader *shader,
> bool is_monolithic,
> struct pipe_debug_callback *debug);
> int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
> struct si_shader *shader,
> struct pipe_debug_callback *debug);
> void si_shader_destroy(struct si_shader *shader);
> unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name,
> unsigned index);
> unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned
> index);
> -unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index);
> int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader
> *shader);
> void si_shader_dump(struct si_screen *sscreen, const struct si_shader
> *shader,
> struct pipe_debug_callback *debug, unsigned processor,
> FILE *f, bool check_debug_option);
> void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
> unsigned *lds_size);
> void si_shader_apply_scratch_relocs(struct si_shader *shader,
> uint64_t scratch_va);
> void si_shader_binary_read_config(struct ac_shader_binary *binary,
> struct si_shader_config *conf,
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
> b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 6020bec..5da6014 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -1224,36 +1224,31 @@ static void si_shader_selector_key_hw_vs(struct
> si_context *sctx,
> ps_colormask &= ps->colors_written_4bit;
>
> ps_disabled =
> sctx->queued.named.rasterizer->rasterizer_discard ||
> (!ps_colormask &&
> !ps_modifies_zs &&
> !ps->info.writes_memory);
> }
>
> /* Find out which VS outputs aren't used by the PS. */
> uint64_t outputs_written = vs->outputs_written;
> - uint32_t outputs_written2 = vs->outputs_written2;
> uint64_t inputs_read = 0;
> - uint32_t inputs_read2 = 0;
>
> outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
>
> if (!ps_disabled) {
> inputs_read = ps->inputs_read;
> - inputs_read2 = ps->inputs_read2;
> }
>
> uint64_t linked = outputs_written & inputs_read;
> - uint32_t linked2 = outputs_written2 & inputs_read2;
>
> key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
> - key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
> }
>
> /* Compute the key for the hw shader variant */
> static inline void si_shader_selector_key(struct pipe_context *ctx,
> struct si_shader_selector *sel,
> struct si_shader_key *key)
> {
> struct si_context *sctx = (struct si_context *)ctx;
>
> memset(key, 0, sizeof(*key));
> @@ -1839,32 +1834,29 @@ void si_init_shader_selector_async(void *job, int
> thread_index)
> unsigned name =
> sel->info.output_semantic_name[i];
> unsigned index =
> sel->info.output_semantic_index[i];
> unsigned id;
>
> switch (name) {
> case TGSI_SEMANTIC_GENERIC:
> /* don't process indices the function
> can't handle */
> if (index >= SI_MAX_IO_GENERIC)
> break;
> /* fall through */
> - case TGSI_SEMANTIC_CLIPDIST:
> + default:
> id =
> si_shader_io_get_unique_index(name, index);
> sel->outputs_written &= ~(1ull << id);
> break;
> case TGSI_SEMANTIC_POSITION: /* ignore these
> */
> case TGSI_SEMANTIC_PSIZE:
> case TGSI_SEMANTIC_CLIPVERTEX:
> case TGSI_SEMANTIC_EDGEFLAG:
> break;
> - default:
> - id =
> si_shader_io_get_unique_index2(name, index);
> - sel->outputs_written2 &= ~(1u << id);
> }
> }
> }
> }
>
> /* Pre-compilation. */
> if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
> struct si_shader_ctx_state state = {sel};
> struct si_shader_key key;
>
> @@ -1996,32 +1988,27 @@ static void *si_create_shader_selector(struct
> pipe_context *ctx,
> case TGSI_SEMANTIC_PATCH:
> sel->patch_outputs_written |=
> 1llu <<
> si_shader_io_get_unique_index_patch(name, index);
> break;
>
> case TGSI_SEMANTIC_GENERIC:
> /* don't process indices the function can't
> handle */
> if (index >= SI_MAX_IO_GENERIC)
> break;
> /* fall through */
> - case TGSI_SEMANTIC_POSITION:
> - case TGSI_SEMANTIC_PSIZE:
> - case TGSI_SEMANTIC_CLIPDIST:
> + default:
> sel->outputs_written |=
> 1llu <<
> si_shader_io_get_unique_index(name, index);
> break;
> case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
> case TGSI_SEMANTIC_EDGEFLAG:
> break;
> - default:
> - sel->outputs_written2 |=
> - 1u <<
> si_shader_io_get_unique_index2(name, index);
> }
> }
> sel->esgs_itemsize = util_last_bit64(sel->outputs_written) *
> 16;
>
> /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
> * conflicts, i.e. each vertex will start at a different bank.
> */
> if (sctx->b.chip_class >= GFX9)
> sel->esgs_itemsize += 4;
> break;
> @@ -2030,29 +2017,26 @@ static void *si_create_shader_selector(struct
> pipe_context *ctx,
> for (i = 0; i < sel->info.num_inputs; i++) {
> unsigned name = sel->info.input_semantic_name[i];
> unsigned index = sel->info.input_semantic_index[i];
>
> switch (name) {
> case TGSI_SEMANTIC_GENERIC:
> /* don't process indices the function can't
> handle */
> if (index >= SI_MAX_IO_GENERIC)
> break;
> /* fall through */
> - case TGSI_SEMANTIC_CLIPDIST:
> + default:
> sel->inputs_read |=
> 1llu <<
> si_shader_io_get_unique_index(name, index);
> break;
> case TGSI_SEMANTIC_PCOORD: /* ignore this */
> break;
> - default:
> - sel->inputs_read2 |=
> - 1u <<
> si_shader_io_get_unique_index2(name, index);
> }
> }
>
> for (i = 0; i < 8; i++)
> if (sel->info.colors_written & (1 << i))
> sel->colors_written_4bit |= 0xf << (4 * i);
>
> for (i = 0; i < sel->info.num_inputs; i++) {
> if (sel->info.input_semantic_name[i] ==
> TGSI_SEMANTIC_COLOR) {
> int index = sel->info.input_semantic_index[i];
> --
> 2.9.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev