----- Original Message ----- > From: Roland Scheidegger <srol...@vmware.com> > > In 1d35f77228ad540a551a8e09e062b764a6e31f5e support for multiple constant > buffers was introduced. This meant we had another indirection, and we did > resolve the indirection for each constant buffer access. This looks very > reasonable since llvm can figure out if it's the same pointer, however it > turns out that this can cause llvm compilation time to go through the roof > and beyond (I've seen cases in excess of factor 100, e.g. from 50 ms to more > than 10 seconds (!)), with all the additional time spent in IR optimization > passes (and in the end all of it in DominatorTree::dominate()). > I've been unable to narrow it down a bit more (only some shaders seem > affected, > seemingly without much correlation to overall shader complexity or constant > usage) but it is easily avoidable by doing the buffer lookups themeselves > just > once (at constant buffer declaration time). > --- > src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 2 + > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 100 > +++++++++++++++--------- > 2 files changed, 65 insertions(+), 37 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > index ffd6e87..88ac3c9 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > @@ -437,6 +437,8 @@ struct lp_build_tgsi_soa_context > > LLVMValueRef consts_ptr; > LLVMValueRef const_sizes_ptr; > + LLVMValueRef consts[LP_MAX_TGSI_CONST_BUFFERS]; > + LLVMValueRef consts_sizes[LP_MAX_TGSI_CONST_BUFFERS]; > const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; > LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS]; > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > index 2b47fc2..0eaa020 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > @@ -1213,7 +1213,6 @@ emit_fetch_constant( > LLVMBuilderRef builder = gallivm->builder; > struct lp_build_context *uint_bld = &bld_base->uint_bld; > unsigned dimension = 0; > - LLVMValueRef dimension_index; > LLVMValueRef consts_ptr; > LLVMValueRef num_consts; > LLVMValueRef res; > @@ -1227,11 +1226,8 @@ emit_fetch_constant( > assert(dimension < LP_MAX_TGSI_CONST_BUFFERS); > } > > - dimension_index = lp_build_const_int32(gallivm, dimension); > - consts_ptr = > - lp_build_array_get(gallivm, bld->consts_ptr, dimension_index); > - num_consts = > - lp_build_array_get(gallivm, bld->const_sizes_ptr, dimension_index); > + consts_ptr = bld->consts[dimension]; > + num_consts = bld->consts_sizes[dimension]; > > if (reg->Register.Indirect) { > LLVMValueRef indirect_index; > @@ -2677,56 +2673,86 @@ lp_emit_declaration_soa( > const unsigned last = decl->Range.Last; > unsigned idx, i; > > - for (idx = first; idx <= last; ++idx) { > - assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]); > - switch (decl->Declaration.File) { > - case TGSI_FILE_TEMPORARY: > - if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { > - assert(idx < LP_MAX_INLINED_TEMPS); > + assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]); > + > + switch (decl->Declaration.File) { > + case TGSI_FILE_TEMPORARY: > + if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { > + assert(last < LP_MAX_INLINED_TEMPS); > + for (idx = first; idx <= last; ++idx) { > for (i = 0; i < TGSI_NUM_CHANNELS; i++) > bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, > "temp"); > } > - break; > + } > + break; > > - case TGSI_FILE_OUTPUT: > - if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { > + case TGSI_FILE_OUTPUT: > + if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { > + for (idx = first; idx <= last; ++idx) { > for (i = 0; i < TGSI_NUM_CHANNELS; i++) > bld->outputs[idx][i] = lp_build_alloca(gallivm, > vec_type, "output"); > } > - break; > + } > + break; > > - case TGSI_FILE_ADDRESS: > - /* ADDR registers are only allocated with an integer LLVM IR type, > - * as they are guaranteed to always have integers. > - * XXX: Not sure if this exception is worthwhile (or the whole idea of > - * an ADDR register for that matter). > - */ > + case TGSI_FILE_ADDRESS: > + /* ADDR registers are only allocated with an integer LLVM IR type, > + * as they are guaranteed to always have integers. > + * XXX: Not sure if this exception is worthwhile (or the whole idea of > + * an ADDR register for that matter). > + */ > + assert(last < LP_MAX_TGSI_ADDRS); > + for (idx = first; idx <= last; ++idx) { > assert(idx < LP_MAX_TGSI_ADDRS); > for (i = 0; i < TGSI_NUM_CHANNELS; i++) > bld->addr[idx][i] = lp_build_alloca(gallivm, > bld_base->base.int_vec_type, "addr"); > - break; > + } > + break; > > - case TGSI_FILE_PREDICATE: > - assert(idx < LP_MAX_TGSI_PREDS); > + case TGSI_FILE_PREDICATE: > + assert(last < LP_MAX_TGSI_PREDS); > + for (idx = first; idx <= last; ++idx) { > for (i = 0; i < TGSI_NUM_CHANNELS; i++) > bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, > "predicate"); > - break; > + } > + break; > > - case TGSI_FILE_SAMPLER_VIEW: > - /* > - * The target stored here MUST match whatever there actually > - * is in the set sampler views (what about return type?). > - */ > - assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS); > + case TGSI_FILE_SAMPLER_VIEW: > + /* > + * The target stored here MUST match whatever there actually > + * is in the set sampler views (what about return type?). > + */ > + assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS); > + for (idx = first; idx <= last; ++idx) { > bld->sv[idx] = decl->SamplerView; > - break; > - > - default: > - /* don't need to declare other vars */ > - break; > } > + break; > + > + case TGSI_FILE_CONSTANT: > + { > + /* > + * We could trivially fetch the per-buffer pointer when fetching the > + * constant, relying on llvm to figure out it's always the same > pointer > + * anyway. However, doing so results in a huge (more than factor of > 10) > + * slowdown in llvm compilation times for some (but not all) shaders > + * (more specifically, the IR optimization spends way more time in > + * DominatorTree::dominates). At least with llvm versions 3.1, 3.3. > + */ > + unsigned idx2D = decl->Dim.Index2D; > + LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D); > + assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS); > + bld->consts[idx2D] = > + lp_build_array_get(gallivm, bld->consts_ptr, index2D); > + bld->consts_sizes[idx2D] = > + lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D); > + } > + break; > + > + default: > + /* don't need to declare other vars */ > + break; > } > }
Looks good. Reviewed-by: Jose Fonseca <jfons...@vmware.com> I wonder if there are other bits from the state that we should putting in variables at the top code block. Jose _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev