On 08/27/2012 10:49 AM, Eric Anholt wrote: > Saves 26.5MB of wasted memory allocation in the l4d2 demo. > --- > src/mesa/drivers/dri/i965/brw_context.h | 15 ++++++++--- > src/mesa/drivers/dri/i965/brw_fs.cpp | 2 -- > src/mesa/drivers/dri/i965/brw_state_cache.c | 7 +++++ > src/mesa/drivers/dri/i965/brw_wm.c | 38 > +++++++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_wm.h | 1 + > 5 files changed, 58 insertions(+), 5 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 15ef0a3..c1cd500 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -313,11 +313,11 @@ struct brw_wm_prog_data { > */ > uint32_t barycentric_interp_modes; > > - /* Pointer to tracked values (only valid once > + /* Pointers to tracked values (only valid once > * _mesa_load_state_parameters has been called at runtime). > */ > - const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ > - const float *pull_param[MAX_UNIFORMS * 4]; > + const float **param; > + const float **pull_param; > }; > > /** > @@ -619,6 +619,7 @@ struct brw_cache_item { > }; > > > +typedef void (*cache_aux_free_func)(const void *aux); > > struct brw_cache { > struct brw_context *brw; > @@ -629,6 +630,14 @@ struct brw_cache { > > uint32_t next_offset; > bool bo_used_by_gpu; > + > + /** > + * Functions used in determining whether the prog_data for a new cache > item > + * matches an existing cache item. This is sometimes done by memcmp, but > + * for some cache ids that would mean statically sizing arrays instead of > + * using nice pointers. > + */
This comment makes no sense to me. Your newly introduced function is a helper to free arrays. I don't see any changes to how key comparisons are done (memcmp or something else). I'm kind of scared that this comment implies that memcmp doesn't work but I don't see a change to not use memcmp. > + cache_aux_free_func aux_free[BRW_MAX_CACHE]; > }; > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index f5b2649..35ed3be 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -528,8 +528,6 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type > *type) > for (unsigned int i = 0; i < type->vector_elements; i++) { > unsigned int param = c->prog_data.nr_params++; > > - assert(param < ARRAY_SIZE(c->prog_data.param)); > - > this->param_index[param] = loc; > this->param_offset[param] = i; > } > diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c > b/src/mesa/drivers/dri/i965/brw_state_cache.c > index 57a5ee9..092baf3 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_cache.c > +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c > @@ -47,6 +47,7 @@ > #include "main/imports.h" > #include "intel_batchbuffer.h" > #include "brw_state.h" > +#include "brw_wm.h" > > #define FILE_DEBUG_FLAG DEBUG_STATE > > @@ -333,6 +334,8 @@ brw_init_caches(struct brw_context *brw) > cache->bo = drm_intel_bo_alloc(intel->bufmgr, > "program cache", > 4096, 64); > + > + cache->aux_free[BRW_WM_PROG] = brw_wm_prog_data_free; > } > > static void > @@ -347,6 +350,10 @@ brw_clear_cache(struct brw_context *brw, struct > brw_cache *cache) > for (i = 0; i < cache->size; i++) { > for (c = cache->items[i]; c; c = next) { > next = c->next; > + if (cache->aux_free[c->cache_id]) { > + const void *item_aux = c->key + c->key_size; > + cache->aux_free[c->cache_id](item_aux); > + } > free((void *)c->key); > free(c); > } > diff --git a/src/mesa/drivers/dri/i965/brw_wm.c > b/src/mesa/drivers/dri/i965/brw_wm.c > index e7ef9f2..1ec8264 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm.c > +++ b/src/mesa/drivers/dri/i965/brw_wm.c > @@ -253,6 +253,15 @@ brw_wm_payload_setup(struct brw_context *brw, > } > } > > +void > +brw_wm_prog_data_free(const void *in_prog_data) > +{ > + const struct brw_wm_prog_data *prog_data = in_prog_data; > + > + ralloc_free((void *)prog_data->param); > + ralloc_free((void *)prog_data->pull_param); > +} > + > /** > * All Mesa program -> GPU code generation goes through this function. > * Depending on the instructions used (i.e. flow control instructions) > @@ -266,8 +275,12 @@ bool do_wm_prog(struct brw_context *brw, > struct intel_context *intel = &brw->intel; > struct brw_wm_compile *c; > const GLuint *program; > + struct gl_shader *fs = NULL; > GLuint program_size; > > + if (prog) > + fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; > + > c = brw->wm.compile_data; > if (c == NULL) { > brw->wm.compile_data = rzalloc(NULL, struct brw_wm_compile); > @@ -290,6 +303,31 @@ bool do_wm_prog(struct brw_context *brw, > c->vreg = vreg; > c->refs = refs; > } > + > + /* Allocate the references to the uniforms that will end up in the > + * prog_data associated with the compiled program, and which will be freed > + * by the state cache. > + */ > + if (fs) { > + int param_count = fs->num_uniform_components; > + /* The backend also sometimes adds params for texture size. */ > + param_count += 2 * BRW_MAX_TEX_UNIT; > + > + c->prog_data.param = rzalloc_array(c, const float *, param_count); > + c->prog_data.pull_param = rzalloc_array(c, const float *, param_count); > + } else { > + /* brw_wm_pass0.c will also add references to 0.0 and 1.0 which are > + * uploaded as push parameters. > + */ > + int param_count = (fp->program.Base.Parameters->NumParameters + 2) * 4; > + /* The old backend also uploads constants using push parameters, so we > + * can't just rely on the NumParameters set up by . > + */ > + c->prog_data.param = rzalloc_array(c, const float *, param_count); Your sentence here is unfinished, but it sounds like you're saying we need to include constants, and I sure don't see that in the code (other than 0.0 and 1.0). Don't you need to solve that? A totally lame solution would be to use MAX_UNIFORMS for the count when using the old backend. That gives the benefit for anything not using ARB programs... > + /* The old backend never does pull constants. */ > + c->prog_data.pull_param = NULL; > + } > + > memcpy(&c->key, key, sizeof(*key)); > > c->fp = fp; > diff --git a/src/mesa/drivers/dri/i965/brw_wm.h > b/src/mesa/drivers/dri/i965/brw_wm.h > index 5e4af27..1de1690 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm.h > +++ b/src/mesa/drivers/dri/i965/brw_wm.h > @@ -484,5 +484,6 @@ bool do_wm_prog(struct brw_context *brw, > void brw_wm_debug_recompile(struct brw_context *brw, > struct gl_shader_program *prog, > const struct brw_wm_prog_key *key); > +void brw_wm_prog_data_free(const void *in_prog_data); > > #endif _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev