On Mon, Sep 07, 2015 at 10:15:56AM -0700, Kenneth Graunke wrote: > On Saturday, September 05, 2015 08:58:07 PM Chris Wilson wrote: > > On Sat, Sep 05, 2015 at 11:30:44AM -0700, Jordan Justen wrote: > > > From: Francisco Jerez <curroje...@riseup.net> > > > > > > Fixes > > > arb_shader_image_load_store/execution/load-from-cleared-image.shader_test > > > > > > Cc: Chris Wilson <ch...@chris-wilson.co.uk> > > > Cc: Jason Ekstrand <jason.ekstr...@intel.com> > > > Tested-by: Jordan Justen <jordan.l.jus...@intel.com> > > > --- > > > RE: i965: Perform an explicit flush after doing > > > _mesa_meta_pbo_TexSubImage > > > > > > curro has some concerns about potential perf impact by this and > > > wanted it to be checked on small-core w/CPU bound apps. > > > Unfortunately, he is on vacation now. > > > > One thing that may help (other than having a bitfield of valid > > ImageUnits being more cache friendly) is that intel_update_state() need > > only invalidate state. Moving the heavyweight resolves/flushes to just > > prior to running the pipeline should help reduce the profile of the > > fairly frequently called mesa_update_state(). > > -Chris > > _________________________________ > < welcome to recursive meta hell! > > --------------------------------- > \ / \ //\ > \ |\___/| / \// \\ > /0 0 \__ / // | \ \ > / / \/_/ // | \ \ > @_^_@'/ \/_ // | \ \ > //_^_/ \/_ // | \ \ > ( //) | \/// | \ \ > ( / /) _|_ / ) // | \ _\ > ( // /) '/,_ _ _/ ( ; -. | _ _\.-~ .-~~~^-. > (( / / )) ,-{ _ `-.|.-~-. .~ `. > (( // / )) '/\ / ~-. _ .-~ .-~^-. \ > (( /// )) `. { } / \ \ > (( / )) .----~-.\ \-' .~ \ `. \^-. > ///.----..> \ _ -~ `. ^-` ^-_ > ///-._ _ _ _ _ _ _}^ - - - - ~ ~-- ,.-~ > /.-~ > > That sounds reasonable, but fair warning: Kristian spent literally a month > trying to figure out how to make this work, and we tried a wide variety of > reasonable sounding plans that ended up falling apart. Awkward resolve > timings make for a world of nasty corner cases.
Solution I have is to add a new callback from vbo before vbo->draw_prims to avoid the recursion. Looking at it, I am pretty sure the current code will also recurse and fail miserable along some meta paths. Haven't checked non-i965 for sanity, so wip: diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 78cd269..580131d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -160,37 +160,12 @@ static void intel_update_state(struct gl_context * ctx, GLuint new_state) { struct brw_context *brw = brw_context(ctx); - struct intel_texture_object *tex_obj; - struct intel_renderbuffer *depth_irb; if (ctx->swrast_context) _swrast_InvalidateState(ctx, new_state); _vbo_InvalidateState(ctx, new_state); brw->NewGLState |= new_state; - - _mesa_unlock_context_textures(ctx); - - /* Resolve the depth buffer's HiZ buffer. */ - depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - if (depth_irb) - intel_renderbuffer_resolve_hiz(brw, depth_irb); - - /* Resolve depth buffer and render cache of each enabled texture. */ - int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; - for (int i = 0; i <= maxEnabledUnit; i++) { - if (!ctx->Texture.Unit[i]._Current) - continue; - tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) - continue; - intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); - intel_miptree_resolve_color(brw, tex_obj->mt); - if (brw_check_dirty(tex_obj->mt->bo)) - brw->ctx.NewDriverState |= BRW_NEW_CACHE_FLUSH; - } - - _mesa_lock_context_textures(ctx); } #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 7a42403..bda251f 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -339,6 +339,38 @@ brw_merge_inputs(struct brw_context *brw, } } +static void +brw_draw_resolve(struct gl_context *ctx) +{ + struct brw_context *brw = brw_context(ctx); + struct intel_texture_object *tex_obj; + struct intel_renderbuffer *depth_irb; + + /* Resolve the depth buffer's HiZ buffer. */ + depth_irb = intel_get_renderbuffer(brw->ctx.DrawBuffer, BUFFER_DEPTH); + if (depth_irb) + intel_renderbuffer_resolve_hiz(brw, depth_irb); + + /* Resolve depth buffer and render cache of each enabled texture. */ + int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit; + for (int i = 0; i <= maxEnabledUnit; i++) { + if (!brw->ctx.Texture.Unit[i]._Current) + continue; + tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current); + if (!tex_obj || !tex_obj->mt) + continue; + intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); + intel_miptree_resolve_color(brw, tex_obj->mt); + if (tex_obj->mt->bo->dirty) + brw->ctx.NewDriverState |= BRW_NEW_CACHE_FLUSH; + } +} + + /** * \brief Call this after drawing to mark which buffers need resolving * @@ -592,6 +624,7 @@ brw_draw_init(struct brw_context *brw) /* Register our drawing function: */ + vbo->resolve = brw_draw_resolve; vbo->draw_prims = brw_draw_prims; for (int i = 0; i < VERT_ATTRIB_MAX; i++) diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 2aaff5d..b64c468 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -89,6 +89,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx, struct _glapi_table *exec); +typedef void (*vbo_resolve_func)( struct gl_context *ctx); typedef void (*vbo_draw_func)( struct gl_context *ctx, const struct _mesa_prim *prims, GLuint nr_prims, diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c index e3eb286..1f0b46a 100644 --- a/src/mesa/vbo/vbo_context.c +++ b/src/mesa/vbo/vbo_context.c @@ -148,11 +148,30 @@ static void init_mat_currval(struct gl_context *ctx) } +static void nop_resolve(struct gl_context *ctx) +{ +} + +static void nop_draw(struct gl_context *ctx, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, + struct gl_buffer_object *indirect) +{ +} + GLboolean _vbo_CreateContext( struct gl_context *ctx ) { struct vbo_context *vbo = CALLOC_STRUCT(vbo_context); ctx->vbo_context = vbo; + vbo->draw_prims = nop_draw; + vbo->resolve = nop_resolve; /* Initialize the arrayelt helper */ diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h index a376efe..c4033ee4 100644 --- a/src/mesa/vbo/vbo_context.h +++ b/src/mesa/vbo/vbo_context.h @@ -75,6 +75,7 @@ struct vbo_context { /* Callback into the driver. This must always succeed, the driver * is responsible for initiating any fallback actions required: */ + vbo_resolve_func resolve; vbo_draw_func draw_prims; }; diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 34d2c1d..d592ae4 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -549,6 +549,7 @@ vbo_bind_arrays(struct gl_context *ctx) struct vbo_context *vbo = vbo_context(ctx); struct vbo_exec_context *exec = &vbo->exec; + vbo->resolve(ctx); vbo_draw_method(vbo, DRAW_ARRAYS); if (exec->array.recalculate_inputs) { diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 2bfb0c3..dcbb9a5 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -388,11 +388,14 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped) if (exec->vtx.copied.nr != exec->vtx.vert_count) { struct gl_context *ctx = exec->ctx; + + vbo_context(ctx)->resolve(ctx); /* Before the update_state() as this may raise _NEW_VARYING_VP_INPUTS * from _mesa_set_varying_vp_inputs(). */ - vbo_exec_bind_arrays( ctx ); + vbo_draw_method(vbo_context(ctx), DRAW_BEGIN_END); + vbo_exec_bind_arrays( ctx ); if (ctx->NewState) _mesa_update_state( ctx ); diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index b1fd689..2103b8e 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -297,6 +297,8 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data) return; } + vbo_context(ctx)->resolve(ctx); + vbo_bind_vertex_list( ctx, node ); vbo_draw_method(vbo_context(ctx), DRAW_DISPLAY_LIST); -Chris -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev