On Tue, Oct 24, 2017 at 9:06 AM, Chris Wilson <ch...@chris-wilson.co.uk> wrote:
> Through the use of mocs, we can define the cache usage for any surface > used by the GPU. In particular, we can request that L3 cache be > allocated for either a read/write miss so that subsequent reads can be > fetched from cache rather than memory. A consequence of this is that if > we allocate a L3/LLC cacheline for a read and the object is changed in > main memory (e.g. a PCIe write bypassing the CPU) then the next read > will be serviced from the stale cache and not from the new data in > memory. This is an issue for external PRIME buffers where we may miss > the updates entirely if the image is small enough to fit within our > cache. > > Currently, we have a single bit to mark all external buffers so use that > to tell us when it is unsafe to use a cache override in mocs and > fallback to the PTE value instead (which should be set to the correct > cache level to be coherent amongst all active parties: PRIME, scanout and > render). This may be refined in future to limit the override to buffers > outside the control of mesa; as buffers being shared between mesa > clients should be able to coordinate themselves without resolves. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101691 > Cc: Kenneth Graunke <kenn...@whitecape.org> > Cc: Jason Ekstrand <ja...@jlekstrand.net> > Cc: Lyude Paul <ly...@redhat.com> > Cc: Timo Aalton <tjaal...@ubuntu.com> > Cc: Ben Widawsky <b...@bwidawsk.net> > Cc: Daniel Vetter <daniel.vet...@ffwll.ch> > --- > src/intel/blorp/blorp.c | 1 + > src/intel/blorp/blorp.h | 1 + > src/intel/blorp/blorp_genX_exec.h | 2 +- > src/intel/blorp/blorp_priv.h | 1 + > src/mesa/drivers/dri/i965/brw_blorp.c | 1 + > src/mesa/drivers/dri/i965/brw_state.h | 3 ++- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 16 +++++++++++----- > 7 files changed, 18 insertions(+), 7 deletions(-) > > diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c > index 7cc6335f2f..459ad66652 100644 > --- a/src/intel/blorp/blorp.c > +++ b/src/intel/blorp/blorp.c > @@ -71,6 +71,7 @@ brw_blorp_surface_info_init(struct blorp_context *blorp, > surf->surf->logical_level0_px.array_len)); > > info->enabled = true; > + info->external = surf->external; > > if (format == ISL_FORMAT_UNSUPPORTED) > format = surf->surf->format; > diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h > index 9716c66302..af056c9d52 100644 > --- a/src/intel/blorp/blorp.h > +++ b/src/intel/blorp/blorp.h > @@ -106,6 +106,7 @@ struct blorp_surf > enum isl_aux_usage aux_usage; > > union isl_color_value clear_color; > + bool external; > }; > > void > diff --git a/src/intel/blorp/blorp_genX_exec.h > b/src/intel/blorp/blorp_genX_exec.h > index 5389262098..18715788ff 100644 > --- a/src/intel/blorp/blorp_genX_exec.h > +++ b/src/intel/blorp/blorp_genX_exec.h > @@ -1328,7 +1328,7 @@ blorp_emit_surface_states(struct blorp_batch *batch, > blorp_emit_surface_state(batch, ¶ms->src, > surface_maps[BLORP_TEXTURE_BT_INDEX], > surface_offsets[BLORP_TEXTURE_ > BT_INDEX], > - NULL, false); > + NULL, params->src.external); > } > } > > diff --git a/src/intel/blorp/blorp_priv.h b/src/intel/blorp/blorp_priv.h > index c7d5d308da..f841aa7cdc 100644 > --- a/src/intel/blorp/blorp_priv.h > +++ b/src/intel/blorp/blorp_priv.h > @@ -47,6 +47,7 @@ enum { > struct brw_blorp_surface_info > { > bool enabled; > + bool external; > > struct isl_surf surf; > struct blorp_address addr; > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c > b/src/mesa/drivers/dri/i965/brw_blorp.c > index ed4f9870f2..563d13a037 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp.c > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c > @@ -160,6 +160,7 @@ blorp_surf_for_miptree(struct brw_context *brw, > .offset = mt->offset, > .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0, > }; > + surf->external = mt->bo->external; > > surf->aux_usage = aux_usage; > > diff --git a/src/mesa/drivers/dri/i965/brw_state.h > b/src/mesa/drivers/dri/i965/brw_state.h > index 8db354cf23..01c0cd12cb 100644 > --- a/src/mesa/drivers/dri/i965/brw_state.h > +++ b/src/mesa/drivers/dri/i965/brw_state.h > @@ -342,6 +342,7 @@ void gen10_init_atoms(struct brw_context *brw); > * may still respect that. > */ > #define GEN7_MOCS_L3 1 > +#define GEN7_MOCS_PTE 0 > > /* Ivybridge only: cache in LLC. > * Specifying zero here means to use the PTE values set by the kernel; > @@ -367,7 +368,7 @@ void gen10_init_atoms(struct brw_context *brw); > */ > #define BDW_MOCS_WB 0x78 > #define BDW_MOCS_WT 0x58 > -#define BDW_MOCS_PTE 0x18 > +#define BDW_MOCS_PTE 0x08 > Why are we throwing away L3 caching here and in gen7 above? We should be able to handle L3 caching with manual texture/render cache invalidates/flushes. Our performance is already going to be not great with linear buffers with no LLC, we shouldn't kill it completely. > /* Skylake: MOCS is now an index into an array of 62 different caching > * configurations programmed by the kernel. > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index f4e9cf48c6..50a3682efc 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -63,12 +63,17 @@ uint32_t tex_mocs[] = { > }; > > uint32_t rb_mocs[] = { > - [7] = GEN7_MOCS_L3, > + [7] = GEN7_MOCS_PTE, > [8] = BDW_MOCS_PTE, > [9] = SKL_MOCS_PTE, > [10] = CNL_MOCS_PTE, > }; > > +static inline uint32_t get_tex_mocs(struct brw_bo *bo, unsigned int gen) > +{ > + return (bo && bo->external ? rb_mocs : tex_mocs)[gen]; > +} > + > static void > get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt, > GLenum target, struct isl_view *view, > @@ -585,7 +590,7 @@ brw_update_texture_surface(struct gl_context *ctx, > aux_usage = ISL_AUX_USAGE_NONE; > > brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, > - tex_mocs[devinfo->gen], > + get_tex_mocs(mt->bo, devinfo->gen), > surf_offset, surf_index, > 0); > } > @@ -616,7 +621,7 @@ brw_emit_buffer_surface_state(struct brw_context *brw, > .size = buffer_size, > .format = surface_format, > .stride = pitch, > - .mocs = tex_mocs[devinfo->gen]); > + .mocs = get_tex_mocs(bo, devinfo->gen)); > } > > void > @@ -1106,7 +1111,7 @@ update_renderbuffer_read_surfaces(struct > brw_context *brw) > aux_usage = ISL_AUX_USAGE_NONE; > > brw_emit_surface_state(brw, irb->mt, target, view, aux_usage, > - tex_mocs[devinfo->gen], > + get_tex_mocs(irb->mt->bo, > devinfo->gen), > surf_offset, surf_index, > 0); > > @@ -1598,7 +1603,8 @@ update_image_surface(struct brw_context *brw, > > view.base_array_layer, > view.array_len)); > brw_emit_surface_state(brw, mt, mt->target, view, > - ISL_AUX_USAGE_NONE, > tex_mocs[devinfo->gen], > + ISL_AUX_USAGE_NONE, > + get_tex_mocs(mt->bo, devinfo->gen), > surf_offset, surf_index, > access == GL_READ_ONLY ? 0 : > RELOC_WRITE); > } > -- > 2.15.0.rc1 > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev