Re: [Mesa-dev] [PATCH 4/4] i965: Use unsynchronized maps for the program cache on LLC platforms.

Kenneth Graunke Fri, 26 Sep 2014 14:19:31 -0700

On Friday, September 26, 2014 09:22:31 AM Kristian Høgsberg wrote:
> On Fri, Aug 29, 2014 at 11:10:50PM -0700, Kenneth Graunke wrote:
> > There's no reason to stall on pwrite - the CPU always appends to the
> > buffer and never modifies existing contents, and the GPU never writes
> > it.  Further, the CPU always appends new data before submitting a batch
> > that requires it.
> > 
> > This code predates the unsynchronized mapping feature, so we simply
> > didn't have the option when it was written.
> > 
> > Ideally, we would do this for non-LLC platforms too, but unsynchronized
> > mapping support only exists for LLC systems.
> > 
> > Saves repeated 0.001ms stalls on program upload.
> > 
> > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> > ---
> >  src/mesa/drivers/dri/i965/brw_state_cache.c | 34 
> > +++++++++++++++++++++++------
> >  1 file changed, 27 insertions(+), 7 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c 
> > b/src/mesa/drivers/dri/i965/brw_state_cache.c
> > index b9bb0fc..1d2d32f 100644
> > --- a/src/mesa/drivers/dri/i965/brw_state_cache.c
> > +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
> > @@ -172,14 +172,23 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t 
> > new_size)
> >     drm_intel_bo *new_bo;
> >  
> >     new_bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", new_size, 64);
> > +   if (brw->has_llc)
> > +      drm_intel_gem_bo_map_unsynchronized(new_bo);
> >  
> >     /* Copy any existing data that needs to be saved. */
> >     if (cache->next_offset != 0) {
> > -      brw_bo_map(brw, cache->bo, false, "program cache");
> > -      drm_intel_bo_subdata(new_bo, 0, cache->next_offset, 
> > cache->bo->virtual);
> > -      drm_intel_bo_unmap(cache->bo);
> > +      if (brw->has_llc) {
> > +         memcpy(new_bo->virtual, cache->bo->virtual, cache->next_offset);
> 
> Move the drm_intel_gem_bo_map_unsynchronized() and drm_intel_bo_unmap()
> calls into this block so they bracket the memcpy as for the subdata case
> below?
> 
> Other than that,
> 
> Reviewed-by: Kristian Høgsberg <k...@bitplanet.net>


That won't work---the point is to map new_bo, and leave it mapped...and unmap 
the old BO before throwing it away.  If I moved the map call into the if 
(cache->next_offset != 0) block, then the initial mapping would never occur.

> > +      } else {
> > +         brw_bo_map(brw, cache->bo, false, "program cache");
> > +         drm_intel_bo_subdata(new_bo, 0, cache->next_offset,
> > +                              cache->bo->virtual);
> > +         drm_intel_bo_unmap(cache->bo);
> > +      }
> >     }
> >  
> > +   if (brw->has_llc)
> > +      drm_intel_bo_unmap(cache->bo);
> >     drm_intel_bo_unreference(cache->bo);
> >     cache->bo = new_bo;
> >     cache->bo_used_by_gpu = false;
> > @@ -222,9 +231,11 @@ brw_try_upload_using_copy(struct brw_cache *cache,
> >         continue;
> >      }
> >  
> > -    brw_bo_map(brw, cache->bo, false, "program cache");
> > +         if (!brw->has_llc)
> > +            brw_bo_map(brw, cache->bo, false, "program cache");
> >      ret = memcmp(cache->bo->virtual + item->offset, data, item->size);
> > -    drm_intel_bo_unmap(cache->bo);
> > +         if (!brw->has_llc)
> > +            drm_intel_bo_unmap(cache->bo);
> >      if (ret)
> >         continue;
> >  
> > @@ -257,7 +268,7 @@ brw_upload_item_data(struct brw_cache *cache,
> >     /* If we would block on writing to an in-use program BO, just
> >      * recreate it.
> >      */
> > -   if (cache->bo_used_by_gpu) {
> > +   if (!brw->has_llc && cache->bo_used_by_gpu) {
> >        perf_debug("Copying busy program cache buffer.\n");
> >        brw_cache_new_bo(cache, cache->bo->size);
> >     }
> > @@ -280,6 +291,7 @@ brw_upload_cache(struct brw_cache *cache,
> >              uint32_t *out_offset,
> >              void *out_aux)
> >  {
> > +   struct brw_context *brw = cache->brw;
> >     struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
> >     GLuint hash;
> >     void *tmp;
> > @@ -320,7 +332,11 @@ brw_upload_cache(struct brw_cache *cache,
> >     cache->n_items++;
> >  
> >     /* Copy data to the buffer */
> > -   drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
> > +   if (brw->has_llc) {
> > +      memcpy((char *) cache->bo->virtual + item->offset, data, data_size);
> > +   } else {
> > +      drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
> > +   }
> >  
> >     *out_offset = item->offset;
> >     *(void **)out_aux = (void *)((char *)item->key + item->key_size);
> > @@ -342,6 +358,8 @@ brw_init_caches(struct brw_context *brw)
> >     cache->bo = drm_intel_bo_alloc(brw->bufmgr,
> >                               "program cache",
> >                               4096, 64);
> > +   if (brw->has_llc)
> > +      drm_intel_gem_bo_map_unsynchronized(cache->bo);
> >  
> >     cache->aux_compare[BRW_VS_PROG] = brw_vs_prog_data_compare;
> >     cache->aux_compare[BRW_GS_PROG] = brw_gs_prog_data_compare;
> > @@ -408,6 +426,8 @@ brw_destroy_cache(struct brw_context *brw, struct 
> > brw_cache *cache)
> >  
> >     DBG("%s\n", __FUNCTION__);
> >  
> > +   if (brw->has_llc)
> > +      drm_intel_bo_unmap(cache->bo);
> >     drm_intel_bo_unreference(cache->bo);
> >     cache->bo = NULL;
> >     brw_clear_cache(brw, cache);
>

signature.asc
Description: This is a digitally signed message part.

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] i965: Use unsynchronized maps for the program cache on LLC platforms.

Reply via email to