The driver was setting MOCS (Memory Object Control State) to 0 for all objects. This patch sets it as follows: renderbuffer, depthbuffer => LLC uncacheable, L3 cacheable; texture, stencil, hiz => LLC cacheable, L3 cacheable.
The goal here is to avoid blowing out the LLC with too-large buffers. Performance: Haswell Harris Beach GT3 Android 4.2.2 kernel based on 3.8-4fc7c97 GLBenchmark 2.5.1 Egypt HD C24Z16 Offscreen DXT1 +32.0309% +/- 0.775397%, n = 5, 95% confidence GLBenchmark 2.7 T-Rex HD C24Z16 Offscreen Fixed timestep ETC1 +20.2435% +/- 0.821163%, n = 5, 95% confidence CC: Stéphane Marchesin <marc...@chromium.org> CC: Kenneth Graunke <kenn...@whitecape.org> CC: Eric Anholt <e...@anholt.net> CC: Matt Turner <matts...@gmail.com> Signed-off-by: Chad Versace <chad.vers...@linux.intel.com> --- src/mesa/drivers/dri/i965/brw_context.c | 31 +++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_context.h | 13 ++++++++++ src/mesa/drivers/dri/i965/gen7_blorp.cpp | 14 +++++++--- src/mesa/drivers/dri/i965/gen7_misc_state.c | 16 ++++++++++-- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 7 +++-- 5 files changed, 74 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 4650553..edcf59d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -418,3 +418,34 @@ brwCreateContext(int api, return true; } +/** + * Get the region's Memory Object Control State. + */ +uint32_t +brw_get_mocs(struct brw_context *brw, + struct intel_region *region, + enum brw_mocs_usage usage) +{ + struct intel_context *intel = &brw->intel; + uint32_t mocs = 0; + + if (intel->is_haswell) { + /* This heuristic is dumb: it considers the buffer's usage, but not its + * size. A more intelligent heuristic may give us better performance. + */ + switch (usage) { + case BRW_MOCS_USAGE_RB_SURFACE: + case BRW_MOCS_USAGE_DEPTH: + /* These surfaces are usually so large they blow out the LLC. 
*/ + mocs = HSW_MOCS_LCC_UNCACHEABLE | HSW_MOCS_L3_CACHEABLE; + break; + case BRW_MOCS_USAGE_TEX_SURFACE: + case BRW_MOCS_USAGE_HIZ: + case BRW_MOCS_USAGE_STENCIL: + mocs = HSW_MOCS_LCC_WB_TO_ALL | HSW_MOCS_L3_CACHEABLE; + break; + } + } + + return mocs; +} diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c682501..172d36d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1371,6 +1371,19 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, uint32_t width, uint32_t height, uint32_t tile_x, uint32_t tile_y); +enum brw_mocs_usage { + BRW_MOCS_USAGE_RB_SURFACE, + BRW_MOCS_USAGE_TEX_SURFACE, + BRW_MOCS_USAGE_DEPTH, + BRW_MOCS_USAGE_STENCIL, + BRW_MOCS_USAGE_HIZ, +}; + +uint32_t +brw_get_mocs(struct brw_context *brw, + struct intel_region *region, + enum brw_mocs_usage usage); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 1c23866..48e98dc 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -151,6 +151,9 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, */ struct intel_region *region = surface->mt->region; uint32_t tile_x, tile_y; + uint32_t mocs = brw_get_mocs(brw, region, is_render_target + ? BRW_MOCS_USAGE_RB_SURFACE + : BRW_MOCS_USAGE_TEX_SURFACE); uint32_t tiling = surface->map_stencil_as_y_tiled ? 
I915_TILING_Y : region->tiling; @@ -183,7 +186,8 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, assert(tile_x % 4 == 0); assert(tile_y % 2 == 0); surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) | - SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET); + SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) | + SET_FIELD(mocs, GEN7_SURFACE_MOCS); surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) | SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT); @@ -598,6 +602,8 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, intel_region_get_aligned_offset(params->depth.mt->region, draw_x & ~tile_mask_x, draw_y & ~tile_mask_y, false); + uint32_t depth_mocs = brw_get_mocs(brw, params->depth.mt->region, + BRW_MOCS_USAGE_DEPTH); /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth @@ -634,7 +640,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, offset); OUT_BATCH((params->depth.width + tile_x - 1) << 4 | (params->depth.height + tile_y - 1) << 18); - OUT_BATCH(0); + OUT_BATCH(depth_mocs); OUT_BATCH(tile_x | tile_y << 16); OUT_BATCH(0); @@ -648,10 +654,12 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, intel_region_get_aligned_offset(hiz_region, draw_x & ~tile_mask_x, (draw_y & ~tile_mask_y) / 2, false); + uint32_t hiz_mocs = brw_get_mocs(brw, hiz_region, BRW_MOCS_USAGE_HIZ); BEGIN_BATCH(3); OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); - OUT_BATCH(hiz_region->pitch - 1); + OUT_BATCH((hiz_mocs << 25) | + (hiz_region->pitch - 1)); OUT_RELOC(hiz_region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, hiz_offset); diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c index 12b752c..7e358ae 100644 --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c @@ -41,6 +41,11 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, { struct intel_context *intel = 
&brw->intel; struct gl_context *ctx = &intel->ctx; + uint32_t depth_mocs = 0; + + if (depth_mt) { + depth_mocs = brw_get_mocs(brw, depth_mt->region, BRW_MOCS_USAGE_DEPTH); + } intel_emit_depth_stall_flushes(intel); @@ -64,7 +69,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, OUT_BATCH(((width + tile_x - 1) << 4) | ((height + tile_y - 1) << 18)); - OUT_BATCH(0); + OUT_BATCH(depth_mocs); OUT_BATCH(tile_x | (tile_y << 16)); OUT_BATCH(0); ADVANCE_BATCH(); @@ -77,9 +82,13 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, ADVANCE_BATCH(); } else { struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt; + uint32_t hiz_mocs = brw_get_mocs(brw, hiz_mt->region, + BRW_MOCS_USAGE_HIZ); + BEGIN_BATCH(3); OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); - OUT_BATCH(hiz_mt->region->pitch - 1); + OUT_BATCH((hiz_mocs << 25) | + (hiz_mt->region->pitch - 1)); OUT_RELOC(hiz_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, @@ -95,6 +104,8 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, ADVANCE_BATCH(); } else { const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0; + uint32_t stencil_mocs = brw_get_mocs(brw, stencil_mt->region, + BRW_MOCS_USAGE_STENCIL); BEGIN_BATCH(3); OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); @@ -113,6 +124,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, * indicate that it does. 
*/ OUT_BATCH(enabled | + (stencil_mocs << 25) | (2 * stencil_mt->region->pitch - 1)); OUT_RELOC(stencil_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 435f9dc..34d5e68 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -294,6 +294,7 @@ gen7_update_texture_surface(struct gl_context *ctx, struct intel_mipmap_tree *mt = intelObj->mt; struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); + uint32_t mocs = brw_get_mocs(brw, mt->region, BRW_MOCS_USAGE_TEX_SURFACE); int width, height, depth; uint32_t tile_x, tile_y; @@ -347,6 +348,7 @@ gen7_update_texture_surface(struct gl_context *ctx, */ surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | + SET_FIELD(mocs, GEN7_SURFACE_MOCS) | /* mip count */ (intelObj->_MaxLevel - tObj->BaseLevel)); @@ -532,7 +534,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, uint32_t format; /* _NEW_BUFFERS */ gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); - + uint32_t mocs = brw_get_mocs(brw, region, BRW_MOCS_USAGE_RB_SURFACE); uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, &brw->wm.surf_offset[unit]); memset(surf, 0, 8 * 4); @@ -569,7 +571,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, assert(tile_x % 4 == 0); assert(tile_y % 2 == 0); surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) | - SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET); + SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) | + SET_FIELD(mocs, GEN7_SURFACE_MOCS); surf[2] = SET_FIELD(rb->Width - 1, GEN7_SURFACE_WIDTH) | SET_FIELD(rb->Height - 1, GEN7_SURFACE_HEIGHT); -- 1.8.1.4 _______________________________________________ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev