Set MOCS to L3_CACHEABLE for the following packets, during the normal draw upload path and during blorp: 3DSTATE_CONSTANT_PS 3DSTATE_CONSTANT_VS 3DSTATE_DEPTH_BUFFER 3DSTATE_HIER_DEPTH_BUFFER 3DSTATE_STENCIL_BUFFER 3DSTATE_VERTEX_BUFFERS CMD_STATE_BASE_ADDRESS SURFACE_STATE
It's not possible to set the MOCS of the MCS buffer, because the MCS inherits MOCS from the parent surface. Gives +5.68% on Xonotic 1920x1080 on Haswell GT2. Performance Measurements ======================== system-info { gpu: haswell_m_gt2 0x0416 rev05 arch: x86_64 kernel: 3.9.9-1-ARCH (Archlinux) xf86-video-intel: 2.12.11-1 (Archlinux) libdrm: 2.4.46-2 (Archlinux) } test { name: pts/xonotic-1.3.1 1920x1080 quality=high delta: +5.68152% +/- 0.653452% (student's t) at 95% confidence n: 3 pooled-s: 0.288297 mesa-base: master-00d32cd } CC: Kenneth Graunke <kenn...@whitecape.org> Signed-off-by: Chad Versace <chad.vers...@linux.intel.com> --- src/mesa/drivers/dri/i965/brw_draw_upload.c | 3 +++ src/mesa/drivers/dri/i965/brw_misc_state.c | 11 +++++++- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 14 +++++++++- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 32 ++++++++++++++++++++--- src/mesa/drivers/dri/i965/gen7_misc_state.c | 13 +++++++-- src/mesa/drivers/dri/i965/gen7_vs_state.c | 10 ++++++- src/mesa/drivers/dri/i965/gen7_wm_state.c | 10 ++++++- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 18 ++++++++++++- 8 files changed, 100 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 2952027..039c299 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -658,6 +658,9 @@ static void brw_emit_vertices(struct brw_context *brw) if (brw->gen >= 7) dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; + if (brw->is_haswell) + dw0 |= HSW_MOCS_L3_CACHEABLE << 16; + OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); if (brw->gen >= 5) { diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 0ab1e76..3811870 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -1023,13 +1023,22 @@ static void upload_state_base_address( struct brw_context *brw ) */ if (brw->gen >= 6) { + uint32_t mocs; + + if (brw->is_haswell) { + mocs = HSW_MOCS_L3_CACHEABLE; + } else { + mocs = 0; + } + if (brw->gen == 6) intel_emit_post_sync_nonzero_flush(brw); BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); /* General state base address: stateless DP read/write requests */ - OUT_BATCH(1); + OUT_BATCH(mocs << 8 | + 1 /*general state base address modify enable*/); /* Surface state base address: * BINDING_TABLE_STATE * SURFACE_STATE diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index 8056bf5..2a1e765 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -74,9 +74,18 @@ void gen6_blorp_emit_state_base_address(struct brw_context *brw, const brw_blorp_params *params) { + uint32_t mocs; + + if (brw->is_haswell) { + mocs = HSW_MOCS_L3_CACHEABLE; + } else { + mocs = 0; + } + BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); - OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */ + OUT_BATCH(mocs << 8 | + 1 /* GeneralStateBaseAddressModifyEnable */); /* SurfaceStateBaseAddress */ OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); /* DynamicStateBaseAddress */ @@ -163,6 +172,9 @@ gen6_blorp_emit_vertices(struct brw_context *brw, if (brw->gen >= 7) dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; + if (brw->is_haswell) + dw0 |= HSW_MOCS_L3_CACHEABLE << 16; + BEGIN_BATCH(batch_length); OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); OUT_BATCH(dw0); diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index fbdd2be..6b5d0be 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -143,6 +143,7 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, */ struct intel_region *region = surface->mt->region; uint32_t tile_x, tile_y; + uint32_t mocs; uint32_t tiling = surface->map_stencil_as_y_tiled ? I915_TILING_Y : region->tiling; @@ -151,6 +152,12 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, &wm_surf_offset); memset(surf, 0, 8 * 4); + if (brw->is_haswell) { + mocs = HSW_MOCS_L3_CACHEABLE; + } else { + mocs = 0; + } + surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT | gen7_surface_tiling_mode(tiling); @@ -175,7 +182,8 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, assert(tile_x % 4 == 0); assert(tile_y % 2 == 0); surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) | - SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET); + SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) | + SET_FIELD(mocs, GEN7_SURFACE_MOCS); surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) | SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT); @@ -614,6 +622,8 @@ gen7_blorp_emit_constant_ps(struct brw_context *brw, const brw_blorp_params *params, uint32_t wm_push_const_offset) { + uint32_t mocs; + /* Make sure the push constants fill an exact integer number of * registers. */ @@ -622,13 +632,19 @@ gen7_blorp_emit_constant_ps(struct brw_context *brw, /* There must be at least one register worth of push constant data. */ assert(BRW_BLORP_NUM_PUSH_CONST_REGS > 0); + if (brw->is_haswell) { + mocs = HSW_MOCS_L3_CACHEABLE; + } else { + mocs = 0; + } + /* Enable push constant buffer 0. */ BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2)); OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS); OUT_BATCH(0); - OUT_BATCH(wm_push_const_offset); + OUT_BATCH(wm_push_const_offset | mocs); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -658,6 +674,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, uint32_t draw_x = params->depth.x_offset; uint32_t draw_y = params->depth.y_offset; uint32_t tile_mask_x, tile_mask_y; + uint32_t mocs; brw_get_depthstencil_tile_masks(params->depth.mt, params->depth.level, @@ -665,6 +682,12 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, NULL, &tile_mask_x, &tile_mask_y); + if (brw->is_haswell) { + mocs = HSW_MOCS_L3_CACHEABLE; + } else { + mocs = 0; + } + /* 3DSTATE_DEPTH_BUFFER */ { uint32_t tile_x = draw_x & tile_mask_x; @@ -709,7 +732,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, offset); OUT_BATCH((params->depth.width + tile_x - 1) << 4 | (params->depth.height + tile_y - 1) << 18); - OUT_BATCH(0); + OUT_BATCH(mocs); OUT_BATCH(tile_x | tile_y << 16); OUT_BATCH(0); @@ -726,7 +749,8 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, BEGIN_BATCH(3); OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); - OUT_BATCH(hiz_region->pitch - 1); + OUT_BATCH((mocs << 25) | + (hiz_region->pitch - 1)); OUT_RELOC(hiz_region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, hiz_offset); diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c index 7f61881..31e80f5 100644 --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c @@ -40,6 +40,13 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, uint32_t tile_x, uint32_t tile_y) { struct gl_context *ctx = &brw->ctx; + uint32_t mocs; + + if (brw->is_haswell) { + mocs = HSW_MOCS_L3_CACHEABLE; + } else { + mocs = 0; + } intel_emit_depth_stall_flushes(brw); @@ -63,7 +70,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, OUT_BATCH(((width + tile_x - 1) << 4) | ((height + tile_y - 1) << 18)); - OUT_BATCH(0); + OUT_BATCH(mocs); OUT_BATCH(tile_x | (tile_y << 16)); OUT_BATCH(0); ADVANCE_BATCH(); @@ -78,7 +85,8 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt; BEGIN_BATCH(3); OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); - OUT_BATCH(hiz_mt->region->pitch - 1); + OUT_BATCH((mocs << 25) | + (hiz_mt->region->pitch - 1)); OUT_RELOC(hiz_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, @@ -108,6 +116,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, * same text, and experiments indicate that this is necessary. */ OUT_BATCH(enabled | + mocs << 25 | (2 * stencil_mt->region->pitch - 1)); OUT_RELOC(stencil_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 7369a9c..c1ccc62 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -63,6 +63,14 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { + uint32_t mocs; + + if (brw->is_haswell) { + mocs = HSW_MOCS_L3_CACHEABLE; + } else { + mocs = 0; + } + BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2)); OUT_BATCH(brw->vs.push_const_size); @@ -70,7 +78,7 @@ upload_vs_state(struct brw_context *brw) /* Pointer to the VS constant buffer. Covered by the set of * state flags from gen6_prepare_wm_contants */ - OUT_BATCH(brw->vs.push_const_offset); + OUT_BATCH(brw->vs.push_const_offset | mocs); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 8f90371..46cf795 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -141,6 +141,14 @@ upload_ps_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { + uint32_t mocs; + + if (brw->is_haswell) { + mocs = HSW_MOCS_L3_CACHEABLE; + } else { + mocs = 0; + } + BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2)); @@ -150,7 +158,7 @@ upload_ps_state(struct brw_context *brw) /* Pointer to the WM constant buffer. Covered by the set of * state flags from gen6_upload_wm_push_constants. */ - OUT_BATCH(brw->wm.push_const_offset); + OUT_BATCH(brw->wm.push_const_offset | mocs); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index fe97803..322b650 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -286,6 +286,7 @@ gen7_update_texture_surface(struct gl_context *ctx, struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); uint32_t tile_x, tile_y; + uint32_t mocs; if (tObj->Target == GL_TEXTURE_BUFFER) { gen7_update_buffer_texture_surface(ctx, unit, binding_table, surf_index); @@ -301,6 +302,12 @@ gen7_update_texture_surface(struct gl_context *ctx, tObj->DepthMode, sampler->sRGBDecode); + if (brw->is_haswell) { + mocs = HSW_MOCS_L3_CACHEABLE; + } else { + mocs = 0; + } + surf[0] = translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | tex_format << BRW_SURFACE_FORMAT_SHIFT | gen7_surface_tiling_mode(mt->region->tiling) | @@ -334,6 +341,7 @@ gen7_update_texture_surface(struct gl_context *ctx, */ surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | + SET_FIELD(mocs, GEN7_SURFACE_MOCS) | /* mip count */ (intelObj->_MaxLevel - tObj->BaseLevel)); @@ -512,6 +520,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, bool is_array = false; int depth = MAX2(rb->Depth, 1); int min_array_element; + uint32_t mocs; GLenum gl_target = rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D; @@ -552,6 +561,12 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, min_array_element = irb->mt_layer; } + if (brw->is_haswell) { + mocs = HSW_MOCS_L3_CACHEABLE; + } else { + mocs = 0; + } + surf[0] = surftype << BRW_SURFACE_TYPE_SHIFT | format << BRW_SURFACE_FORMAT_SHIFT | (irb->mt->array_spacing_lod0 ? GEN7_SURFACE_ARYSPC_LOD0 @@ -571,7 +586,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, assert(brw->has_surface_tile_offset); - surf[5] = irb->mt_level - irb->mt->first_level; + surf[5] = SET_FIELD(mocs, GEN7_SURFACE_MOCS) | + (irb->mt_level - irb->mt->first_level); surf[2] = SET_FIELD(irb->mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) | SET_FIELD(irb->mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT); -- 1.8.3.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev