--- src/mesa/drivers/dri/i965/brw_context.h | 44 +++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/gen7_blorp.cpp | 8 +++++ src/mesa/drivers/dri/i965/gen7_gs_state.c | 18 +++++++++++ src/mesa/drivers/dri/i965/gen7_urb.c | 24 ++------------- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 38 +++++++++++++++++++++++ src/mesa/drivers/dri/i965/intel_batchbuffer.h | 1 + 6 files changed, 111 insertions(+), 22 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index bec4d6b..9e96e1d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -862,6 +862,44 @@ struct intel_sync_object { drm_intel_bo *bo; }; + +/** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > Geometry + * > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in the + * 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. + * + * This enum keeps track of the information we need to determine when a flush + * needs to happen. + */ +enum ivbgt2_gs_flush_workaround_state { + + /** + * The pipeline has been flushed since the last 3DSTATE_GS, so no further + * flushing is needed right now. + */ + IVBGT2_GS_FLUSH_UNNECESSARY, + + /** + * Since the last flush, primitives may have been rendered with GS enabled. + * So a flush will be needed next time we want to disable the GS. + */ + IVBGT2_GS_FLUSH_NEEDED_BEFORE_DISABLE, + + /** + * Since the last flush, primitives may have been rendered with GS disabled. + * So a flush will be needed next time we want to enable the GS. + */ + IVBGT2_GS_FLUSH_NEEDED_BEFORE_ENABLE, +}; + + struct intel_batchbuffer { /** Current batchbuffer being queued up. */ drm_intel_bo *bo; @@ -871,6 +909,12 @@ struct intel_batchbuffer { drm_intel_bo *workaround_bo; bool need_workaround_flush; + /** + * For Ivy Bridge GT2, state machine to determine whether a flush is needed + * before changing GS enable. Ignored on all other platforms. 
+ */ + enum ivbgt2_gs_flush_workaround_state ivbgt2_gs_flush_needed; + struct cached_batch_item *cached_items; uint16_t emit, total; diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 71f31b7..accb93e 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -402,6 +402,14 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw, OUT_BATCH(0); ADVANCE_BATCH(); + if (!brw->is_haswell && brw->gt == 2) { + if (brw->batch.ivbgt2_gs_flush_needed == + IVBGT2_GS_FLUSH_NEEDED_BEFORE_DISABLE) { + gen7_emit_cs_stall_flush(brw); + } + brw->batch.ivbgt2_gs_flush_needed = IVBGT2_GS_FLUSH_NEEDED_BEFORE_ENABLE; + } + BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 2602200..c2f036e 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -81,6 +81,15 @@ upload_gs_state(struct brw_context *brw) gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); if (active) { + if (!brw->is_haswell && brw->gt == 2) { + if (brw->batch.ivbgt2_gs_flush_needed == + IVBGT2_GS_FLUSH_NEEDED_BEFORE_ENABLE) { + gen7_emit_cs_stall_flush(brw); + } + brw->batch.ivbgt2_gs_flush_needed = + IVBGT2_GS_FLUSH_NEEDED_BEFORE_DISABLE; + } + BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(stage_state->prog_offset); @@ -159,6 +168,15 @@ upload_gs_state(struct brw_context *brw) OUT_BATCH(dw6); ADVANCE_BATCH(); } else { + if (!brw->is_haswell && brw->gt == 2) { + if (brw->batch.ivbgt2_gs_flush_needed == + IVBGT2_GS_FLUSH_NEEDED_BEFORE_DISABLE) { + gen7_emit_cs_stall_flush(brw); + } + brw->batch.ivbgt2_gs_flush_needed = + IVBGT2_GS_FLUSH_NEEDED_BEFORE_ENABLE; + } + BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); /* prog_bo */ diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c 
b/src/mesa/drivers/dri/i965/gen7_urb.c index 6dcdfe4..c638586 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -122,28 +122,8 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, * * No such restriction exists for Haswell. */ - if (!brw->is_haswell) { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); - /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20] - * CS Stall): - * - * One of the following must also be set: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - * We choose to do a Post-Sync Operation (Write Immediate Data), since - * it seems like it will incur the least additional performance penalty. - */ - OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_RELOC(brw->batch.workaround_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } + if (!brw->is_haswell) + gen7_emit_cs_stall_flush(brw); } const struct brw_tracked_state gen7_push_constant_space = { diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 6d1ae79..66d0c96 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -194,6 +194,11 @@ brw_new_batch(struct brw_context *brw) */ brw->batch.need_workaround_flush = true; + /* The kernel flushes the pipeline between batches, so no GS workaround + * flush is necessary. + */ + brw->batch.ivbgt2_gs_flush_needed = IVBGT2_GS_FLUSH_UNNECESSARY; + brw->state_batch_count = 0; brw->ib.type = -1; @@ -510,6 +515,37 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw) ADVANCE_BATCH(); } + +/** + * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set. 
+ */ +void +gen7_emit_cs_stall_flush(struct brw_context *brw) +{ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20] + * CS Stall): + * + * One of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * + * We choose to do a Post-Sync Operation (Write Immediate Data), since + * it seems like it will incur the least additional performance penalty. + */ + OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_RELOC(brw->batch.workaround_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(0); + ADVANCE_BATCH(); + brw->batch.ivbgt2_gs_flush_needed = IVBGT2_GS_FLUSH_UNNECESSARY; +} + + /** * Emits a PIPE_CONTROL with a non-zero post-sync operation, for * implementing two workarounds on gen6. 
From section 1.4.7.1 @@ -560,6 +596,7 @@ intel_emit_post_sync_nonzero_flush(struct brw_context *brw) OUT_BATCH(0); /* address */ OUT_BATCH(0); /* write data */ ADVANCE_BATCH(); + brw->batch.ivbgt2_gs_flush_needed = IVBGT2_GS_FLUSH_UNNECESSARY; BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); @@ -612,6 +649,7 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw) OUT_BATCH(0); /* write address */ OUT_BATCH(0); /* write data */ ADVANCE_BATCH(); + brw->batch.ivbgt2_gs_flush_needed = IVBGT2_GS_FLUSH_UNNECESSARY; } } else { BEGIN_BATCH(4); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index d46f48e..cabbb69 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -59,6 +59,7 @@ void intel_batchbuffer_emit_mi_flush(struct brw_context *brw); void intel_emit_post_sync_nonzero_flush(struct brw_context *brw); void intel_emit_depth_stall_flushes(struct brw_context *brw); void gen7_emit_vs_workaround_flush(struct brw_context *brw); +void gen7_emit_cs_stall_flush(struct brw_context *brw); static INLINE uint32_t float_as_int(float f) { -- 1.8.4.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev