Refactor the aperture-test, rollback and retry logic into a common idiom.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_batch.h         | 18 ++++++++
 src/mesa/drivers/dri/i965/brw_compute.c       | 36 +++-------------
 src/mesa/drivers/dri/i965/brw_draw.c          | 33 ++++-----------
 src/mesa/drivers/dri/i965/genX_blorp_exec.c   | 49 ++++------------------
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 59 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/intel_blit.c        | 49 +++++++---------------
 6 files changed, 112 insertions(+), 132 deletions(-)
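The new idiom, for reference: brw_batch_begin() reserves space, saves the
rollback state and arms a setjmp target (nested begin/end pairs are counted,
so only the outermost pair does the work); brw_batch_end() runs the aperture
check and, on failure, resets to the saved state, flushes and longjmps back
to the begin point so the emission is replayed once, falling back to a flush
and a one-time warning if the retry still does not fit.  A caller then looks
roughly like the sketch below -- the function name and the 512-byte estimate
are placeholders for illustration, not values used anywhere in this patch;
the real call sites are in the brw_draw.c, brw_compute.c, blorp and blit
hunks that follow.

  /* Illustrative caller only: "emit_foo" and the 512-byte estimate are
   * placeholders, not part of the patch.
   */
  static void
  emit_foo(struct brw_context *brw)
  {
     /* Reserve space, save the rollback state and arm the setjmp target;
      * returns negative on error, and returns again with a positive value
      * when the emission below is being replayed via longjmp.
      */
     if (brw_batch_begin(&brw->batch, 512, RENDER_RING) < 0)
        return;

     /* ... upload state and emit commands (may run twice) ... */

     /* Run the aperture check; on failure reset to the saved state, flush
      * and longjmp back to brw_batch_begin() for a single retry.
      */
     brw_batch_end(&brw->batch);
  }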
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index 3899f18f83..95cdbca4fd 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -31,6 +31,8 @@
 extern "C" {
 #endif
 
+#include <setjmp.h>
+
 #include <intel_bufmgr.h>
 
 #include "util/list.h"
@@ -64,6 +66,9 @@ typedef struct brw_batch {
    bool state_base_address_emitted;
    int gen;
 
+   jmp_buf jmpbuf;
+   bool repeat;
+   unsigned begin_count;
    bool no_batch_wrap;
 
    struct {
@@ -275,6 +280,19 @@ intel_batchbuffer_space(struct brw_batch *batch)
       - USED_BATCH(batch)*4;
 }
 
+int __brw_batch_begin(struct brw_batch *batch,
+                      const int sz_bytes,
+                      enum brw_gpu_ring ring);
+#define brw_batch_begin(batch, sz, ring) ({ \
+   int __ret = 0; \
+   if ((batch)->begin_count++ == 0) { \
+      __ret = __brw_batch_begin((batch), (sz), (ring)); \
+      if (__ret == 0) \
+         __ret = setjmp((batch)->jmpbuf); \
+   } \
+   __ret; })
+void brw_batch_end(struct brw_batch *batch);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 4e76817661..da10c6fba6 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -178,7 +178,7 @@ brw_dispatch_compute_common(struct gl_context *ctx)
 {
    struct brw_context *brw = brw_context(ctx);
    int estimated_buffer_space_needed;
-   bool fail_next = false;
+   int ret;
 
    if (!_mesa_check_conditional_render(ctx))
       return;
@@ -196,40 +196,16 @@ brw_dispatch_compute_common(struct gl_context *ctx)
    estimated_buffer_space_needed += 1024; /* push constants */
    estimated_buffer_space_needed += 512; /* misc. pad */
 
-   /* Flush the batch if it's approaching full, so that we don't wrap while
-    * we've got validated state that needs to be in the same batch as the
-    * primitives.
-    */
-   intel_batchbuffer_require_space(&brw->batch, estimated_buffer_space_needed,
-                                   RENDER_RING);
-   intel_batchbuffer_save_state(&brw->batch);
+   ret = brw_batch_begin(&brw->batch, estimated_buffer_space_needed,
+                         RENDER_RING);
+   if (ret < 0)
+      return;
 
- retry:
-   brw->batch.no_batch_wrap = true;
    brw_upload_compute_state(brw);
 
    brw_emit_gpgpu_walker(brw);
 
-   brw->batch.no_batch_wrap = false;
-
-   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
-      if (!fail_next) {
-         intel_batchbuffer_reset_to_saved(&brw->batch);
-         brw_batch_flush(&brw->batch, NULL);
-         fail_next = true;
-         goto retry;
-      } else {
-         if (brw_batch_flush(&brw->batch, NULL) == -ENOSPC) {
-            static bool warned = false;
-
-            if (!warned) {
-               fprintf(stderr, "i965: Single compute shader dispatch "
-                       "exceeded available aperture space\n");
-               warned = true;
-            }
-         }
-      }
-   }
+   brw_batch_end(&brw->batch);
 
    /* Now that we know we haven't run out of aperture space, we can safely
     * reset the dirty bits.
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index afbee5167b..e2007774d7 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -442,7 +442,6 @@ brw_try_draw_prims(struct gl_context *ctx,
 {
    struct brw_context *brw = brw_context(ctx);
    GLuint i;
-   bool fail_next = false;
 
    if (ctx->NewState)
       _mesa_update_state(ctx);
@@ -495,6 +494,7 @@ brw_try_draw_prims(struct gl_context *ctx,
    for (i = 0; i < nr_prims; i++) {
       int estimated_max_prim_size;
      const int sampler_state_size = 16;
+      int ret;
 
       estimated_max_prim_size = 512; /* batchbuffer commands */
       estimated_max_prim_size += BRW_MAX_TEX_UNIT *
@@ -507,9 +507,9 @@ brw_try_draw_prims(struct gl_context *ctx,
        * we've got validated state that needs to be in the same batch as the
        * primitives.
        */
-      intel_batchbuffer_require_space(&brw->batch,
-                                      estimated_max_prim_size, RENDER_RING);
-      intel_batchbuffer_save_state(&brw->batch);
+      ret = brw_batch_begin(&brw->batch, estimated_max_prim_size, RENDER_RING);
+      if (ret < 0)
+         break;
 
       if (brw->num_instances != prims[i].num_instances ||
           brw->basevertex != prims[i].basevertex ||
@@ -581,35 +581,16 @@ brw_try_draw_prims(struct gl_context *ctx,
       else
          gen6_set_prim(brw, &prims[i]);
 
-retry:
-
       /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
        * that the state updated in the loop outside of this block is that in
-       * *_set_prim or brw_batch_flush(), which only impacts
-       * brw->ctx.NewDriverState.
+       * *_set_prim, which only impacts brw->ctx.NewDriverState.
        */
-      if (brw->ctx.NewDriverState) {
-         brw->batch.no_batch_wrap = true;
+      if (brw->ctx.NewDriverState)
         brw_upload_render_state(brw);
-      }
 
       brw_emit_prim(brw, &prims[i], brw->primitive, xfb_obj, stream);
 
-      brw->batch.no_batch_wrap = false;
-
-      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
-         if (!fail_next) {
-            intel_batchbuffer_reset_to_saved(&brw->batch);
-            brw_batch_flush(&brw->batch, NULL);
-            fail_next = true;
-            goto retry;
-         } else {
-            int ret = brw_batch_flush(&brw->batch, NULL);
-            WARN_ONCE(ret == -ENOSPC,
-                      "i965: Single primitive emit exceeded "
-                      "available aperture space\n");
-         }
-      }
+      brw_batch_end(&brw->batch);
 
       /* Now that we know we haven't run out of aperture space, we can safely
        * reset the dirty bits.
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 6b75a3b727..58a891f383 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -157,9 +157,10 @@ genX(blorp_exec)(struct blorp_batch *batch,
 {
    assert(batch->blorp->driver_ctx == batch->driver_batch);
    struct brw_context *brw = batch->driver_batch;
-   struct gl_context *ctx = &brw->ctx;
    const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1800 : 1500;
-   bool check_aperture_failed_once = false;
+
+   if (brw_batch_begin(&brw->batch, estimated_max_batch_usage, RENDER_RING) < 0)
+      return;
 
    /* Flush the sampler and render caches.  We definitely need to flush the
     * sampler cache so that we get updated contents from the render cache for
@@ -172,14 +173,6 @@ genX(blorp_exec)(struct blorp_batch *batch,
 
    brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
 
-retry:
-   intel_batchbuffer_require_space(&brw->batch,
-                                   estimated_max_batch_usage, RENDER_RING);
-   intel_batchbuffer_save_state(&brw->batch);
-   brw_bo *saved_bo = brw->batch.bo;
-   uint32_t saved_used = USED_BATCH(&brw->batch);
-   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
-
 #if GEN_GEN == 6
    /* Emit workaround flushes when we switch from drawing to blorping. */
    brw_emit_post_sync_nonzero_flush(brw);
@@ -203,35 +196,12 @@ retry:
 
    blorp_exec(batch, params);
 
-   /* Make sure we didn't wrap the batch unintentionally, and make sure we
-    * reserved enough space that a wrap will never happen.
-    */
-   assert(brw->batch.bo == saved_bo);
-   assert((USED_BATCH(brw->batch) - saved_used) * 4 +
-          (saved_state_batch_offset - brw->batch.state_batch_offset) <
-          estimated_max_batch_usage);
-   /* Shut up compiler warnings on release build */
-   (void)saved_bo;
-   (void)saved_used;
-   (void)saved_state_batch_offset;
-
-   /* Check if the blorp op we just did would make our batch likely to fail to
-    * map all the BOs into the GPU at batch exec time later. If so, flush the
-    * batch and try again with nothing else in the batch.
+   /* Flush the sampler cache so any texturing from the destination is
+    * coherent.
     */
-   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
-      if (!check_aperture_failed_once) {
-         check_aperture_failed_once = true;
-         intel_batchbuffer_reset_to_saved(&brw->batch);
-         brw_batch_flush(&brw->batch, NULL);
-         goto retry;
-      } else {
-         int ret = brw_batch_flush(&brw->batch, NULL);
-         WARN_ONCE(ret == -ENOSPC,
-                   "i965: blorp emit exceeded available aperture space\n");
-      }
-   }
+   brw_emit_mi_flush(brw);
+   brw_batch_end(&brw->batch);
 
    brw_batch_maybe_flush(&brw->batch);
 
   /* We've smashed all state compared to what the normal 3D pipeline
@@ -240,9 +210,4 @@ retry:
    brw->ctx.NewDriverState |= BRW_NEW_BLORP;
    brw->no_depth_or_stencil = false;
    brw->ib.type = -1;
-
-   /* Flush the sampler cache so any texturing from the destination is
-    * coherent.
-    */
-   brw_emit_mi_flush(brw);
 }
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index e0ba259e5e..4c5a640aff 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -348,7 +348,9 @@ brw_batch_flush(struct brw_batch *batch, struct perf_debug *info)
 
    brw->batch.reserved_space = 0;
 
+   brw->batch.begin_count++;
    brw_batch_finish_hook(&brw->batch);
+   brw->batch.begin_count--;
 
    /* Mark the end of the buffer. */
    intel_batchbuffer_emit_dword(&brw->batch, MI_BATCH_BUFFER_END);
@@ -431,3 +433,60 @@ intel_batchbuffer_data(struct brw_context *brw,
    memcpy(brw->batch.map_next, data, bytes);
    brw->batch.map_next += bytes >> 2;
 }
+
+int __brw_batch_begin(struct brw_batch *batch,
+                      const int sz_bytes,
+                      enum brw_gpu_ring ring)
+{
+   assert(batch->begin_count == 1);
+   assert(!batch->repeat);
+
+   intel_batchbuffer_require_space(batch, sz_bytes, ring);
+   intel_batchbuffer_save_state(batch);
+
+   assert(!batch->no_batch_wrap);
+   batch->no_batch_wrap = true;
+
+   return 0;
+}
+
+void brw_batch_end(struct brw_batch *batch)
+{
+   assert(batch->begin_count);
+   if (--batch->begin_count)
+      return;
+
+   assert(batch->no_batch_wrap);
+   batch->no_batch_wrap = false;
+
+   if (dri_bufmgr_check_aperture_space(&batch->bo, 1)) {
+      if (!batch->repeat) {
+         enum brw_gpu_ring ring = batch->ring;
+
+         intel_batchbuffer_reset_to_saved(batch);
+         brw_batch_flush(batch, NULL);
+
+         batch->begin_count++;
+         batch->no_batch_wrap = true;
+
+         batch->ring = ring;
+         batch->repeat = true;
+         longjmp(batch->jmpbuf, 1);
+      }
+
+      if (unlikely(brw_batch_flush(batch, NULL) == -ENOSPC)) {
+         static GLuint msg_id;
+         if (!msg_id) {
+            struct brw_context *brw = container_of(batch, brw, batch);
+            fprintf(stderr, "WARNING: Aperture space exceeded!\n");
+            _mesa_gl_debug(&brw->ctx, &msg_id,
+                           MESA_DEBUG_SOURCE_API,
+                           MESA_DEBUG_TYPE_OTHER,
+                           MESA_DEBUG_SEVERITY_HIGH,
+                           "Aperture space exceeded!\n");
+         }
+      }
+   }
+
+   batch->repeat = false;
+}
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 8c832ac249..6208cc5e18 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -591,10 +591,9 @@ intelEmitCopyBlit(struct brw_context *brw,
                   GLshort w, GLshort h,
                   GLenum logic_op)
 {
-   GLuint CMD, BR13, pass = 0;
+   GLuint CMD, BR13;
    int dst_y2 = dst_y + h;
    int dst_x2 = dst_x + w;
-   brw_bo *aper_array[3];
    bool dst_y_tiled = dst_tiling == I915_TILING_Y;
    bool src_y_tiled = src_tiling == I915_TILING_Y;
    bool use_fast_copy_blit = false;
@@ -604,25 +603,8 @@ intelEmitCopyBlit(struct brw_context *brw,
    if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
       return false;
 
-   /* do space check before going any further */
-   do {
-      aper_array[0] = brw->batch.bo;
-      aper_array[1] = dst_buffer;
-      aper_array[2] = src_buffer;
-
-      if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
-         brw_batch_flush(&brw->batch, NULL);
-         pass++;
-      } else
-         break;
-   } while (pass < 2);
-
-   if (pass >= 2)
-      return false;
-
-   unsigned length = brw->gen >= 8 ? 10 : 8;
+   const unsigned length = brw->gen >= 8 ? 10 : 8;
 
-   intel_batchbuffer_require_space(&brw->batch, length * 4, BLT_RING);
    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
        __func__,
        src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -732,6 +714,9 @@ intelEmitCopyBlit(struct brw_context *brw,
    assert(dst_x < dst_x2);
    assert(dst_y < dst_y2);
 
+   if (brw_batch_begin(&brw->batch, 100, BLT_RING) < 0)
+      return false;
+
    BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);
    OUT_BATCH(CMD | (length - 2));
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
@@ -762,6 +747,7 @@ intelEmitCopyBlit(struct brw_context *brw,
 
    brw_emit_mi_flush(brw);
 
+   brw_batch_end(&brw->batch);
    return true;
 }
 
@@ -799,10 +785,6 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
        dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
 
    unsigned xy_setup_blt_length = brw->gen >= 8 ? 10 : 8;
-   intel_batchbuffer_require_space(&brw->batch,
-                                   (xy_setup_blt_length * 4) +
-                                   (3 * 4) + dwords * 4, BLT_RING);
-
    opcode = XY_SETUP_BLT_CMD;
    if (cpp == 4)
       opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
@@ -818,6 +800,9 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
    if (dst_tiling != I915_TILING_NONE)
       blit_cmd |= XY_DST_TILED;
 
+   if (brw_batch_begin(&brw->batch, 60 + 4*dwords, BLT_RING) < 0)
+      return false;
+
    BEGIN_BATCH_BLT(xy_setup_blt_length + 3);
    OUT_BATCH(opcode | (xy_setup_blt_length - 2));
    OUT_BATCH(br13);
@@ -847,6 +832,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
 
    brw_emit_mi_flush(brw);
 
+   brw_batch_end(&brw->batch);
    return true;
 }
 
@@ -921,7 +907,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
 {
    uint32_t BR13, CMD;
    int pitch, cpp;
-   brw_bo *aper_array[2];
 
    pitch = mt->pitch;
    cpp = mt->cpp;
@@ -939,16 +924,10 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
    }
    BR13 |= pitch;
 
-   /* do space check before going any further */
-   aper_array[0] = brw->batch.bo;
-   aper_array[1] = mt->bo;
+   if (brw_batch_begin(&brw->batch, 80, BLT_RING) < 0)
+      return;
 
-   if (drm_intel_bufmgr_check_aperture_space(aper_array,
-                                             ARRAY_SIZE(aper_array)) != 0) {
-      brw_batch_flush(&brw->batch, NULL);
-   }
-
-   unsigned length = brw->gen >= 8 ? 7 : 6;
+   const unsigned length = brw->gen >= 8 ? 7 : 6;
    bool dst_y_tiled = mt->tiling == I915_TILING_Y;
 
    /* We need to split the blit into chunks that each fit within the blitter's
@@ -991,4 +970,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
    }
 
    brw_emit_mi_flush(brw);
+
+   brw_batch_end(&brw->batch);
 }
-- 
2.11.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev