[Mesa-dev] [PATCH 6/9] i965: Use 'available' fence for polling query results

Chris Wilson Tue, 02 Oct 2018 11:09:01 -0700

If we always write the 'available' flag after writing the final result
of the query, we can probe that predicate to quickly query whether the
result is ready from userspace. The primary advantage of checking the
predicate is that it allows for more fine-grained queries, we do not
have to wait for the batch to finish before the query is marked as
ready.


We still do check the status of the batch after probing the query so
that if the worst happens and the batch did hang without completing the
query, we do not spin forever (although it is not as nice as completely
eliminating the ioctl, the busy-ioctl is lightweight!).

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenn...@whitecape.org>
Cc: Matt Turner <matts...@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_context.h   |  4 +-
 src/mesa/drivers/dri/i965/gen6_queryobj.c | 54 ++++++++++-------------
 2 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 840332294e6..418941c9194 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -468,8 +468,8 @@ struct brw_query_object {
    bool flushed;
 };
 
-#define GEN6_QUERY_PREDICATE (2)
-#define GEN6_QUERY_RESULTS (0)
+#define GEN6_QUERY_PREDICATE (0)
+#define GEN6_QUERY_RESULTS (1)
 
 static inline unsigned
 gen6_query_predicate_offset(const struct brw_query_object *query)
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index dc70e2a568a..b6832588333 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -40,8 +40,7 @@
 #include "intel_buffer_objects.h"
 
 static inline void
-set_query_availability(struct brw_context *brw, struct brw_query_object *query,
-                       bool available)
+set_query_available(struct brw_context *brw, struct brw_query_object *query)
 {
    /* For platforms that support ARB_query_buffer_object, we write the
     * query availability for "pipelined" queries.
@@ -58,22 +57,12 @@ set_query_availability(struct brw_context *brw, struct 
brw_query_object *query,
     * PIPE_CONTROL with an immediate write will synchronize with
     * those earlier writes, so we write 1 when the value has landed.
     */
-   if (brw->ctx.Extensions.ARB_query_buffer_object &&
-       brw_is_query_pipelined(query)) {
-      unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
 
-      if (available) {
-         /* Order available *after* the query results. */
-         flags |= PIPE_CONTROL_FLUSH_ENABLE;
-      } else {
-         /* Make it unavailable *before* any pipelined reads. */
-         flags |= PIPE_CONTROL_CS_STALL;
-      }
-
-      brw_emit_pipe_control_write(brw, flags,
-                                  query->bo, 
gen6_query_predicate_offset(query),
-                                  available);
-   }
+   brw_emit_pipe_control_write(brw,
+                               PIPE_CONTROL_WRITE_IMMEDIATE |
+                               PIPE_CONTROL_FLUSH_ENABLE,
+                               query->bo, gen6_query_predicate_offset(query),
+                               true);
 }
 
 static void
@@ -144,12 +133,12 @@ write_xfb_overflow_streams(struct gl_context *ctx,
 }
 
 static bool
-check_xfb_overflow_streams(uint64_t *results, int count)
+check_xfb_overflow_streams(const uint64_t *results, int count)
 {
    bool overflow = false;
 
    for (int i = 0; i < count; i++) {
-      uint64_t *result_i = &results[4 * i];
+      const uint64_t *result_i = &results[4 * i];
 
       if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) {
          overflow = true;
@@ -221,7 +210,8 @@ emit_pipeline_stat(struct brw_context *brw, struct brw_bo 
*bo,
  */
 static void
 gen6_queryobj_get_results(struct gl_context *ctx,
-                          struct brw_query_object *query)
+                          struct brw_query_object *query,
+                          uint64_t *results)
 {
    struct brw_context *brw = brw_context(ctx);
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
@@ -229,9 +219,6 @@ gen6_queryobj_get_results(struct gl_context *ctx,
    if (query->bo == NULL)
       return;
 
-   brw_bo_wait_rendering(query->bo);
-   uint64_t *results = query->results;
-
    switch (query->Base.Target) {
    case GL_TIME_ELAPSED:
       /* The query BO contains the starting and ending timestamps.
@@ -329,10 +316,10 @@ gen6_alloc_query(struct brw_context *brw, struct 
brw_query_object *query)
 
    query->results = brw_bo_map(brw, query->bo,
                                MAP_COHERENT | MAP_PERSISTENT |
-                               MAP_READ | MAP_ASYNC);
+                               MAP_READ | MAP_WRITE);
 
    /* For ARB_query_buffer_object: The result is not available */
-   set_query_availability(brw, query, false);
+   query->results[GEN6_QUERY_PREDICATE] = false;
 
    return 0;
 }
@@ -487,7 +474,7 @@ gen6_end_query(struct gl_context *ctx, struct 
gl_query_object *q)
    query->flushed = false;
 
    /* For ARB_query_buffer_object: The result is now available */
-   set_query_availability(brw, query, true);
+   set_query_available(brw, query);
 }
 
 /**
@@ -523,7 +510,11 @@ static void gen6_wait_query(struct gl_context *ctx, struct 
gl_query_object *q)
     */
    flush_batch_if_needed(brw, query);
 
-   gen6_queryobj_get_results(ctx, query);
+   uint64_t *results = query->results;
+   if (!results[GEN6_QUERY_PREDICATE]) /* not yet available, must wait */
+      brw_bo_wait_rendering(query->bo);
+
+   gen6_queryobj_get_results(ctx, query, results + GEN6_QUERY_RESULTS);
 }
 
 /**
@@ -552,9 +543,10 @@ static void gen6_check_query(struct gl_context *ctx, 
struct gl_query_object *q)
     */
    flush_batch_if_needed(brw, query);
 
-   if (!brw_bo_busy(query->bo)) {
-      gen6_queryobj_get_results(ctx, query);
-   }
+   uint64_t *results = query->results;
+   if (results[GEN6_QUERY_PREDICATE] || /* already available, can read async */
+       !brw_bo_map_busy(query->bo, MAP_READ)) /* GPU hang, results lost? */
+      gen6_queryobj_get_results(ctx, query, results + GEN6_QUERY_RESULTS);
 }
 
 static void
@@ -565,7 +557,7 @@ gen6_query_counter(struct gl_context *ctx, struct 
gl_query_object *q)
    const int idx = gen6_alloc_query(brw, query) + GEN6_QUERY_RESULTS;
 
    brw_write_timestamp(brw, query->bo, idx);
-   set_query_availability(brw, query, true);
+   set_query_available(brw, query);
 
    query->flushed = false;
 }
-- 
2.19.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/9] i965: Use 'available' fence for polling query results

Reply via email to