We adjust the code to explicitly request a flush of the batches accessing
the BOs it cares about. Thanks to this, we can get rid of the implicit
serialization done in panfrost_batch_submit() and
panfrost_set_framebuffer_state(). Finally, panfrost_flush() is changed
to flush all pending batches.
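
To illustrate the new pattern with a sketch (the helper name below is
hypothetical; it only mirrors the calls panfrost_transfer_map() makes
further down and assumes the flush helpers added earlier in this series):

    /* Illustration only: flush every batch touching rsrc->bo, then wait
     * for the GPU before letting the CPU access the BO. */
    static void
    example_sync_bo_for_cpu_access(struct panfrost_context *ctx,
                                   struct panfrost_resource *rsrc)
    {
            /* Flush the batch (if any) writing this BO... */
            panfrost_flush_batch_writing_bo(ctx, rsrc->bo, false);

            /* ...and the batches reading it, since the CPU may write it. */
            panfrost_flush_batches_reading_bo(ctx, rsrc->bo, false);

            /* Block until the flushed jobs have actually completed. */
            panfrost_bo_wait(rsrc->bo, INT64_MAX);
    }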

Signed-off-by: Boris Brezillon <boris.brezil...@collabora.com>
---
 src/gallium/drivers/panfrost/pan_compute.c  |   2 +-
 src/gallium/drivers/panfrost/pan_context.c  | 145 +++++++++++++-------
 src/gallium/drivers/panfrost/pan_job.c      |  15 +-
 src/gallium/drivers/panfrost/pan_resource.c |  26 ++--
 4 files changed, 115 insertions(+), 73 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index 4639c1b03c38..036dffbb17be 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -133,7 +133,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
         /* Queue the job */
         panfrost_scoreboard_queue_compute_job(batch, transfer);
 
-        panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+        panfrost_flush_all_batches(ctx, true);
 }
 
 void
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index 02726e7cd349..993744a1ffd0 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -150,6 +150,28 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
         return framebuffer;
 }
 
+static void
+panfrost_flush_fbo_deps(struct panfrost_context *ctx)
+{
+        struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
+        for (unsigned i = 0; i < fb->nr_cbufs; i++) {
+                if (!fb->cbufs[i])
+                        continue;
+
+                struct panfrost_resource *rsrc = pan_resource(fb->cbufs[i]->texture);
+
+                panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+                panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true);
+        }
+
+        if (fb->zsbuf) {
+                struct panfrost_resource *rsrc = pan_resource(fb->zsbuf->texture);
+
+                panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+                panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true);
+        }
+}
+
 static void
 panfrost_clear(
         struct pipe_context *pipe,
@@ -160,6 +182,7 @@ panfrost_clear(
         struct panfrost_context *ctx = pan_context(pipe);
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
+        panfrost_flush_fbo_deps(ctx);
         panfrost_batch_add_fbo_bos(batch);
         panfrost_batch_clear(batch, buffers, color, depth, stencil);
 }
@@ -1324,10 +1347,9 @@ panfrost_flush(
         unsigned flags)
 {
         struct panfrost_context *ctx = pan_context(pipe);
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
-        /* Submit the frame itself */
-        panfrost_batch_submit(batch);
+        /* Submit all pending jobs */
+        panfrost_flush_all_batches(ctx, false);
 
         if (fence) {
                 struct panfrost_fence *f = panfrost_fence_create(ctx);
@@ -1433,6 +1455,71 @@ panfrost_statistics_record(
         ctx->tf_prims_generated += prims;
 }
 
+static void
+panfrost_flush_draw_deps(struct panfrost_context *ctx, const struct pipe_draw_info *info)
+{
+        struct panfrost_resource *rsrc;
+
+        if (ctx->wallpaper_batch)
+                return;
+
+        panfrost_flush_fbo_deps(ctx);
+
+        for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
+                for (unsigned i = 0; i < ctx->sampler_view_count[stage]; i++) {
+                        struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
+
+                        if (!view)
+                                continue;
+
+                        rsrc = pan_resource(view->base.texture);
+                        panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+                }
+
+                for (unsigned i = 0; i < 32; i++) {
+                        if (!(ctx->ssbo_mask[stage] & (1 << i)))
+                                continue;
+
+                        rsrc = pan_resource(ctx->ssbo[stage][i].buffer);
+                        panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+                        panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true);
+                }
+        }
+
+        if (info->index_size && !info->has_user_indices) {
+                struct panfrost_resource *rsrc = pan_resource(info->index.resource);
+
+                panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+        }
+
+        for (unsigned i = 0; ctx->vertex && i < ctx->vertex->num_elements; i++) {
+                struct pipe_vertex_element *velem = &ctx->vertex->pipe[i];
+                unsigned vbi = velem->vertex_buffer_index;
+
+                if (!(ctx->vb_mask & (1 << vbi)))
+                        continue;
+
+                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
+
+                if (!buf->buffer.resource)
+                        continue;
+
+                rsrc = pan_resource(buf->buffer.resource);
+                panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+        }
+
+        for (unsigned i = 0; i < ctx->streamout.num_targets; i++) {
+                struct pipe_stream_output_target *target = ctx->streamout.targets[i];
+
+                if (!target)
+                        continue;
+
+                rsrc = pan_resource(target->buffer);
+                panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true);
+                panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true);
+        }
+}
+
 static void
 panfrost_draw_vbo(
         struct pipe_context *pipe,
@@ -1477,6 +1564,7 @@ panfrost_draw_vbo(
                 }
         }
 
+        panfrost_flush_draw_deps(ctx, info);
         ctx->payloads[PIPE_SHADER_VERTEX].offset_start = info->start;
         ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = info->start;
 
@@ -2252,50 +2340,10 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
 {
         struct panfrost_context *ctx = pan_context(pctx);
 
-        /* Flush when switching framebuffers, but not if the framebuffer
-         * state is being restored by u_blitter
-         */
-
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        bool is_scanout = panfrost_batch_is_scanout(batch);
-        bool has_draws = batch->last_job.gpu;
-
-        /* Bail out early when the current and new states are the same. */
-        if (util_framebuffer_state_equal(&ctx->pipe_framebuffer, fb))
-                return;
-
-        /* The wallpaper logic sets a new FB state before doing the blit and
-         * restore the old one when it's done. Those FB states are reported to
-         * be different because the surface they are pointing to are different,
-         * but those surfaces actually point to the same cbufs/zbufs. In that
-         * case we definitely don't want new FB descs to be emitted/attached
-         * since the job is expected to be flushed just after the blit is done,
-         * so let's just copy the new state and return here.
-         */
-        if (ctx->wallpaper_batch) {
-                util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb);
-                return;
-        }
-
-        if (!is_scanout || has_draws)
-                panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
-        else
-                assert(!ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer &&
-                       !ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer);
-
-        /* Invalidate the FBO job cache since we've just been assigned a new
-         * FB state.
-         */
-        ctx->batch = NULL;
-
+        panfrost_hint_afbc(pan_screen(pctx->screen), fb);
         util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb);
-
-        /* Given that we're rendering, we'd love to have compression */
-        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
-
-        panfrost_hint_afbc(screen, &ctx->pipe_framebuffer);
-        for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
-                ctx->payloads[i].postfix.framebuffer = 0;
+        ctx->batch = NULL;
+        panfrost_invalidate_frame(ctx);
 }
 
 static void *
@@ -2513,6 +2561,7 @@ panfrost_get_query_result(struct pipe_context *pipe,
                           bool wait,
                           union pipe_query_result *vresult)
 {
+        struct panfrost_context *ctx = pan_context(pipe);
         struct panfrost_query *query = (struct panfrost_query *) q;
 
 
@@ -2521,7 +2570,7 @@ panfrost_get_query_result(struct pipe_context *pipe,
         case PIPE_QUERY_OCCLUSION_PREDICATE:
         case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
                 /* Flush first */
-                panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+                panfrost_flush_all_batches(ctx, true);
 
                 /* Read back the query results */
                 unsigned *result = (unsigned *) query->transfer.cpu;
@@ -2537,7 +2586,7 @@ panfrost_get_query_result(struct pipe_context *pipe,
 
         case PIPE_QUERY_PRIMITIVES_GENERATED:
         case PIPE_QUERY_PRIMITIVES_EMITTED:
-                panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+                panfrost_flush_all_batches(ctx, true);
                 vresult->u64 = query->end - query->start;
                 break;
 
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 8eda110542c3..c40f3f12ec13 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -384,21 +384,14 @@ panfrost_batch_submit(struct panfrost_batch *batch)
                 fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
 
 out:
-        if (ctx->batch == batch)
-                panfrost_invalidate_frame(ctx);
-
         /* The job has been submitted, let's invalidate the current FBO job
          * cache.
         */
-        assert(!ctx->batch || batch == ctx->batch);
-        ctx->batch = NULL;
+        if (ctx->batch == batch) {
+                panfrost_invalidate_frame(ctx);
+                ctx->batch = NULL;
+        }
 
-        /* We always stall the pipeline for correct results since pipelined
-        * rendering is quite broken right now (to be fixed by the panfrost_job
-        * refactor, just take the perf hit for correctness)
-        */
-        drmSyncobjWait(pan_screen(ctx->base.screen)->fd, &ctx->out_sync, 1,
-                       INT64_MAX, 0, NULL);
         panfrost_free_batch(batch);
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c
index a5869768f9de..25b72478dccd 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -578,10 +578,8 @@ panfrost_transfer_map(struct pipe_context *pctx,
                         is_bound |= fb->cbufs[c]->texture == resource;
         }
 
-        if (is_bound && (usage & PIPE_TRANSFER_READ)) {
-                assert(level == 0);
-                panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
-        }
+        if (is_bound && (usage & PIPE_TRANSFER_READ))
+                 assert(level == 0);
 
         /* TODO: Respect usage flags */
 
@@ -594,9 +592,10 @@ panfrost_transfer_map(struct pipe_context *pctx,
                 /* No flush for writes to uninitialized */
         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                 if (usage & PIPE_TRANSFER_WRITE) {
-                        /* STUB: flush reading */
-                        //printf("debug: missed reading flush %d\n", resource->target);
+                        panfrost_flush_batch_writing_bo(ctx, bo, false);
+                        panfrost_flush_batches_reading_bo(ctx, bo, false);
                 } else if (usage & PIPE_TRANSFER_READ) {
+                        panfrost_flush_batch_writing_bo(ctx, bo, false);
                         /* STUB: flush writing */
                         //printf("debug: missed writing flush %d (%d-%d)\n", resource->target, box->x, box->x + box->width);
                 } else {
@@ -604,6 +603,8 @@ panfrost_transfer_map(struct pipe_context *pctx,
                 }
         }
 
+        panfrost_bo_wait(bo, INT64_MAX);
+
         if (rsrc->layout != PAN_LINEAR) {
                 /* Non-linear resources need to be indirectly mapped */
 
@@ -748,11 +749,8 @@ panfrost_generate_mipmap(
          * reorder-type optimizations in place. But for now prioritize
          * correctness. */
 
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        bool has_draws = batch->last_job.gpu;
-
-        if (has_draws)
-                panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
+        panfrost_flush_batch_writing_bo(ctx, rsrc->bo, false);
+        panfrost_bo_wait(rsrc->bo, INT64_MAX);
 
         /* We've flushed the original buffer if needed, now trigger a blit */
 
@@ -765,8 +763,10 @@ panfrost_generate_mipmap(
         /* If the blit was successful, flush once more. If it wasn't, well, let
          * the state tracker deal with it. */
 
-        if (blit_res)
-                panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
+        if (blit_res) {
+                panfrost_flush_batch_writing_bo(ctx, rsrc->bo, false);
+                panfrost_bo_wait(rsrc->bo, INT64_MAX);
+        }
 
         return blit_res;
 }
-- 
2.21.0
