We adjust the code to explicitly request flush of batches accessing BOs they care about. Thanks to that, we can get rid of the implicit serialization done in panfrost_batch_submit() and panfrost_set_framebuffer_state(). Finally, panfrost_flush() is changed to flush all pending batches.
Signed-off-by: Boris Brezillon <boris.brezil...@collabora.com> --- src/gallium/drivers/panfrost/pan_compute.c | 2 +- src/gallium/drivers/panfrost/pan_context.c | 145 +++++++++++++------- src/gallium/drivers/panfrost/pan_job.c | 15 +- src/gallium/drivers/panfrost/pan_resource.c | 26 ++-- 4 files changed, 115 insertions(+), 73 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c index 4639c1b03c38..036dffbb17be 100644 --- a/src/gallium/drivers/panfrost/pan_compute.c +++ b/src/gallium/drivers/panfrost/pan_compute.c @@ -133,7 +133,7 @@ panfrost_launch_grid(struct pipe_context *pipe, /* Queue the job */ panfrost_scoreboard_queue_compute_job(batch, transfer); - panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME); + panfrost_flush_all_batches(ctx, true); } void diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 02726e7cd349..993744a1ffd0 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -150,6 +150,28 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count) return framebuffer; } +static void +panfrost_flush_fbo_deps(struct panfrost_context *ctx) +{ + struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer; + for (unsigned i = 0; i < fb->nr_cbufs; i++) { + if (!fb->cbufs[i]) + continue; + + struct panfrost_resource *rsrc = pan_resource(fb->cbufs[i]->texture); + + panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true); + panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true); + } + + if (fb->zsbuf) { + struct panfrost_resource *rsrc = pan_resource(fb->zsbuf->texture); + + panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true); + panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true); + } +} + static void panfrost_clear( struct pipe_context *pipe, @@ -160,6 +182,7 @@ panfrost_clear( struct panfrost_context *ctx = pan_context(pipe); struct panfrost_batch *batch = 
panfrost_get_batch_for_fbo(ctx); + panfrost_flush_fbo_deps(ctx); panfrost_batch_add_fbo_bos(batch); panfrost_batch_clear(batch, buffers, color, depth, stencil); } @@ -1324,10 +1347,9 @@ panfrost_flush( unsigned flags) { struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - /* Submit the frame itself */ - panfrost_batch_submit(batch); + /* Submit all pending jobs */ + panfrost_flush_all_batches(ctx, false); if (fence) { struct panfrost_fence *f = panfrost_fence_create(ctx); @@ -1433,6 +1455,71 @@ panfrost_statistics_record( ctx->tf_prims_generated += prims; } +static void +panfrost_flush_draw_deps(struct panfrost_context *ctx, const struct pipe_draw_info *info) +{ + struct panfrost_resource *rsrc; + + if (ctx->wallpaper_batch) + return; + + panfrost_flush_fbo_deps(ctx); + + for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) { + for (unsigned i = 0; i < ctx->sampler_view_count[stage]; i++) { + struct panfrost_sampler_view *view = ctx->sampler_views[stage][i]; + + if (!view) + continue; + + rsrc = pan_resource(view->base.texture); + panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true); + } + + for (unsigned i = 0; i < 32; i++) { + if (!(ctx->ssbo_mask[stage] & (1 << i))) + continue; + + rsrc = pan_resource(ctx->ssbo[stage][i].buffer); + panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true); + panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true); + } + } + + if (info->index_size && !info->has_user_indices) { + struct panfrost_resource *rsrc = pan_resource(info->index.resource); + + panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true); + } + + for (unsigned i = 0; ctx->vertex && i < ctx->vertex->num_elements; i++) { + struct pipe_vertex_element *velem = &ctx->vertex->pipe[i]; + unsigned vbi = velem->vertex_buffer_index; + + if (!(ctx->vb_mask & (1 << vbi))) + continue; + + struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; + + if (!buf->buffer.resource) + continue; + + rsrc = 
pan_resource(buf->buffer.resource); + panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true); + } + + for (unsigned i = 0; i < ctx->streamout.num_targets; i++) { + struct pipe_stream_output_target *target = ctx->streamout.targets[i]; + + if (!target) + continue; + + rsrc = pan_resource(target->buffer); + panfrost_flush_batch_writing_bo(ctx, rsrc->bo, true); + panfrost_flush_batches_reading_bo(ctx, rsrc->bo, true); + } +} + static void panfrost_draw_vbo( struct pipe_context *pipe, @@ -1477,6 +1564,7 @@ panfrost_draw_vbo( } } + panfrost_flush_draw_deps(ctx, info); ctx->payloads[PIPE_SHADER_VERTEX].offset_start = info->start; ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = info->start; @@ -2252,50 +2340,10 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, { struct panfrost_context *ctx = pan_context(pctx); - /* Flush when switching framebuffers, but not if the framebuffer - * state is being restored by u_blitter - */ - - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - bool is_scanout = panfrost_batch_is_scanout(batch); - bool has_draws = batch->last_job.gpu; - - /* Bail out early when the current and new states are the same. */ - if (util_framebuffer_state_equal(&ctx->pipe_framebuffer, fb)) - return; - - /* The wallpaper logic sets a new FB state before doing the blit and - * restore the old one when it's done. Those FB states are reported to - * be different because the surface they are pointing to are different, - * but those surfaces actually point to the same cbufs/zbufs. In that - * case we definitely don't want new FB descs to be emitted/attached - * since the job is expected to be flushed just after the blit is done, - * so let's just copy the new state and return here. 
- */ - if (ctx->wallpaper_batch) { - util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb); - return; - } - - if (!is_scanout || has_draws) - panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME); - else - assert(!ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer && - !ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer); - - /* Invalidate the FBO job cache since we've just been assigned a new - * FB state. - */ - ctx->batch = NULL; - + panfrost_hint_afbc(pan_screen(pctx->screen), fb); util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb); - - /* Given that we're rendering, we'd love to have compression */ - struct panfrost_screen *screen = pan_screen(ctx->base.screen); - - panfrost_hint_afbc(screen, &ctx->pipe_framebuffer); - for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) - ctx->payloads[i].postfix.framebuffer = 0; + ctx->batch = NULL; + panfrost_invalidate_frame(ctx); } static void * @@ -2513,6 +2561,7 @@ panfrost_get_query_result(struct pipe_context *pipe, bool wait, union pipe_query_result *vresult) { + struct panfrost_context *ctx = pan_context(pipe); struct panfrost_query *query = (struct panfrost_query *) q; @@ -2521,7 +2570,7 @@ panfrost_get_query_result(struct pipe_context *pipe, case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: /* Flush first */ - panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME); + panfrost_flush_all_batches(ctx, true); /* Read back the query results */ unsigned *result = (unsigned *) query->transfer.cpu; @@ -2537,7 +2586,7 @@ panfrost_get_query_result(struct pipe_context *pipe, case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: - panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME); + panfrost_flush_all_batches(ctx, true); vresult->u64 = query->end - query->start; break; diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index 8eda110542c3..c40f3f12ec13 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ 
b/src/gallium/drivers/panfrost/pan_job.c @@ -384,21 +384,14 @@ panfrost_batch_submit(struct panfrost_batch *batch) fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret); out: - if (ctx->batch == batch) - panfrost_invalidate_frame(ctx); - /* The job has been submitted, let's invalidate the current FBO job * cache. */ - assert(!ctx->batch || batch == ctx->batch); - ctx->batch = NULL; + if (ctx->batch == batch) { + panfrost_invalidate_frame(ctx); + ctx->batch = NULL; + } - /* We always stall the pipeline for correct results since pipelined - * rendering is quite broken right now (to be fixed by the panfrost_job - * refactor, just take the perf hit for correctness) - */ - drmSyncobjWait(pan_screen(ctx->base.screen)->fd, &ctx->out_sync, 1, - INT64_MAX, 0, NULL); panfrost_free_batch(batch); } diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index a5869768f9de..25b72478dccd 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -578,10 +578,8 @@ panfrost_transfer_map(struct pipe_context *pctx, is_bound |= fb->cbufs[c]->texture == resource; } - if (is_bound && (usage & PIPE_TRANSFER_READ)) { - assert(level == 0); - panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME); - } + if (is_bound && (usage & PIPE_TRANSFER_READ)) + assert(level == 0); /* TODO: Respect usage flags */ @@ -594,9 +592,10 @@ panfrost_transfer_map(struct pipe_context *pctx, /* No flush for writes to uninitialized */ } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { if (usage & PIPE_TRANSFER_WRITE) { - /* STUB: flush reading */ - //printf("debug: missed reading flush %d\n", resource->target); + panfrost_flush_batch_writing_bo(ctx, bo, false); + panfrost_flush_batches_reading_bo(ctx, bo, false); } else if (usage & PIPE_TRANSFER_READ) { + panfrost_flush_batch_writing_bo(ctx, bo, false); /* STUB: flush writing */ //printf("debug: missed writing flush %d (%d-%d)\n", resource->target, box->x, 
box->x + box->width); } else { @@ -604,6 +603,8 @@ panfrost_transfer_map(struct pipe_context *pctx, } } + panfrost_bo_wait(bo, INT64_MAX); + if (rsrc->layout != PAN_LINEAR) { /* Non-linear resources need to be indirectly mapped */ @@ -748,11 +749,8 @@ panfrost_generate_mipmap( * reorder-type optimizations in place. But for now prioritize * correctness. */ - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - bool has_draws = batch->last_job.gpu; - - if (has_draws) - panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME); + panfrost_flush_batch_writing_bo(ctx, rsrc->bo, false); + panfrost_bo_wait(rsrc->bo, INT64_MAX); /* We've flushed the original buffer if needed, now trigger a blit */ @@ -765,8 +763,10 @@ panfrost_generate_mipmap( /* If the blit was successful, flush once more. If it wasn't, well, let * the state tracker deal with it. */ - if (blit_res) - panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME); + if (blit_res) { + panfrost_flush_batch_writing_bo(ctx, rsrc->bo, false); + panfrost_bo_wait(rsrc->bo, INT64_MAX); + } return blit_res; } -- 2.21.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev