On Tue, 2019-04-16 at 01:49 +0000, Alyssa Rosenzweig wrote: > This (fairly large) patch continues work surrounding the panfrost_job > abstraction to improve job lifetime management. In particular, we add > infrastructure to track which BOs are used by a particular job > (currently limited to the vertex buffer BOs), to reference count > these > BOs, and to automatically manage the BOs memory based on the > reference > count. This set of changes serves as a code cleanup, as a way of > future > proofing for allowing flushing BOs, and immediately as a bugfix to > workaround the missing reference counting for vertex buffer BOs. > Meanwhile, there are a few cleanups to vertex buffer handling code > itself, so in the short-term, this allows us to remove the costly VBO > staging workaround, since this patch addresses the underlying causes. > > Signed-off-by: Alyssa Rosenzweig <aly...@rosenzweig.io> > --- > src/gallium/drivers/panfrost/pan_context.c | 37 ++++------ > src/gallium/drivers/panfrost/pan_context.h | 6 +- > src/gallium/drivers/panfrost/pan_drm.c | 6 ++ > src/gallium/drivers/panfrost/pan_job.c | 82 > +++++++++++++++++++++ > src/gallium/drivers/panfrost/pan_job.h | 16 ++++ > src/gallium/drivers/panfrost/pan_resource.c | 70 +++++++++++++++++- > src/gallium/drivers/panfrost/pan_resource.h | 13 ++++ > src/gallium/drivers/panfrost/pan_screen.c | 2 +- > src/gallium/drivers/panfrost/pan_screen.h | 5 +- > 9 files changed, 208 insertions(+), 29 deletions(-) > > diff --git a/src/gallium/drivers/panfrost/pan_context.c > b/src/gallium/drivers/panfrost/pan_context.c > index 0c97c0a2b43..75a975871db 100644 > --- a/src/gallium/drivers/panfrost/pan_context.c > +++ b/src/gallium/drivers/panfrost/pan_context.c > @@ -36,6 +36,8 @@ > #include "util/u_memory.h" > #include "util/u_vbuf.h" > #include "util/half_float.h" > +#include "util/u_helpers.h" > +#include "util/u_format.h" > #include "indices/u_primconvert.h" > #include "tgsi/tgsi_parse.h" > > @@ -737,7 +739,7 @@ panfrost_emit_varying_descriptor( > * excepting some obscure circumstances */ > > static void > -panfrost_emit_vertex_data(struct panfrost_context *ctx) > +panfrost_emit_vertex_data(struct panfrost_context *ctx, struct > panfrost_job *job) > { > /* TODO: Only update the dirtied buffers */ > union mali_attr attrs[PIPE_MAX_ATTRIBS]; > @@ -745,6 +747,8 @@ panfrost_emit_vertex_data(struct panfrost_context > *ctx) > unsigned invocation_count = MALI_NEGATIVE(ctx- > >payload_tiler.prefix.invocation_count); > > for (int i = 0; i < ctx->vertex_buffer_count; ++i) { > + if (!(ctx->vb_mask & (1 << i))) continue; > + > struct pipe_vertex_buffer *buf = &ctx- > >vertex_buffers[i]; > struct panfrost_resource *rsrc = (struct > panfrost_resource *) (buf->buffer.resource); > > @@ -776,9 +780,10 @@ panfrost_emit_vertex_data(struct > panfrost_context *ctx) > > mali_ptr effective_address = rsrc ? (rsrc->bo->gpu + > buf->buffer_offset) : 0; > > - if (effective_address) { > + if (effective_address & 63) { > attrs[i].elements = > panfrost_upload_transient(ctx, rsrc->bo->cpu + buf->buffer_offset, > attrs[i].size) | MALI_ATTR_LINEAR; > } else if (effective_address) { > + panfrost_job_add_bo(job, rsrc->bo); > attrs[i].elements = effective_address | > MALI_ATTR_LINEAR; > } else { > /* Leave unset? */ > @@ -807,7 +812,7 @@ panfrost_emit_for_draw(struct panfrost_context > *ctx, bool with_vertex_data) > struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); > > if (with_vertex_data) { > - panfrost_emit_vertex_data(ctx); > + panfrost_emit_vertex_data(ctx, job); > } > > bool msaa = ctx->rasterizer->base.multisample; > @@ -1252,7 +1257,8 @@ panfrost_link_jobs(struct panfrost_context > *ctx) > > static void > panfrost_submit_frame(struct panfrost_context *ctx, bool > flush_immediate, > - struct pipe_fence_handle **fence) > + struct pipe_fence_handle **fence, > + struct panfrost_job *job) > { > struct pipe_context *gallium = (struct pipe_context *) ctx; > struct panfrost_screen *screen = pan_screen(gallium- > >screen); > @@ -1274,15 +1280,15 @@ panfrost_submit_frame(struct panfrost_context > *ctx, bool flush_immediate, > #ifndef DRY_RUN > > bool is_scanout = panfrost_is_scanout(ctx); > - int fragment_id = screen->driver->submit_vs_fs_job(ctx, > has_draws, is_scanout); > + screen->driver->submit_vs_fs_job(ctx, has_draws, > is_scanout); > > /* If visual, we can stall a frame */ > > if (!flush_immediate) > screen->driver->force_flush_fragment(ctx, fence); > > - screen->last_fragment_id = fragment_id; > screen->last_fragment_flushed = false; > + screen->last_job = job; > > /* If readback, flush now (hurts the pipelined performance) > */ > if (flush_immediate) > @@ -1317,7 +1323,7 @@ panfrost_flush( > bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME; > > /* Submit the frame itself */ > - panfrost_submit_frame(ctx, flush_immediate, fence); > + panfrost_submit_frame(ctx, flush_immediate, fence, job); > > /* Prepare for the next frame */ > panfrost_invalidate_frame(ctx); > @@ -1793,22 +1799,9 @@ panfrost_set_vertex_buffers( > const struct pipe_vertex_buffer *buffers) > { > struct panfrost_context *ctx = pan_context(pctx); > - assert(num_buffers <= PIPE_MAX_ATTRIBS); > - > - /* XXX: Dirty tracking? etc */ > - if (buffers) { > - size_t sz = sizeof(buffers[0]) * num_buffers; > - ctx->vertex_buffers = malloc(sz); > - ctx->vertex_buffer_count = num_buffers; > - memcpy(ctx->vertex_buffers, buffers, sz); > - } else { > - if (ctx->vertex_buffers) { > - free(ctx->vertex_buffers); > - ctx->vertex_buffers = NULL; > - } > > - ctx->vertex_buffer_count = 0; > - } > + util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx- > >vb_mask, buffers, start_slot, num_buffers); > + ctx->vertex_buffer_count = num_buffers; > } > > static void > diff --git a/src/gallium/drivers/panfrost/pan_context.h > b/src/gallium/drivers/panfrost/pan_context.h > index d071da1c62f..e45f98c9b77 100644 > --- a/src/gallium/drivers/panfrost/pan_context.h > +++ b/src/gallium/drivers/panfrost/pan_context.h > @@ -114,6 +114,9 @@ struct panfrost_context { > struct panfrost_job *job; > struct hash_table *jobs; > > + /* panfrost_resource -> panfrost_job */ > + struct hash_table *write_jobs; > + > /* Bit mask for supported PIPE_DRAW for this hardware */ > unsigned draw_modes; > > @@ -187,8 +190,9 @@ struct panfrost_context { > > struct panfrost_vertex_state *vertex; > > - struct pipe_vertex_buffer *vertex_buffers; > + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; > unsigned vertex_buffer_count; > + uint32_t vb_mask; > > struct panfrost_sampler_state > *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; > unsigned sampler_count[PIPE_SHADER_TYPES]; > diff --git a/src/gallium/drivers/panfrost/pan_drm.c > b/src/gallium/drivers/panfrost/pan_drm.c > index 70d8d7498d4..5fecd4fb27e 100644 > --- a/src/gallium/drivers/panfrost/pan_drm.c > +++ b/src/gallium/drivers/panfrost/pan_drm.c > @@ -1,5 +1,6 @@ > /* > * © Copyright 2019 Collabora, Ltd. > + * Copyright 2019 Alyssa Rosenzweig > * > * Permission is hereby granted, free of charge, to any person > obtaining a > * copy of this software and associated documentation files (the > "Software"), > @@ -138,6 +139,7 @@ panfrost_drm_import_bo(struct panfrost_screen > *screen, struct winsys_handle *wha > > bo->gem_handle = gem_handle; > bo->gpu = (mali_ptr) get_bo_offset.offset; > + panfrost_bo_reference(bo); > > // TODO map and unmap on demand? > mmap_bo.handle = gem_handle; > @@ -227,6 +229,7 @@ panfrost_drm_submit_job(struct panfrost_context > *ctx, u64 job_desc, int reqs, st > } > > /* TODO: Add here the transient pools */ > + /* TODO: Add here the BOs listed in the panfrost_job */ > bo_handles[submit.bo_handle_count++] = ctx->shaders.gem_handle; > bo_handles[submit.bo_handle_count++] = ctx- > >scratchpad.gem_handle; > bo_handles[submit.bo_handle_count++] = ctx- > >tiler_heap.gem_handle; > @@ -303,6 +306,9 @@ panfrost_drm_force_flush_fragment(struct > panfrost_context *ctx, > if (!screen->last_fragment_flushed) { > drmSyncobjWait(drm->fd, &ctx->out_sync, 1, INT64_MAX, > 0, NULL); > screen->last_fragment_flushed = true; > + > + /* The job finished up, so we're safe to clean it up > now */ > + panfrost_free_job(ctx, screen->last_job); > } > > if (fence) { > diff --git a/src/gallium/drivers/panfrost/pan_job.c > b/src/gallium/drivers/panfrost/pan_job.c > index 66a8b0d4b07..6c68575158f 100644 > --- a/src/gallium/drivers/panfrost/pan_job.c > +++ b/src/gallium/drivers/panfrost/pan_job.c > @@ -27,6 +27,13 @@ > #include "util/hash_table.h" > #include "util/ralloc.h" > > +static void > +remove_from_ht(struct hash_table *ht, void *key) > +{ > + struct hash_entry *entry = _mesa_hash_table_search(ht, key); > + _mesa_hash_table_remove(ht, entry); > +} > +
This is the same as _mesa_hash_table_remove_key(), no? > struct panfrost_job * > panfrost_create_job(struct panfrost_context *ctx) > { > @@ -35,9 +42,32 @@ panfrost_create_job(struct panfrost_context *ctx) > > job->ctx = ctx; > > + job->bos = _mesa_set_create(job, > + _mesa_hash_pointer, > + _mesa_key_pointer_equal); > + > return job; > } > > +void > +panfrost_free_job(struct panfrost_context *ctx, struct panfrost_job > *job) > +{ > + if (!job) > + return; > + > + set_foreach(job->bos, entry) { > + struct panfrost_bo *bo = (struct panfrost_bo > *)entry->key; > + panfrost_bo_unreference(ctx->base.screen, bo); > + } > + > + remove_from_ht(ctx->jobs, &job->key); > + > + if (ctx->job == job) > + ctx->job = NULL; > + > + ralloc_free(job); > +} > + > struct panfrost_job * > panfrost_get_job(struct panfrost_context *ctx, > struct pipe_surface **cbufs, struct pipe_surface > *zsbuf) > @@ -90,6 +120,53 @@ panfrost_get_job_for_fbo(struct panfrost_context > *ctx) > return job; > } > > +void > +panfrost_job_add_bo(struct panfrost_job *job, struct panfrost_bo > *bo) > +{ > + if (!bo) > + return; > + > + if (_mesa_set_search(job->bos, bo)) > + return; > + > + panfrost_bo_reference(bo); > + _mesa_set_add(job->bos, bo); > +} > + > +void > +panfrost_flush_jobs_writing_resource(struct panfrost_context > *panfrost, > + struct pipe_resource *prsc) > +{ > +#if 0 > + struct hash_entry *entry = _mesa_hash_table_search(panfrost- > >write_jobs, > + prsc); > + if (entry) { > + struct panfrost_job *job = entry->data; > + panfrost_job_submit(panfrost, job); > + } > +#endif > + /* TODO stub */ Did you mean to leave this #if'ed out? > +} > + > +void > +panfrost_flush_jobs_reading_resource(struct panfrost_context > *panfrost, > + struct pipe_resource *prsc) > +{ > + struct panfrost_resource *rsc = pan_resource(prsc); > + > + panfrost_flush_jobs_writing_resource(panfrost, prsc); > + > + hash_table_foreach(panfrost->jobs, entry) { > + struct panfrost_job *job = entry->data; > + > + if (_mesa_set_search(job->bos, rsc->bo)) { > + printf("TODO: submit job for flush\n"); > + //panfrost_job_submit(panfrost, job); > + continue; > + } > + } > +} > + > static bool > panfrost_job_compare(const void *a, const void *b) > { > @@ -109,4 +186,9 @@ panfrost_job_init(struct panfrost_context *ctx) > ctx->jobs = _mesa_hash_table_create(NULL, > panfrost_job_hash, > panfrost_job_compare); > + > + ctx->write_jobs = _mesa_hash_table_create(NULL, > + _mesa_hash_pointer, > + _mesa_key_pointer_equal) > ; > + > } > diff --git a/src/gallium/drivers/panfrost/pan_job.h > b/src/gallium/drivers/panfrost/pan_job.h > index 30f1cf4bd5c..1b28084c599 100644 > --- a/src/gallium/drivers/panfrost/pan_job.h > +++ b/src/gallium/drivers/panfrost/pan_job.h > @@ -55,6 +55,8 @@ struct panfrost_job { > * bitmask) */ > unsigned requirements; > > + /* BOs referenced -- will be used for flushing logic */ > + struct set *bos; > }; > > /* Functions for managing the above */ > @@ -62,6 +64,9 @@ struct panfrost_job { > struct panfrost_job * > panfrost_create_job(struct panfrost_context *ctx); > > +void > +panfrost_free_job(struct panfrost_context *ctx, struct panfrost_job > *job); > + > struct panfrost_job * > panfrost_get_job(struct panfrost_context *ctx, > struct pipe_surface **cbufs, struct pipe_surface > *zsbuf); > @@ -72,4 +77,15 @@ panfrost_get_job_for_fbo(struct panfrost_context > *ctx); > void > panfrost_job_init(struct panfrost_context *ctx); > > +void > +panfrost_job_add_bo(struct panfrost_job *job, struct panfrost_bo > *bo); > + > +void > +panfrost_flush_jobs_writing_resource(struct panfrost_context > *panfrost, > + struct pipe_resource *prsc); > + > +void > +panfrost_flush_jobs_reading_resource(struct panfrost_context > *panfrost, > + struct pipe_resource *prsc); > + > #endif > diff --git a/src/gallium/drivers/panfrost/pan_resource.c > b/src/gallium/drivers/panfrost/pan_resource.c > index 15d522f1963..a0fc64fa1c1 100644 > --- a/src/gallium/drivers/panfrost/pan_resource.c > +++ b/src/gallium/drivers/panfrost/pan_resource.c > @@ -228,6 +228,7 @@ static struct panfrost_bo * > panfrost_create_bo(struct panfrost_screen *screen, const struct > pipe_resource *template) > { > struct panfrost_bo *bo = CALLOC_STRUCT(panfrost_bo); > + panfrost_bo_reference(bo); > > /* Based on the usage, figure out what storing will be used. > There are > * various tradeoffs: > @@ -297,6 +298,8 @@ panfrost_resource_create(struct pipe_screen > *screen, > assert(0); > } > > + util_range_init(&so->valid_buffer_range); > + > if (template->bind & PIPE_BIND_DISPLAY_TARGET || > template->bind & PIPE_BIND_SCANOUT || > template->bind & PIPE_BIND_SHARED) { > @@ -359,6 +362,25 @@ panfrost_destroy_bo(struct panfrost_screen > *screen, struct panfrost_bo *pbo) > } > } > > +void > +panfrost_bo_reference(struct panfrost_bo *bo) > +{ > + p_atomic_inc(&bo->reference_count); > +} > + > +void > +panfrost_bo_unreference(struct pipe_screen *screen, struct > panfrost_bo *bo) > +{ > + /* Check for double free errors */ > + assert(bo->reference_count); > + > + /* When the reference count goes to zero, we need to cleanup > */ > + > + if (p_atomic_dec_zero(&bo->reference_count)) { > + panfrost_destroy_bo(pan_screen(screen), bo); > + } > +} > + Why not use pipe_reference instead of open-coding? That helper contains some neat debug-helpers etc... > static void > panfrost_resource_destroy(struct pipe_screen *screen, > struct pipe_resource *pt) > @@ -370,8 +392,9 @@ panfrost_resource_destroy(struct pipe_screen > *screen, > renderonly_scanout_destroy(rsrc->scanout, pscreen->ro); > > if (rsrc->bo) > - panfrost_destroy_bo(pscreen, rsrc->bo); > + panfrost_bo_unreference(screen, rsrc->bo); > > + util_range_destroy(&rsrc->valid_buffer_range); > FREE(rsrc); > } > > @@ -384,7 +407,8 @@ panfrost_transfer_map(struct pipe_context *pctx, > struct pipe_transfer **out_transfer) > { > int bytes_per_pixel = util_format_get_blocksize(resource- > >format); > - struct panfrost_bo *bo = pan_resource(resource)->bo; > + struct panfrost_resource *rsrc = pan_resource(resource); > + struct panfrost_bo *bo = rsrc->bo; > > struct panfrost_gtransfer *transfer = > CALLOC_STRUCT(panfrost_gtransfer); > transfer->base.level = level; > @@ -405,6 +429,25 @@ panfrost_transfer_map(struct pipe_context *pctx, > panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME); > } > > + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { > + /* TODO: reallocate */ > + //printf("debug: Missed reallocate\n"); > + } else if ((usage & PIPE_TRANSFER_WRITE) > + && resource->target == PIPE_BUFFER > + && !util_ranges_intersect(&rsrc- > >valid_buffer_range, box->x, box->x + box->width)) { > + /* No flush for writes to uninitialized */ > + } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { > + if (usage & PIPE_TRANSFER_WRITE) { > + /* STUB: flush reading */ > + //printf("debug: missed reading flush %d\n", > resource->target); > + } else if (usage & PIPE_TRANSFER_READ) { > + /* STUB: flush writing */ > + //printf("debug: missed writing flush %d > (%d-%d)\n", resource->target, box->x, box->x + box->width); > + } else { > + /* Why are you even mapping?! */ > + } > + } > + > if (bo->layout != PAN_LINEAR) { > /* Non-linear resources need to be indirectly mapped > */ > > @@ -460,9 +503,9 @@ panfrost_transfer_unmap(struct pipe_context > *pctx, > /* Gallium expects writeback here, so we tile */ > > struct panfrost_gtransfer *trans = pan_transfer(transfer); > + struct panfrost_resource *prsrc = (struct panfrost_resource > *) transfer->resource; > > if (trans->map) { > - struct panfrost_resource *prsrc = (struct > panfrost_resource *) transfer->resource; > struct panfrost_bo *bo = prsrc->bo; > > if (transfer->usage & PIPE_TRANSFER_WRITE) { > @@ -480,6 +523,11 @@ panfrost_transfer_unmap(struct pipe_context > *pctx, > free(trans->map); > } > > + > + util_range_add(&prsrc->valid_buffer_range, > + transfer->box.x, > + transfer->box.x + transfer->box.width); > + > /* Derefence the resource */ > pipe_resource_reference(&transfer->resource, NULL); > > @@ -487,6 +535,20 @@ panfrost_transfer_unmap(struct pipe_context > *pctx, > free(transfer); > } > > +static void > +panfrost_transfer_flush_region(struct pipe_context *pctx, > + struct pipe_transfer *transfer, > + const struct pipe_box *box) > +{ > + struct panfrost_resource *rsc = pan_resource(transfer- > >resource); > + > + if (transfer->resource->target == PIPE_BUFFER) { > + util_range_add(&rsc->valid_buffer_range, > + transfer->box.x + box->x, > + transfer->box.x + box->x + > box->width); > + } > +} > + > static struct pb_slab * > panfrost_slab_alloc(void *priv, unsigned heap, unsigned entry_size, > unsigned group_index) > { > @@ -564,7 +626,7 @@ static const struct u_transfer_vtbl transfer_vtbl > = { > .resource_destroy = panfrost_resource_destroy, > .transfer_map = panfrost_transfer_map, > .transfer_unmap = panfrost_transfer_unmap, > - .transfer_flush_region = u_default_transfer_flush_region, > + .transfer_flush_region = panfrost_transfer_flush_region, > .get_internal_format = > panfrost_resource_get_internal_format, > .set_stencil = panfrost_resource_set_stencil, > .get_stencil = panfrost_resource_get_stencil, > diff --git a/src/gallium/drivers/panfrost/pan_resource.h > b/src/gallium/drivers/panfrost/pan_resource.h > index a1315ab1b43..b9d6cd2b023 100644 > --- a/src/gallium/drivers/panfrost/pan_resource.h > +++ b/src/gallium/drivers/panfrost/pan_resource.h > @@ -30,6 +30,7 @@ > #include "pan_screen.h" > #include "pan_allocate.h" > #include "drm-uapi/drm.h" > +#include "util/u_range.h" > > /* Describes the memory layout of a BO */ > > @@ -45,6 +46,10 @@ struct panfrost_slice { > }; > > struct panfrost_bo { > + /* Reference count */ > + unsigned reference_count; > + > + /* Description of the mip levels */ > struct panfrost_slice slices[MAX_MIP_LEVELS]; > > /* Mapping for the entire object (all levels) */ > @@ -83,6 +88,12 @@ struct panfrost_bo { > int gem_handle; > }; > > +void > +panfrost_bo_reference(struct panfrost_bo *bo); > + > +void > +panfrost_bo_unreference(struct pipe_screen *screen, struct > panfrost_bo *bo); > + > struct panfrost_resource { > struct pipe_resource base; > > @@ -90,6 +101,8 @@ struct panfrost_resource { > struct renderonly_scanout *scanout; > > struct panfrost_resource *separate_stencil; > + > + struct util_range valid_buffer_range; > }; > > static inline struct panfrost_resource * > diff --git a/src/gallium/drivers/panfrost/pan_screen.c > b/src/gallium/drivers/panfrost/pan_screen.c > index 71c6175d069..6d3aca594f1 100644 > --- a/src/gallium/drivers/panfrost/pan_screen.c > +++ b/src/gallium/drivers/panfrost/pan_screen.c > @@ -597,8 +597,8 @@ panfrost_create_screen(int fd, struct renderonly > *ro) > screen->base.fence_reference = panfrost_fence_reference; > screen->base.fence_finish = panfrost_fence_finish; > > - screen->last_fragment_id = -1; > screen->last_fragment_flushed = true; > + screen->last_job = NULL; > > panfrost_resource_screen_init(screen); > > diff --git a/src/gallium/drivers/panfrost/pan_screen.h > b/src/gallium/drivers/panfrost/pan_screen.h > index cbadf813675..4e5efa91f22 100644 > --- a/src/gallium/drivers/panfrost/pan_screen.h > +++ b/src/gallium/drivers/panfrost/pan_screen.h > @@ -92,8 +92,11 @@ struct panfrost_screen { > /* TODO: Where? */ > struct panfrost_resource *display_target; > > - int last_fragment_id; > + /* While we're busy building up the job for frame N, the GPU > is > + * still busy executing frame N-1. So hold a reference to > + * yesterjob */ > int last_fragment_flushed; > + struct panfrost_job *last_job; > }; > > #endif /* PAN_SCREEN_H */ _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev