Very nice! I'm quite happy with this version, all considered, so R-b!
On Wed, Sep 18, 2019 at 03:24:35PM +0200, Boris Brezillon wrote: > This is needed if we want to free the panfrost_batch object at submit > time in order to not have to GC the batch on the next job submission. > > Signed-off-by: Boris Brezillon <boris.brezil...@collabora.com> > --- > Changes in v3: > * Move the patch later in the series and squash "panfrost: Cache GPU > accesses to BOs" in it > * Add extra comments to explain what we're doing > --- > src/gallium/drivers/panfrost/pan_bo.c | 112 ++++++++++++++++++++----- > src/gallium/drivers/panfrost/pan_bo.h | 9 ++ > src/gallium/drivers/panfrost/pan_job.c | 11 +++ > 3 files changed, 109 insertions(+), 23 deletions(-) > > diff --git a/src/gallium/drivers/panfrost/pan_bo.c > b/src/gallium/drivers/panfrost/pan_bo.c > index 9daddf9d0cc2..37602688d630 100644 > --- a/src/gallium/drivers/panfrost/pan_bo.c > +++ b/src/gallium/drivers/panfrost/pan_bo.c > @@ -23,6 +23,7 @@ > * Authors (Collabora): > * Alyssa Rosenzweig <alyssa.rosenzw...@collabora.com> > */ > +#include <errno.h> > #include <stdio.h> > #include <fcntl.h> > #include <xf86drm.h> > @@ -101,6 +102,63 @@ panfrost_bo_free(struct panfrost_bo *bo) > ralloc_free(bo); > } > > +/* Returns true if the BO is ready, false otherwise. > + * access_type is encoding the type of access one wants to ensure is done. > + * Say you want to make sure all writers are done writing, you should pass > + * PAN_BO_ACCESS_WRITE. > + * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW. > + * PAN_BO_ACCESS_READ would work too as waiting for readers implies > + * waiting for writers as well, but we want to make things explicit and > waiting > + * only for readers is impossible. > + */ > +bool > +panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, > + uint32_t access_type) > +{ > + struct drm_panfrost_wait_bo req = { > + .handle = bo->gem_handle, > + .timeout_ns = timeout_ns, > + }; > + int ret; > + > + assert(access_type == PAN_BO_ACCESS_WRITE || > + access_type == PAN_BO_ACCESS_RW); > + > + /* If the BO has been exported or imported we can't rely on the > cached > + * state, we need to call the WAIT_BO ioctl. > + */ > + if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) { > + /* If ->gpu_access is 0, the BO is idle, no need to wait. */ > + if (!bo->gpu_access) > + return true; > + > + /* If the caller only wants to wait for writers and no > + * writes are pending, we don't have to wait. > + */ > + if (access_type == PAN_BO_ACCESS_WRITE && > + !(bo->gpu_access & PAN_BO_ACCESS_WRITE)) > + return true; > + } > + > + /* The ioctl returns >= 0 value when the BO we are waiting for is > ready > + * -1 otherwise. > + */ > + ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req); > + if (ret != -1) { > + /* Set gpu_access to 0 so that the next call to bo_wait() > + * doesn't have to call the WAIT_BO ioctl. > + */ > + bo->gpu_access = 0; > + return true; > + } > + > + /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed > + * is invalid, which shouldn't happen here. > + */ > + assert(errno == ETIMEDOUT || errno == EBUSY); > + return false; > +} > + > /* Helper to calculate the bucket index of a BO */ > > static unsigned > @@ -137,9 +195,8 @@ pan_bucket(struct panfrost_screen *screen, unsigned size) > * BO. */ > > static struct panfrost_bo * > -panfrost_bo_cache_fetch( > - struct panfrost_screen *screen, > - size_t size, uint32_t flags) > +panfrost_bo_cache_fetch(struct panfrost_screen *screen, > + size_t size, uint32_t flags, bool dontwait) > { > pthread_mutex_lock(&screen->bo_cache_lock); > struct list_head *bucket = pan_bucket(screen, size); > @@ -147,27 +204,30 @@ panfrost_bo_cache_fetch( > > /* Iterate the bucket looking for something suitable */ > list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) { > - if (entry->size >= size && > - entry->flags == flags) { > - int ret; > - struct drm_panfrost_madvise madv; > + if (entry->size < size || entry->flags != flags) > + continue; > > - /* This one works, splice it out of the cache */ > - list_del(&entry->link); > + if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, > + PAN_BO_ACCESS_RW)) > + continue; > > - madv.handle = entry->gem_handle; > - madv.madv = PANFROST_MADV_WILLNEED; > - madv.retained = 0; > + struct drm_panfrost_madvise madv = { > + .handle = entry->gem_handle, > + .madv = PANFROST_MADV_WILLNEED, > + }; > + int ret; > > - ret = drmIoctl(screen->fd, > DRM_IOCTL_PANFROST_MADVISE, &madv); > - if (!ret && !madv.retained) { > - panfrost_bo_free(entry); > - continue; > - } > - /* Let's go! */ > - bo = entry; > - break; > + /* This one works, splice it out of the cache */ > + list_del(&entry->link); > + > + ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, > &madv); > + if (!ret && !madv.retained) { > + panfrost_bo_free(entry); > + continue; > } > + /* Let's go! */ > + bo = entry; > + break; > } > pthread_mutex_unlock(&screen->bo_cache_lock); > > @@ -281,12 +341,18 @@ panfrost_bo_create(struct panfrost_screen *screen, > size_t size, > if (flags & PAN_BO_GROWABLE) > assert(flags & PAN_BO_INVISIBLE); > > - /* Before creating a BO, we first want to check the cache, otherwise, > - * the cache misses and we need to allocate a BO fresh from the > kernel > + /* Before creating a BO, we first want to check the cache but without > + * waiting for BO readiness (BOs in the cache can still be referenced > + * by jobs that are not finished yet). > + * If the cached allocation fails we fall back on fresh BO > allocation, > + * and if that fails too, we try one more time to allocate from the > + * cache, but this time we accept to wait. > */ > - bo = panfrost_bo_cache_fetch(screen, size, flags); > + bo = panfrost_bo_cache_fetch(screen, size, flags, true); > if (!bo) > bo = panfrost_bo_alloc(screen, size, flags); > + if (!bo) > + bo = panfrost_bo_cache_fetch(screen, size, flags, false); > > if (!bo) > fprintf(stderr, "BO creation failed\n"); > diff --git a/src/gallium/drivers/panfrost/pan_bo.h > b/src/gallium/drivers/panfrost/pan_bo.h > index e4743f820aeb..022a56d9ca34 100644 > --- a/src/gallium/drivers/panfrost/pan_bo.h > +++ b/src/gallium/drivers/panfrost/pan_bo.h > @@ -100,6 +100,12 @@ struct panfrost_bo { > int gem_handle; > > uint32_t flags; > + > + /* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending > + * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl > + * when the BO is idle. > + */ > + uint32_t gpu_access; > }; > > static inline uint32_t > @@ -113,6 +119,9 @@ panfrost_bo_access_for_stage(enum pipe_shader_type stage) > PAN_BO_ACCESS_VERTEX_TILER; > } > > +bool > +panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, > + uint32_t access_type); > void > panfrost_bo_reference(struct panfrost_bo *bo); > void > diff --git a/src/gallium/drivers/panfrost/pan_job.c > b/src/gallium/drivers/panfrost/pan_job.c > index 235cb21dc8c8..a56f4044fda0 100644 > --- a/src/gallium/drivers/panfrost/pan_job.c > +++ b/src/gallium/drivers/panfrost/pan_job.c > @@ -810,8 +810,19 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch, > > hash_table_foreach(batch->bos, entry) { > struct panfrost_bo *bo = (struct panfrost_bo *)entry->key; > + uint32_t flags = (uintptr_t)entry->data; > + > assert(bo->gem_handle > 0); > bo_handles[submit.bo_handle_count++] = bo->gem_handle; > + > + /* Update the BO access flags so that panfrost_bo_wait() > knows > + * about all pending accesses. > + * We only keep the READ/WRITE info since this is all the BO > + * wait logic cares about. > + * We also preserve existing flags as this batch might not > + * be the first one to access the BO. > + */ > + bo->gpu_access |= flags & (PAN_BO_ACCESS_RW); > } > > submit.bo_handles = (u64) (uintptr_t) bo_handles; > -- > 2.21.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev