Patches 2-5 have my R-b, looks good! :)
On Wed, Aug 07, 2019 at 10:36:54AM +0200, Tomeu Vizoso wrote: > Instead of all shaders being stored in a single BO, have each shader in > its own. > > This removes the need for a 16MB allocation per context, and allows us > to place transient blend shaders in BOs marked as executable (before > they were allocated in the transient pool, which shouldn't be > executable). > > v2: - Store compiled blend shaders in a malloc'ed buffer, to avoid > reading from GPU-accessible memory when patching (Alyssa). > - Free struct panfrost_blend_shader (Alyssa). > - Give the job a reference to regular shaders when emitting > (Alyssa). > > Signed-off-by: Tomeu Vizoso <tomeu.viz...@collabora.com> > --- > src/gallium/drivers/panfrost/pan_allocate.h | 2 + > src/gallium/drivers/panfrost/pan_assemble.c | 5 ++- > src/gallium/drivers/panfrost/pan_blend.h | 13 ++++-- > src/gallium/drivers/panfrost/pan_blend_cso.c | 41 +++++++++++++------ > .../drivers/panfrost/pan_blend_shaders.c | 13 ++---- > src/gallium/drivers/panfrost/pan_bo_cache.c | 5 +-- > src/gallium/drivers/panfrost/pan_context.c | 14 +++++-- > src/gallium/drivers/panfrost/pan_context.h | 3 +- > src/gallium/drivers/panfrost/pan_drm.c | 5 ++- > 9 files changed, 66 insertions(+), 35 deletions(-) > > diff --git a/src/gallium/drivers/panfrost/pan_allocate.h > b/src/gallium/drivers/panfrost/pan_allocate.h > index 8d925ee38a4c..0e06567d2069 100644 > --- a/src/gallium/drivers/panfrost/pan_allocate.h > +++ b/src/gallium/drivers/panfrost/pan_allocate.h > @@ -59,6 +59,8 @@ struct panfrost_bo { > size_t size; > > int gem_handle; > + > + uint32_t flags; > }; > > struct panfrost_memory { > diff --git a/src/gallium/drivers/panfrost/pan_assemble.c > b/src/gallium/drivers/panfrost/pan_assemble.c > index 15f77585a25c..dd877056d3b5 100644 > --- a/src/gallium/drivers/panfrost/pan_assemble.c > +++ b/src/gallium/drivers/panfrost/pan_assemble.c > @@ -43,6 +43,7 @@ panfrost_shader_compile( > gl_shader_stage stage, > struct panfrost_shader_state *state) > { > + struct panfrost_screen *screen = pan_screen(ctx->base.screen); > uint8_t *dst; > > nir_shader *s; > @@ -80,7 +81,9 @@ panfrost_shader_compile( > * I bet someone just thought that would be a cute pun. At least, > * that's how I'd do it. */ > > - meta->shader = panfrost_upload(&ctx->shaders, dst, size) | > program.first_tag; > + state->bo = panfrost_drm_create_bo(screen, size, > PAN_ALLOCATE_EXECUTE); > + memcpy(state->bo->cpu, dst, size); > + meta->shader = state->bo->gpu | program.first_tag; > > util_dynarray_fini(&program.compiled); > > diff --git a/src/gallium/drivers/panfrost/pan_blend.h > b/src/gallium/drivers/panfrost/pan_blend.h > index a881e0945f48..a29353ff0014 100644 > --- a/src/gallium/drivers/panfrost/pan_blend.h > +++ b/src/gallium/drivers/panfrost/pan_blend.h > @@ -33,8 +33,10 @@ > /* An internal blend shader descriptor, from the compiler */ > > struct panfrost_blend_shader { > - /* The compiled shader in GPU memory */ > - struct panfrost_transfer shader; > + struct panfrost_context *ctx; > + > + /* The compiled shader */ > + void *buffer; > > /* Byte count of the shader */ > unsigned size; > @@ -53,8 +55,11 @@ struct panfrost_blend_shader { > /* A blend shader descriptor ready for actual use */ > > struct panfrost_blend_shader_final { > - /* The upload, possibly to transient memory */ > - mali_ptr gpu; > + /* The compiled shader in GPU memory, possibly patched */ > + struct panfrost_bo *bo; > + > + /* First instruction tag (for tagging the pointer) */ > + unsigned first_tag; > > /* Same meaning as panfrost_blend_shader */ > unsigned work_count; > diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c > b/src/gallium/drivers/panfrost/pan_blend_cso.c > index f685b25b41b3..08a86980ca88 100644 > --- a/src/gallium/drivers/panfrost/pan_blend_cso.c > +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c > @@ -151,11 +151,24 @@ panfrost_bind_blend_state(struct pipe_context *pipe, > ctx->dirty |= PAN_DIRTY_FS; > } > > +static void > +panfrost_delete_blend_shader(struct hash_entry *entry) > +{ > + struct panfrost_blend_shader *shader = (struct panfrost_blend_shader > *)entry->data; > + free(shader->buffer); > + free(shader); > +} > + > static void > panfrost_delete_blend_state(struct pipe_context *pipe, > - void *blend) > + void *cso) > { > - /* TODO: Free shader binary? */ > + struct panfrost_blend_state *blend = (struct panfrost_blend_state *) > cso; > + > + for (unsigned c = 0; c < 4; ++c) { > + struct panfrost_blend_rt *rt = &blend->rt[c]; > + _mesa_hash_table_u64_clear(rt->shaders, > panfrost_delete_blend_shader); > + } > ralloc_free(blend); > } > > @@ -208,6 +221,9 @@ panfrost_blend_constant(float *out, float *in, unsigned > mask) > struct panfrost_blend_final > panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti) > { > + struct panfrost_screen *screen = pan_screen(ctx->base.screen); > + struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); > + > /* Grab the format, falling back gracefully if called invalidly > (which > * has to happen for no-color-attachment FBOs, for instance) */ > struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer; > @@ -241,23 +257,24 @@ panfrost_get_blend_for_context(struct panfrost_context > *ctx, unsigned rti) > struct panfrost_blend_shader *shader = > panfrost_get_blend_shader(ctx, blend, fmt, rti); > final.is_shader = true; > final.shader.work_count = shader->work_count; > + final.shader.first_tag = shader->first_tag; > + > + /* Upload the shader */ > + final.shader.bo = panfrost_drm_create_bo(screen, shader->size, > PAN_ALLOCATE_EXECUTE); > + memcpy(final.shader.bo->cpu, shader->buffer, shader->size); > + > + /* Pass BO ownership to job */ > + panfrost_job_add_bo(job, final.shader.bo); > + panfrost_bo_unreference(ctx->base.screen, final.shader.bo); > > if (shader->patch_index) { > /* We have to specialize the blend shader to use constants, > so > - * patch in the current constants and upload to transient > - * memory */ > + * patch in the current constants */ > > - float *patch = (float *) (shader->shader.cpu + > shader->patch_index); > + float *patch = (float *) (final.shader.bo->cpu + > shader->patch_index); > memcpy(patch, ctx->blend_color.color, sizeof(float) * 4); > - > - final.shader.gpu = panfrost_upload_transient( > - ctx, shader->shader.cpu, > shader->size); > - } else { > - /* No need to specialize further, use the preuploaded */ > - final.shader.gpu = shader->shader.gpu; > } > > - final.shader.gpu |= shader->first_tag; > return final; > } > > diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c > b/src/gallium/drivers/panfrost/pan_blend_shaders.c > index 0063290d7e77..2ee86b4e7db3 100644 > --- a/src/gallium/drivers/panfrost/pan_blend_shaders.c > +++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c > @@ -133,6 +133,8 @@ panfrost_compile_blend_shader( > { > struct panfrost_blend_shader res; > > + res.ctx = ctx; > + > /* Build the shader */ > > nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, > &midgard_nir_options, NULL); > @@ -172,21 +174,14 @@ panfrost_compile_blend_shader( > midgard_program program; > midgard_compile_shader_nir(&ctx->compiler, shader, &program, true); > > - /* Upload the shader */ > - > - int size = program.compiled.size; > - uint8_t *dst = program.compiled.data; > - > - res.shader.cpu = mem_dup(dst, size); > - res.shader.gpu = panfrost_upload(&ctx->shaders, dst, size); > - > /* At least two work registers are needed due to an encoding quirk */ > res.work_count = MAX2(program.work_register_count, 2); > > /* Allow us to patch later */ > res.patch_index = program.blend_patch_offset; > res.first_tag = program.first_tag; > - res.size = size; > + res.size = program.compiled.size; > + res.buffer = program.compiled.data; > > return res; > } > diff --git a/src/gallium/drivers/panfrost/pan_bo_cache.c > b/src/gallium/drivers/panfrost/pan_bo_cache.c > index fba495c1dd69..7378d0a8abea 100644 > --- a/src/gallium/drivers/panfrost/pan_bo_cache.c > +++ b/src/gallium/drivers/panfrost/pan_bo_cache.c > @@ -84,11 +84,10 @@ panfrost_bo_cache_fetch( > { > struct list_head *bucket = pan_bucket(screen, size); > > - /* TODO: Honour flags? */ > - > /* Iterate the bucket looking for something suitable */ > list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) { > - if (entry->size >= size) { > + if (entry->size >= size && > + entry->flags == flags) { > /* This one works, splice it out of the cache */ > list_del(&entry->link); > > diff --git a/src/gallium/drivers/panfrost/pan_context.c > b/src/gallium/drivers/panfrost/pan_context.c > index 284c246677df..2fc5ac361979 100644 > --- a/src/gallium/drivers/panfrost/pan_context.c > +++ b/src/gallium/drivers/panfrost/pan_context.c > @@ -1062,6 +1062,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, > bool with_vertex_data) > > panfrost_patch_shader_state(ctx, variant, > PIPE_SHADER_FRAGMENT, false); > > + panfrost_job_add_bo(job, variant->bo); > + > #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name > > COPY(shader); > @@ -1140,7 +1142,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, > bool with_vertex_data) > > if (blend.is_shader) { > ctx->fragment_shader_core.blend.shader = > - blend.shader.gpu; > + blend.shader.bo->gpu | > blend.shader.first_tag; > } else { > ctx->fragment_shader_core.blend.shader = 0; > } > @@ -1215,7 +1217,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, > bool with_vertex_data) > assert(!(is_srgb && blend.is_shader)); > > if (blend.is_shader) { > - rts[i].blend.shader = > blend.shader.gpu; > + rts[i].blend.shader = > blend.shader.bo->gpu | blend.shader.first_tag; > } else { > rts[i].blend.equation = > *blend.equation.equation; > rts[i].blend.constant = > blend.equation.constant; > @@ -1904,6 +1906,12 @@ panfrost_delete_shader_state( > DBG("Deleting TGSI shader leaks duplicated tokens\n"); > } > > + for (unsigned i = 0; i < cso->variant_count; ++i) { > + struct panfrost_shader_state *shader_state = > &cso->variants[i]; > + panfrost_bo_unreference(pctx->screen, shader_state->bo); > + shader_state->bo = NULL; > + } > + > free(so); > } > > @@ -2528,7 +2536,6 @@ panfrost_destroy(struct pipe_context *pipe) > util_blitter_destroy(panfrost->blitter_wallpaper); > > panfrost_drm_free_slab(screen, &panfrost->scratchpad); > - panfrost_drm_free_slab(screen, &panfrost->shaders); > panfrost_drm_free_slab(screen, &panfrost->tiler_heap); > panfrost_drm_free_slab(screen, &panfrost->tiler_dummy); > > @@ -2673,7 +2680,6 @@ panfrost_setup_hardware(struct panfrost_context *ctx) > struct panfrost_screen *screen = pan_screen(gallium->screen); > > panfrost_drm_allocate_slab(screen, &ctx->scratchpad, 64*4, false, 0, > 0, 0); > - panfrost_drm_allocate_slab(screen, &ctx->shaders, 4096, true, > PAN_ALLOCATE_EXECUTE, 0, 0); > panfrost_drm_allocate_slab(screen, &ctx->tiler_heap, 4096, false, > PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); > panfrost_drm_allocate_slab(screen, &ctx->tiler_dummy, 1, false, > PAN_ALLOCATE_INVISIBLE, 0, 0); > } > diff --git a/src/gallium/drivers/panfrost/pan_context.h > b/src/gallium/drivers/panfrost/pan_context.h > index e8d2417e0cb2..9a34e243b74b 100644 > --- a/src/gallium/drivers/panfrost/pan_context.h > +++ b/src/gallium/drivers/panfrost/pan_context.h > @@ -108,7 +108,6 @@ struct panfrost_context { > struct pipe_framebuffer_state pipe_framebuffer; > > struct panfrost_memory cmdstream_persistent; > - struct panfrost_memory shaders; > struct panfrost_memory scratchpad; > struct panfrost_memory tiler_heap; > struct panfrost_memory tiler_dummy; > @@ -224,6 +223,8 @@ struct panfrost_shader_state { > > /* Should we enable helper invocations */ > bool helper_invocations; > + > + struct panfrost_bo *bo; > }; > > /* A collection of varyings (the CSO) */ > diff --git a/src/gallium/drivers/panfrost/pan_drm.c > b/src/gallium/drivers/panfrost/pan_drm.c > index 42cf17503344..8ae541ae11b6 100644 > --- a/src/gallium/drivers/panfrost/pan_drm.c > +++ b/src/gallium/drivers/panfrost/pan_drm.c > @@ -88,6 +88,9 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, > size_t size, > /* Kernel will fail (confusingly) with EPERM otherwise */ > assert(size > 0); > > + /* To maximize BO cache usage, don't allocate tiny BOs */ > + size = MAX2(size, 4096); > + > unsigned translated_flags = 0; > > /* TODO: translate flags to kernel flags, if the kernel supports */ > @@ -120,6 +123,7 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, > size_t size, > bo->size = create_bo.size; > bo->gpu = create_bo.offset; > bo->gem_handle = create_bo.handle; > + bo->flags = flags; > } > > /* Only mmap now if we know we need to. For CPU-invisible buffers, we > @@ -285,7 +289,6 @@ panfrost_drm_submit_vs_fs_job(struct panfrost_context > *ctx, bool has_draws, bool > struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); > > /* TODO: Add here the transient pools */ > - panfrost_job_add_bo(job, ctx->shaders.bo); > panfrost_job_add_bo(job, ctx->scratchpad.bo); > panfrost_job_add_bo(job, ctx->tiler_heap.bo); > panfrost_job_add_bo(job, job->polygon_list); > -- > 2.20.1 >
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev