On Thu, Jan 31, 2013 at 4:29 PM, Jerome Glisse <j.gli...@gmail.com> wrote: > On Wed, Jan 30, 2013 at 10:35 PM, Marek Olšák <mar...@gmail.com> wrote: >> On Wed, Jan 30, 2013 at 6:14 PM, <j.gli...@gmail.com> wrote: >>> From: Jerome Glisse <jgli...@redhat.com> >>> >>> We are now seing cs that can go over the vram+gtt size to avoid >>> failing flush early cs that goes over 70% (gtt+vram) usage. 70% >>> is use to allow some fragmentation. >>> >>> Signed-off-by: Jerome Glisse <jgli...@redhat.com> >>> --- >>> src/gallium/drivers/r600/evergreen_state.c | 4 ++++ >>> src/gallium/drivers/r600/r600.h | 1 + >>> src/gallium/drivers/r600/r600_buffer.c | 1 + >>> src/gallium/drivers/r600/r600_hw_context.c | 12 ++++++++++++ >>> src/gallium/drivers/r600/r600_pipe.c | 3 +++ >>> src/gallium/drivers/r600/r600_pipe.h | 21 +++++++++++++++++++++ >>> src/gallium/drivers/r600/r600_state.c | 3 +++ >>> src/gallium/drivers/r600/r600_state_common.c | 17 ++++++++++++++++- >>> src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +++++++++++ >>> src/gallium/winsys/radeon/drm/radeon_winsys.h | 10 ++++++++++ >>> 10 files changed, 82 insertions(+), 1 deletion(-) >>> >>> diff --git a/src/gallium/drivers/r600/evergreen_state.c >>> b/src/gallium/drivers/r600/evergreen_state.c >>> index be1c427..84f8dce 100644 >>> --- a/src/gallium/drivers/r600/evergreen_state.c >>> +++ b/src/gallium/drivers/r600/evergreen_state.c >>> @@ -1668,6 +1668,8 @@ static void evergreen_set_framebuffer_state(struct >>> pipe_context *ctx, >>> surf = (struct r600_surface*)state->cbufs[i]; >>> rtex = (struct r600_texture*)surf->base.texture; >>> >>> + r600_context_add_resource_size(ctx, >>> state->cbufs[i]->texture); >>> + >>> if (!surf->color_initialized) { >>> evergreen_init_color_surface(rctx, surf); >>> } >>> @@ -1699,6 +1701,8 @@ static void evergreen_set_framebuffer_state(struct >>> pipe_context *ctx, >>> if (state->zsbuf) { >>> surf = (struct r600_surface*)state->zsbuf; >>> >>> + r600_context_add_resource_size(ctx, state->zsbuf->texture); >>> + >>> if (!surf->depth_initialized) { >>> evergreen_init_depth_surface(rctx, surf); >>> } >>> diff --git a/src/gallium/drivers/r600/r600.h >>> b/src/gallium/drivers/r600/r600.h >>> index a383c90..b9f7d3d 100644 >>> --- a/src/gallium/drivers/r600/r600.h >>> +++ b/src/gallium/drivers/r600/r600.h >>> @@ -50,6 +50,7 @@ struct r600_resource { >>> >>> /* Resource state. */ >>> unsigned domains; >>> + uint64_t size; >> >> Don't add this. Use r600_resource::buf::size instead, which is already >> initialized. >> >> >>> }; >>> >>> #define R600_BLOCK_MAX_BO 32 >>> diff --git a/src/gallium/drivers/r600/r600_buffer.c >>> b/src/gallium/drivers/r600/r600_buffer.c >>> index 6df0d91..92f549a 100644 >>> --- a/src/gallium/drivers/r600/r600_buffer.c >>> +++ b/src/gallium/drivers/r600/r600_buffer.c >>> @@ -250,6 +250,7 @@ bool r600_init_resource(struct r600_screen *rscreen, >>> break; >>> } >>> >>> + res->size = size; >>> res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, >>> use_reusable_pool, >>> initial_domain); >>> diff --git a/src/gallium/drivers/r600/r600_hw_context.c >>> b/src/gallium/drivers/r600/r600_hw_context.c >>> index ebafd97..44d3b4d 100644 >>> --- a/src/gallium/drivers/r600/r600_hw_context.c >>> +++ b/src/gallium/drivers/r600/r600_hw_context.c >>> @@ -359,6 +359,16 @@ out_err: >>> void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, >>> boolean count_draw_in) >>> { >>> + if (!ctx->ws->cs_memory_below_limit(ctx->rings.gfx.cs, ctx->vram, >>> ctx->gtt)) { >>> + ctx->gtt = 0; >>> + ctx->vram = 0; >>> + ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC); >>> + return; >>> + } >>> + /* all will be accounted once relocation are emited */ >>> + ctx->gtt = 0; >>> + ctx->vram = 0; >>> + >>> /* The number of dwords we already used in the CS so far. */ >>> num_dw += ctx->rings.gfx.cs->cdw; >>> >>> @@ -784,6 +794,8 @@ void r600_begin_new_cs(struct r600_context *ctx) >>> >>> ctx->pm4_dirty_cdwords = 0; >>> ctx->flags = 0; >>> + ctx->gtt = 0; >>> + ctx->vram = 0; >>> >>> /* Begin a new CS. */ >>> r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd); >>> diff --git a/src/gallium/drivers/r600/r600_pipe.c >>> b/src/gallium/drivers/r600/r600_pipe.c >>> index a59578d..cb50cfe 100644 >>> --- a/src/gallium/drivers/r600/r600_pipe.c >>> +++ b/src/gallium/drivers/r600/r600_pipe.c >>> @@ -333,6 +333,9 @@ static struct pipe_context *r600_create_context(struct >>> pipe_screen *screen, void >>> rctx->chip_class = rscreen->chip_class; >>> rctx->keep_tiling_flags = rscreen->info.drm_minor >= 12; >>> >>> + rctx->gtt = 0; >>> + rctx->vram = 0; >> >> There is no reason to initialize anything to 0 in context_create. The >> whole context structure is calloc'd. >> >> >>> + >>> LIST_INITHEAD(&rctx->active_nontimer_queries); >>> LIST_INITHEAD(&rctx->dirty); >>> LIST_INITHEAD(&rctx->enable_list); >>> diff --git a/src/gallium/drivers/r600/r600_pipe.h >>> b/src/gallium/drivers/r600/r600_pipe.h >>> index 3ff42d3..beb4b33 100644 >>> --- a/src/gallium/drivers/r600/r600_pipe.h >>> +++ b/src/gallium/drivers/r600/r600_pipe.h >>> @@ -447,6 +447,10 @@ struct r600_context { >>> unsigned backend_mask; >>> unsigned max_db; /* for OQ */ >>> >>> + /* current unaccounted memory usage */ >>> + uint64_t vram; >>> + uint64_t gtt; >>> + >>> /* Miscellaneous state objects. */ >>> void *custom_dsa_flush; >>> void *custom_blend_resolve; >>> @@ -998,4 +1002,21 @@ static INLINE unsigned u_max_layer(struct >>> pipe_resource *r, unsigned level) >>> } >>> } >>> >>> +static INLINE void r600_context_add_resource_size(struct pipe_context >>> *ctx, struct pipe_resource *r) >>> +{ >>> + struct r600_context *rctx = (struct r600_context *)ctx; >>> + struct r600_resource *rr = (struct r600_resource *)r; >>> + >>> + if (r == NULL) { >>> + return; >>> + } >> >> Do you really need to check for NULL here? An assertion would be sufficient. >> >> We can also skip this if the buffer is already in the relocation list. >> We have a query for that: rctx->ws->cs_is_buffer_referenced. > > Yes there is some case where i was getting null at some point. I don't > want to have a precise accounting but rather a good estimate of it and > i don't want to waste cpu cycle checking if a buffer is already in > relocation list. More over at each draw time the counter are reseted > and the value computed by the relocation process is taken into account > so often the estimation is pretty good. In my testing (where i over > allocate by a random M amount each buffer) the margin in various gl > (heaven, reaction, xonotic, unreal, vdrift, ....) was always in 10% of > the limit (below or above), given there is room for fragmentation it > was working ok. > > So i think this gave a very good estimate and a quick one. > >> >>> + >>> + if (rr->domains & RADEON_DOMAIN_GTT) { >>> + rctx->gtt += rr->size; >>> + } >>> + if (rr->domains & RADEON_DOMAIN_VRAM) { >>> + rctx->vram += rr->size; >>> + } >>> +} >>> + >>> #endif >>> diff --git a/src/gallium/drivers/r600/r600_state.c >>> b/src/gallium/drivers/r600/r600_state.c >>> index 6b4b2c3..58b5aaa 100644 >>> --- a/src/gallium/drivers/r600/r600_state.c >>> +++ b/src/gallium/drivers/r600/r600_state.c >>> @@ -1544,6 +1544,7 @@ static void r600_set_framebuffer_state(struct >>> pipe_context *ctx, >>> >>> surf = (struct r600_surface*)state->cbufs[i]; >>> rtex = (struct r600_texture*)surf->base.texture; >>> + r600_context_add_resource_size(ctx, >>> state->cbufs[i]->texture); >>> >>> if (!surf->color_initialized || force_cmask_fmask) { >>> r600_init_color_surface(rctx, surf, >>> force_cmask_fmask); >>> @@ -1576,6 +1577,8 @@ static void r600_set_framebuffer_state(struct >>> pipe_context *ctx, >>> if (state->zsbuf) { >>> surf = (struct r600_surface*)state->zsbuf; >>> >>> + r600_context_add_resource_size(ctx, state->zsbuf->texture); >>> + >>> if (!surf->depth_initialized) { >>> r600_init_depth_surface(rctx, surf); >>> } >>> diff --git a/src/gallium/drivers/r600/r600_state_common.c >>> b/src/gallium/drivers/r600/r600_state_common.c >>> index 9386f61..fce2537 100644 >>> --- a/src/gallium/drivers/r600/r600_state_common.c >>> +++ b/src/gallium/drivers/r600/r600_state_common.c >>> @@ -479,7 +479,8 @@ static void r600_set_index_buffer(struct pipe_context >>> *ctx, >>> >>> if (ib) { >>> pipe_resource_reference(&rctx->index_buffer.buffer, >>> ib->buffer); >>> - memcpy(&rctx->index_buffer, ib, sizeof(*ib)); >>> + memcpy(&rctx->index_buffer, ib, sizeof(*ib)); >>> + r600_context_add_resource_size(ctx, ib->buffer); >>> } else { >>> pipe_resource_reference(&rctx->index_buffer.buffer, NULL); >>> } >>> @@ -516,6 +517,7 @@ static void r600_set_vertex_buffers(struct pipe_context >>> *ctx, >>> vb[i].buffer_offset = >>> input[i].buffer_offset; >>> >>> pipe_resource_reference(&vb[i].buffer, input[i].buffer); >>> new_buffer_mask |= 1 << i; >>> + r600_context_add_resource_size(ctx, >>> input[i].buffer); >>> } else { >>> >>> pipe_resource_reference(&vb[i].buffer, NULL); >>> disable_mask |= 1 << i; >>> @@ -613,6 +615,7 @@ static void r600_set_sampler_views(struct pipe_context >>> *pipe, unsigned shader, >>> >>> pipe_sampler_view_reference((struct >>> pipe_sampler_view **)&dst->views.views[i], views[i]); >>> new_mask |= 1 << i; >>> + r600_context_add_resource_size(pipe, >>> views[i]->texture); >>> } else { >>> pipe_sampler_view_reference((struct >>> pipe_sampler_view **)&dst->views.views[i], NULL); >>> disable_mask |= 1 << i; >>> @@ -806,6 +809,10 @@ static void r600_bind_ps_state(struct pipe_context >>> *ctx, void *state) >>> rctx->ps_shader = (struct r600_pipe_shader_selector *)state; >>> r600_context_pipe_state_set(rctx, >>> &rctx->ps_shader->current->rstate); >>> >>> + if (rctx->ps_shader->current) { >> >> This conditional is useless. rctx->ps_shader->current is never NULL at >> this point. >> >> >>> + r600_context_add_resource_size(ctx, (struct pipe_resource >>> *)rctx->ps_shader->current->bo); >>> + } >>> + >>> if (rctx->chip_class <= R700) { >>> bool multiwrite = >>> rctx->ps_shader->current->shader.fs_write_all; >>> >>> @@ -835,6 +842,10 @@ static void r600_bind_vs_state(struct pipe_context >>> *ctx, void *state) >>> if (state) { >>> r600_context_pipe_state_set(rctx, >>> &rctx->vs_shader->current->rstate); >>> >>> + if (rctx->ps_shader->current) { >>> + r600_context_add_resource_size(ctx, (struct >>> pipe_resource *)rctx->ps_shader->current->bo); >>> + } >> >> Maybe you wanted to use vs_shader instead? And vs_shader->current is >> never NULL here too. >> >> >>> + >>> /* Update clip misc state. */ >>> if (rctx->vs_shader->current->pa_cl_vs_out_cntl != >>> rctx->clip_misc_state.pa_cl_vs_out_cntl || >>> rctx->vs_shader->current->shader.clip_dist_write != >>> rctx->clip_misc_state.clip_dist_write) { >>> @@ -938,10 +949,13 @@ static void r600_set_constant_buffer(struct >>> pipe_context *ctx, uint shader, uint >>> } else { >>> u_upload_data(rctx->uploader, 0, >>> input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer); >>> } >>> + /* account it in gtt */ >>> + rctx->gtt += input->buffer_size; >> >> Are you sure about this? The buffer is usually much larger than >> input->buffer_size and the kernel only cares about the whole buffer. >> >> The rest looks good. >> >> Marek > > As stated above i am looking at an estimate and each draw call will > account for the proper index buffer size so in the end my testing > showed i always had a very close estimate.
Sounds good. Some comment in the code wouldn't hurt, so that other people don't consider it as a bug. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev