Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu>
On Sat, Jan 2, 2016 at 12:09 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > Instead of iterating over all the buffer resources looking for coherent > buffers, we keep track of a context-wide count. This will save some > iterations (and CPU cycles) in 99.99% case because usually coherent > buffers are not so used. > > Changes from v4: > - fix flag for textures > > Changes from v3: > - check if views[i] and views[i]->texture are not NULL > - fix use of nv50->textures_coherent > - check if vb[i].buffer is not NULL > - clear out the flag for UBO > > Changes from v2: > - forgot to apply some changes for nv50 (texture/vertex bufs) > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/gallium/drivers/nouveau/nv50/nv50_context.h | 3 ++ > src/gallium/drivers/nouveau/nv50/nv50_state.c | 25 +++++++++++++++ > src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 42 > +++++-------------------- > src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 3 ++ > src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 36 +++++++++++++++++++++ > src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 41 +++++------------------- > 6 files changed, 82 insertions(+), 68 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h > b/src/gallium/drivers/nouveau/nv50/nv50_context.h > index 2cebcd9..712d00e 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h > +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h > @@ -134,9 +134,11 @@ struct nv50_context { > struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; > uint16_t constbuf_dirty[3]; > uint16_t constbuf_valid[3]; > + uint16_t constbuf_coherent[3]; > > struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; > unsigned num_vtxbufs; > + uint32_t vtxbufs_coherent; > struct pipe_index_buffer idxbuf; > uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ > uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ > @@ -148,6 +150,7 @@ struct nv50_context { > > struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS]; > unsigned num_textures[3]; > + uint32_t textures_coherent[3]; > struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; > unsigned num_samplers[3]; > > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c > b/src/gallium/drivers/nouveau/nv50/nv50_state.c > index de65597..cb04043 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c > @@ -664,6 +664,17 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, > int s, > if (old) > nv50_screen_tic_unlock(nv50->screen, old); > > + if (views[i] && views[i]->texture) { > + struct pipe_resource *res = views[i]->texture; > + if (res->target == PIPE_BUFFER && > + (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) > + nv50->textures_coherent[s] |= 1 << i; > + else > + nv50->textures_coherent[s] &= ~(1 << i); > + } else { > + nv50->textures_coherent[s] &= ~(1 << i); > + } > + > pipe_sampler_view_reference(&nv50->textures[s][i], views[i]); > } > > @@ -847,13 +858,19 @@ nv50_set_constant_buffer(struct pipe_context *pipe, > uint shader, uint index, > nv50->constbuf[s][i].u.data = cb->user_buffer; > nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000); > nv50->constbuf_valid[s] |= 1 << i; > + nv50->constbuf_coherent[s] &= ~(1 << i); > } else > if (res) { > nv50->constbuf[s][i].offset = cb->buffer_offset; > nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), > 0x10000); > nv50->constbuf_valid[s] |= 1 << i; > + if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) > + nv50->constbuf_coherent[s] |= 1 << i; > + else > + nv50->constbuf_coherent[s] &= ~(1 << i); > } else { > nv50->constbuf_valid[s] &= ~(1 << i); > + nv50->constbuf_coherent[s] &= ~(1 << i); > } > nv50->constbuf_dirty[s] |= 1 << i; > > @@ -1003,6 +1020,7 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, > if (!vb) { > nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot); > nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot); > + nv50->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot); > return; > } > > @@ -1015,9 +1033,16 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, > nv50->vbo_constant |= 1 << dst_index; > else > nv50->vbo_constant &= ~(1 << dst_index); > + nv50->vtxbufs_coherent &= ~(1 << dst_index); > } else { > nv50->vbo_user &= ~(1 << dst_index); > nv50->vbo_constant &= ~(1 << dst_index); > + > + if (vb[i].buffer && > + vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) > + nv50->vtxbufs_coherent |= (1 << dst_index); > + else > + nv50->vtxbufs_coherent &= ~(1 << dst_index); > } > } > } > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > index 2d1aa6a..60fa2bc 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > @@ -765,7 +765,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct > pipe_draw_info *info) > struct nv50_context *nv50 = nv50_context(pipe); > struct nouveau_pushbuf *push = nv50->base.pushbuf; > bool tex_dirty = false; > - int i, s; > + int s; > > /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ > nv50->vb_elt_first = info->min_index + info->index_bias; > @@ -794,27 +794,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct > pipe_draw_info *info) > > push->kick_notify = nv50_draw_vbo_kick_notify; > > - /* TODO: Instead of iterating over all the buffer resources looking for > - * coherent buffers, keep track of a context-wide count. > - */ > for (s = 0; s < 3 && !nv50->cb_dirty; ++s) { > - uint32_t valid = nv50->constbuf_valid[s]; > - > - while (valid && !nv50->cb_dirty) { > - const unsigned i = ffs(valid) - 1; > - struct pipe_resource *res; > - > - valid &= ~(1 << i); > - if (nv50->constbuf[s][i].user) > - continue; > - > - res = nv50->constbuf[s][i].u.buf; > - if (!res) > - continue; > - > - if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) > - nv50->cb_dirty = true; > - } > + if (nv50->constbuf_coherent[s]) > + nv50->cb_dirty = true; > } > > /* If there are any coherent constbufs, flush the cache */ > @@ -825,15 +807,10 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct > pipe_draw_info *info) > } > > for (s = 0; s < 3 && !tex_dirty; ++s) { > - for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) { > - if (!nv50->textures[s][i] || > - nv50->textures[s][i]->texture->target != PIPE_BUFFER) > - continue; > - if (nv50->textures[s][i]->texture->flags & > - PIPE_RESOURCE_FLAG_MAP_COHERENT) > - tex_dirty = true; > - } > + if (nv50->textures_coherent[s]) > + tex_dirty = true; > } > + > if (tex_dirty) { > BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1); > PUSH_DATA (push, 0x20); > @@ -853,12 +830,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct > pipe_draw_info *info) > PUSH_DATA (push, info->start_instance); > } > > - for (i = 0; i < nv50->num_vtxbufs && !nv50->base.vbo_dirty; ++i) { > - if (!nv50->vtxbuf[i].buffer) > - continue; > - if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) > - nv50->base.vbo_dirty = true; > - } > + nv50->base.vbo_dirty |= !!nv50->vtxbufs_coherent; > > if (nv50->base.vbo_dirty) { > BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1); > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > index 39b73ec..1219548 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > @@ -134,10 +134,12 @@ struct nvc0_context { > struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS]; > uint16_t constbuf_dirty[6]; > uint16_t constbuf_valid[6]; > + uint16_t constbuf_coherent[6]; > bool cb_dirty; > > struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; > unsigned num_vtxbufs; > + uint32_t vtxbufs_coherent; > struct pipe_index_buffer idxbuf; > uint32_t constant_vbos; > uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ > @@ -149,6 +151,7 @@ struct nvc0_context { > struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS]; > unsigned num_textures[6]; > uint32_t textures_dirty[6]; > + uint32_t textures_coherent[6]; > struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS]; > unsigned num_samplers[6]; > uint16_t samplers_dirty[6]; > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > index 41a824a..24a6c22 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > @@ -554,6 +554,17 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, > int s, > continue; > nvc0->textures_dirty[s] |= 1 << i; > > + if (views[i] && views[i]->texture) { > + struct pipe_resource *res = views[i]->texture; > + if (res->target == PIPE_BUFFER && > + (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) > + nvc0->textures_coherent[s] |= 1 << i; > + else > + nvc0->textures_coherent[s] &= ~(1 << i); > + } else { > + nvc0->textures_coherent[s] &= ~(1 << i); > + } > + > if (old) { > nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); > nvc0_screen_tic_unlock(nvc0->screen, old); > @@ -596,6 +607,17 @@ nvc0_stage_set_sampler_views_range(struct nvc0_context > *nvc0, const unsigned s, > continue; > nvc0->textures_dirty[s] |= 1 << i; > > + if (views[p] && views[p]->texture) { > + struct pipe_resource *res = views[p]->texture; > + if (res->target == PIPE_BUFFER && > + (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) > + nvc0->textures_coherent[s] |= 1 << i; > + else > + nvc0->textures_coherent[s] &= ~(1 << i); > + } else { > + nvc0->textures_coherent[s] &= ~(1 << i); > + } > + > if (nvc0->textures[s][i]) { > struct nv50_tic_entry *old = > nv50_tic_entry(nvc0->textures[s][i]); > nouveau_bufctx_reset(bctx, bin + i); > @@ -842,14 +864,20 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, > uint shader, uint index, > nvc0->constbuf[s][i].u.data = cb->user_buffer; > nvc0->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000); > nvc0->constbuf_valid[s] |= 1 << i; > + nvc0->constbuf_coherent[s] &= ~(1 << i); > } else > if (cb) { > nvc0->constbuf[s][i].offset = cb->buffer_offset; > nvc0->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), > 0x10000); > nvc0->constbuf_valid[s] |= 1 << i; > + if (res && res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) > + nvc0->constbuf_coherent[s] |= 1 << i; > + else > + nvc0->constbuf_coherent[s] &= ~(1 << i); > } > else { > nvc0->constbuf_valid[s] &= ~(1 << i); > + nvc0->constbuf_coherent[s] &= ~(1 << i); > } > } > > @@ -1009,6 +1037,7 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, > if (!vb) { > nvc0->vbo_user &= ~(((1ull << count) - 1) << start_slot); > nvc0->constant_vbos &= ~(((1ull << count) - 1) << start_slot); > + nvc0->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot); > return; > } > > @@ -1021,9 +1050,16 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, > nvc0->constant_vbos |= 1 << dst_index; > else > nvc0->constant_vbos &= ~(1 << dst_index); > + nvc0->vtxbufs_coherent &= ~(1 << dst_index); > } else { > nvc0->vbo_user &= ~(1 << dst_index); > nvc0->constant_vbos &= ~(1 << dst_index); > + > + if (vb[i].buffer && > + vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) > + nvc0->vtxbufs_coherent |= (1 << dst_index); > + else > + nvc0->vtxbufs_coherent &= ~(1 << dst_index); > } > } > } > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c > index 235b1af..2517533 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c > @@ -871,7 +871,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct > pipe_draw_info *info) > { > struct nvc0_context *nvc0 = nvc0_context(pipe); > struct nouveau_pushbuf *push = nvc0->base.pushbuf; > - int i, s; > + int s; > > /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ > nvc0->vb_elt_first = info->min_index + info->index_bias; > @@ -922,27 +922,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct > pipe_draw_info *info) > > push->kick_notify = nvc0_draw_vbo_kick_notify; > > - /* TODO: Instead of iterating over all the buffer resources looking for > - * coherent buffers, keep track of a context-wide count. > - */ > for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) { > - uint32_t valid = nvc0->constbuf_valid[s]; > - > - while (valid && !nvc0->cb_dirty) { > - const unsigned i = ffs(valid) - 1; > - struct pipe_resource *res; > - > - valid &= ~(1 << i); > - if (nvc0->constbuf[s][i].user) > - continue; > - > - res = nvc0->constbuf[s][i].u.buf; > - if (!res) > - continue; > - > - if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) > - nvc0->cb_dirty = true; > - } > + if (nvc0->constbuf_coherent[s]) > + nvc0->cb_dirty = true; > } > > if (nvc0->cb_dirty) { > @@ -951,14 +933,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct > pipe_draw_info *info) > } > > for (s = 0; s < 5; ++s) { > + if (!nvc0->textures_coherent[s]) > + continue; > + > for (int i = 0; i < nvc0->num_textures[s]; ++i) { > struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); > - struct pipe_resource *res; > - if (!tic) > - continue; > - res = nvc0->textures[s][i]->texture; > - if (res->target != PIPE_BUFFER || > - !(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) > + if (!(nvc0->textures_coherent[s] & (1 << i))) > continue; > > BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); > @@ -984,12 +964,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct > pipe_draw_info *info) > PUSH_DATA (push, info->start_instance); > } > > - for (i = 0; i < nvc0->num_vtxbufs && !nvc0->base.vbo_dirty; ++i) { > - if (!nvc0->vtxbuf[i].buffer) > - continue; > - if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) > - nvc0->base.vbo_dirty = true; > - } > + nvc0->base.vbo_dirty |= !!nvc0->vtxbufs_coherent; > > if (!nvc0->base.vbo_dirty && nvc0->idxbuf.buffer && > nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) > -- > 2.6.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev