Fixes the test on Kepler.

Tested-by: Nick Sarnie <commendsar...@gmail.com>

On Sat, Jan 30, 2016 at 10:10 AM, Ilia Mirkin <imir...@alum.mit.edu> wrote: > It appears that the nvidia render engine is quite picky when it comes to > linear surfaces. It doesn't like non-256-byte aligned offsets, and > apparently doesn't even do non-256-byte strides. > > This makes arb_clear_buffer_object-unaligned pass on both nv50 and nvc0. > > As a side-effect this also allows RGB32 clears to work via GPU data > upload instead of synchronizing the buffer to the CPU (nvc0 only). > > Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> > Cc: mesa-sta...@lists.freedesktop.org > --- > src/gallium/drivers/nouveau/nv50/nv50_surface.c | 190 ++++++++++++++------ > src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 229 > ++++++++++++++++++------ > 2 files changed, 307 insertions(+), 112 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c > b/src/gallium/drivers/nouveau/nv50/nv50_surface.c > index 86be1b4..618c39c 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c > @@ -595,6 +595,82 @@ nv50_clear(struct pipe_context *pipe, unsigned > buffers, > } > > static void > +nv50_clear_buffer_push(struct pipe_context *pipe, > + struct pipe_resource *res, > + unsigned offset, unsigned size, > + const void *data, int data_size) > +{ > + struct nv50_context *nv50 = nv50_context(pipe); > + struct nouveau_pushbuf *push = nv50->base.pushbuf; > + struct nv04_resource *buf = nv04_resource(res); > + unsigned count = (size + 3) / 4; > + unsigned xcoord = offset & 0xff; > + unsigned tmp, i; > + > + if (data_size == 1) { > + tmp = *(unsigned char *)data; > + tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp; > + data = &tmp; > + data_size = 4; > + } else if (data_size == 2) { > + tmp = *(unsigned short *)data; > + tmp = (tmp << 16) | tmp; > + data = &tmp; > + data_size = 4; > + } > + > + unsigned data_words = data_size / 4; > + > + nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | > 
NOUVEAU_BO_WR); > + nouveau_pushbuf_bufctx(push, nv50->bufctx); > + nouveau_pushbuf_validate(push); > + > + offset &= ~0xff; > + > + BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); > + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); > + PUSH_DATA (push, 1); > + BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); > + PUSH_DATA (push, 262144); > + PUSH_DATA (push, 65536); > + PUSH_DATA (push, 1); > + PUSH_DATAh(push, buf->address + offset); > + PUSH_DATA (push, buf->address + offset); > + BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); > + BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); > + PUSH_DATA (push, size); > + PUSH_DATA (push, 1); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, 1); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, 1); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, xcoord); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, 0); > + > + while (count) { > + unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / > data_words; > + unsigned nr = nr_data * data_words; > + > + BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr); > + for (i = 0; i < nr_data; i++) > + PUSH_DATAp(push, data, data_words); > + > + count -= nr; > + } > + > + if (buf->mm) { > + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); > + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); > + } > + > + nouveau_bufctx_reset(nv50->bufctx, 0); > +} > + > +static void > nv50_clear_buffer(struct pipe_context *pipe, > struct pipe_resource *res, > unsigned offset, unsigned size, > @@ -643,77 +719,85 @@ nv50_clear_buffer(struct pipe_context *pipe, > > assert(size % data_size == 0); > > + if (offset & 0xff) { > + unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset); > + assert(fixup_size % data_size == 0); > + nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, > data_size); > + offset += fixup_size; > + size -= fixup_size; > + if (!size) > + return; > + } > + > elements = size / data_size; > height 
= (elements + 8191) / 8192; > width = elements / height; > + width &= ~0xff; > > - BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4); > - PUSH_DATAf(push, color.f[0]); > - PUSH_DATAf(push, color.f[1]); > - PUSH_DATAf(push, color.f[2]); > - PUSH_DATAf(push, color.f[3]); > + if (width) { > + BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4); > + PUSH_DATAf(push, color.f[0]); > + PUSH_DATAf(push, color.f[1]); > + PUSH_DATAf(push, color.f[2]); > + PUSH_DATAf(push, color.f[3]); > > - if (nouveau_pushbuf_space(push, 32, 1, 0)) > - return; > + if (nouveau_pushbuf_space(push, 32, 1, 0)) > + return; > > - PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR); > + PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR); > > - BEGIN_NV04(push, NV50_3D(SCREEN_SCISSOR_HORIZ), 2); > - PUSH_DATA (push, width << 16); > - PUSH_DATA (push, height << 16); > - BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2); > - PUSH_DATA (push, 8192 << 16); > - PUSH_DATA (push, 8192 << 16); > - nv50->scissors_dirty |= 1; > + BEGIN_NV04(push, NV50_3D(SCREEN_SCISSOR_HORIZ), 2); > + PUSH_DATA (push, width << 16); > + PUSH_DATA (push, height << 16); > + BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2); > + PUSH_DATA (push, 8192 << 16); > + PUSH_DATA (push, 8192 << 16); > + nv50->scissors_dirty |= 1; > > - BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); > - PUSH_DATA (push, 1); > - BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5); > - PUSH_DATAh(push, buf->bo->offset + buf->offset + offset); > - PUSH_DATA (push, buf->bo->offset + buf->offset + offset); > - PUSH_DATA (push, nv50_format_table[dst_fmt].rt); > - PUSH_DATA (push, 0); > - PUSH_DATA (push, 0); > - BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); > - PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size)); > - PUSH_DATA (push, height); > - BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); > - PUSH_DATA (push, 0); > - BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1); > - PUSH_DATA (push, 0); > + BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); > + PUSH_DATA (push, 1); > + 
BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5); > + PUSH_DATAh(push, buf->address + offset); > + PUSH_DATA (push, buf->address + offset); > + PUSH_DATA (push, nv50_format_table[dst_fmt].rt); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, 0); > + BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); > + PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size)); > + PUSH_DATA (push, height); > + BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); > + PUSH_DATA (push, 0); > + BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1); > + PUSH_DATA (push, 0); > > - /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ > + /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ > > - BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2); > - PUSH_DATA (push, (width << 16)); > - PUSH_DATA (push, (height << 16)); > + BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2); > + PUSH_DATA (push, (width << 16)); > + PUSH_DATA (push, (height << 16)); > > - BEGIN_NV04(push, NV50_3D(COND_MODE), 1); > - PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); > + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); > + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); > + > + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); > + PUSH_DATA (push, 0x3c); > > - BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); > - PUSH_DATA (push, 0x3c); > + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); > + PUSH_DATA (push, nv50->cond_condmode); > + > + if (buf->mm) { > + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); > + nouveau_fence_ref(nv50->screen->base.fence.current, > &buf->fence_wr); > + } > + } > > if (width * height != elements) { > offset += width * height * data_size; > width = elements - width * height; > - height = 1; > - BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2); > - PUSH_DATAh(push, buf->bo->offset + buf->offset + offset); > - PUSH_DATA (push, buf->bo->offset + buf->offset + offset); > - BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); > - PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size)); > - PUSH_DATA (push, 
height); > - BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); > - PUSH_DATA (push, 0x3c); > + nv50_clear_buffer_push(pipe, res, offset, width * data_size, > + data, data_size); > } > > - BEGIN_NV04(push, NV50_3D(COND_MODE), 1); > - PUSH_DATA (push, nv50->cond_condmode); > - > - nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); > - nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); > - > nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; > } > > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c > index 4e43c4e..be4c531 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c > @@ -357,27 +357,132 @@ nvc0_clear_render_target(struct pipe_context *pipe, > } > > static void > -nvc0_clear_buffer_cpu(struct pipe_context *pipe, > - struct pipe_resource *res, > - unsigned offset, unsigned size, > - const void *data, int data_size) > +nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe, > + struct pipe_resource *res, > + unsigned offset, unsigned size, > + const void *data, int data_size) > { > + struct nvc0_context *nvc0 = nvc0_context(pipe); > + struct nouveau_pushbuf *push = nvc0->base.pushbuf; > struct nv04_resource *buf = nv04_resource(res); > - struct pipe_transfer *pt; > - struct pipe_box box; > - unsigned elements, i; > + unsigned i; > > - elements = size / data_size; > + nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | > NOUVEAU_BO_WR); > + nouveau_pushbuf_bufctx(push, nvc0->bufctx); > + nouveau_pushbuf_validate(push); > + > + unsigned count = (size + 3) / 4; > + unsigned data_words = data_size / 4; > + > + while (count) { > + unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / > data_words; > + unsigned nr = nr_data * data_words; > > - u_box_1d(offset, size, &box); > + if (!PUSH_SPACE(push, nr + 9)) > + break; > + > + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); > + PUSH_DATAh(push, 
buf->address + offset); > + PUSH_DATA (push, buf->address + offset); > + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); > + PUSH_DATA (push, MIN2(size, nr * 4)); > + PUSH_DATA (push, 1); > + BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); > + PUSH_DATA (push, 0x100111); > > - uint8_t *map = buf->vtbl->transfer_map(pipe, res, 0, > PIPE_TRANSFER_WRITE, > - &box, &pt); > + /* must not be interrupted (trap on QUERY fence, 0x50 works > however) */ > + BEGIN_NIC0(push, NVC0_M2MF(DATA), nr); > + for (i = 0; i < nr_data; i++) > + PUSH_DATAp(push, data, data_words); > > - for (i = 0; i < elements; ++i) > - memcpy(&map[i*data_size], data, data_size); > + count -= nr; > + offset += nr * 4; > + size -= nr * 4; > + } > + > + if (buf->mm) { > + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); > + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); > + } > + > + nouveau_bufctx_reset(nvc0->bufctx, 0); > +} > + > +static void > +nvc0_clear_buffer_push_nve4(struct pipe_context *pipe, > + struct pipe_resource *res, > + unsigned offset, unsigned size, > + const void *data, int data_size) > +{ > + struct nvc0_context *nvc0 = nvc0_context(pipe); > + struct nouveau_pushbuf *push = nvc0->base.pushbuf; > + struct nv04_resource *buf = nv04_resource(res); > + unsigned i; > + > + nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | > NOUVEAU_BO_WR); > + nouveau_pushbuf_bufctx(push, nvc0->bufctx); > + nouveau_pushbuf_validate(push); > + > + unsigned count = (size + 3) / 4; > + unsigned data_words = data_size / 4; > + > + while (count) { > + unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / > data_words; > + unsigned nr = nr_data * data_words; > + > + if (!PUSH_SPACE(push, nr + 10)) > + break; > + > + BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2); > + PUSH_DATAh(push, buf->address + offset); > + PUSH_DATA (push, buf->address + offset); > + BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2); > + PUSH_DATA (push, MIN2(size, nr * 4)); > + 
PUSH_DATA (push, 1); > + /* must not be interrupted (trap on QUERY fence, 0x50 works > however) */ > + BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), nr + 1); > + PUSH_DATA (push, 0x1001); > + for (i = 0; i < nr_data; i++) > + PUSH_DATAp(push, data, data_words); > + > + count -= nr; > + offset += nr * 4; > + size -= nr * 4; > + } > + > + if (buf->mm) { > + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); > + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); > + } > + > + nouveau_bufctx_reset(nvc0->bufctx, 0); > +} > + > +static void > +nvc0_clear_buffer_push(struct pipe_context *pipe, > + struct pipe_resource *res, > + unsigned offset, unsigned size, > + const void *data, int data_size) > +{ > + struct nvc0_context *nvc0 = nvc0_context(pipe); > + unsigned tmp; > + > + if (data_size == 1) { > + tmp = *(unsigned char *)data; > + tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp; > + data = &tmp; > + data_size = 4; > + } else if (data_size == 2) { > + tmp = *(unsigned short *)data; > + tmp = (tmp << 16) | tmp; > + data = &tmp; > + data_size = 4; > + } > > - buf->vtbl->transfer_unmap(pipe, pt); > + if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) > + nvc0_clear_buffer_push_nvc0(pipe, res, offset, size, data, > data_size); > + else > + nvc0_clear_buffer_push_nve4(pipe, res, offset, size, data, > data_size); > } > > static void > @@ -402,10 +507,8 @@ nvc0_clear_buffer(struct pipe_context *pipe, > memcpy(&color.ui, data, 16); > break; > case 12: > - /* This doesn't work, RGB32 is not a valid RT format. > - * dst_fmt = PIPE_FORMAT_R32G32B32_UINT; > - * memcpy(&color.ui, data, 12); > - * memset(&color.ui[3], 0, 4); > + /* RGB32 is not a valid RT format. This will be handled by the > pushbuf > + * uploader. > */ > break; > case 8: > @@ -437,67 +540,75 @@ nvc0_clear_buffer(struct pipe_context *pipe, > assert(size % data_size == 0); > > if (data_size == 12) { > - /* TODO: Find a way to do this with the GPU! 
*/ > - nvc0_clear_buffer_cpu(pipe, res, offset, size, data, data_size); > + nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size); > return; > } > > + if (offset & 0xff) { > + unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset); > + assert(fixup_size % data_size == 0); > + nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, > data_size); > + offset += fixup_size; > + size -= fixup_size; > + if (!size) > + return; > + } > + > elements = size / data_size; > height = (elements + 16383) / 16384; > width = elements / height; > + width &= ~0xff; > > - if (!PUSH_SPACE(push, 40)) > - return; > - > - PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR); > - > - BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4); > - PUSH_DATAf(push, color.f[0]); > - PUSH_DATAf(push, color.f[1]); > - PUSH_DATAf(push, color.f[2]); > - PUSH_DATAf(push, color.f[3]); > - BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); > - PUSH_DATA (push, width << 16); > - PUSH_DATA (push, height << 16); > - > - IMMED_NVC0(push, NVC0_3D(RT_CONTROL), 1); > - > - BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9); > - PUSH_DATAh(push, buf->address + offset); > - PUSH_DATA (push, buf->address + offset); > - PUSH_DATA (push, width * data_size); > - PUSH_DATA (push, height); > - PUSH_DATA (push, nvc0_format_table[dst_fmt].rt); > - PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR); > - PUSH_DATA (push, 1); > - PUSH_DATA (push, 0); > - PUSH_DATA (push, 0); > - > - IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0); > - IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0); > + if (width) { > + if (!PUSH_SPACE(push, 40)) > + return; > > - IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); > + PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR); > > - IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c); > + BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4); > + PUSH_DATAf(push, color.f[0]); > + PUSH_DATAf(push, color.f[1]); > + PUSH_DATAf(push, color.f[2]); > + PUSH_DATAf(push, color.f[3]); > + 
BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); > + PUSH_DATA (push, width << 16); > + PUSH_DATA (push, height << 16); > > - if (width * height != elements) { > - offset += width * height * data_size; > - width = elements - width * height; > - height = 1; > + IMMED_NVC0(push, NVC0_3D(RT_CONTROL), 1); > > - BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 4); > + BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9); > PUSH_DATAh(push, buf->address + offset); > PUSH_DATA (push, buf->address + offset); > PUSH_DATA (push, width * data_size); > PUSH_DATA (push, height); > + PUSH_DATA (push, nvc0_format_table[dst_fmt].rt); > + PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR); > + PUSH_DATA (push, 1); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, 0); > + > + IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0); > + IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0); > + > + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); > > IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c); > + > + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); > + > + if (buf->mm) { > + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); > + nouveau_fence_ref(nvc0->screen->base.fence.current, > &buf->fence_wr); > + } > } > > - IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); > + if (width * height != elements) { > + offset += width * height * data_size; > + width = elements - width * height; > + nvc0_clear_buffer_push(pipe, res, offset, width * data_size, > + data, data_size); > + } > > - nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); > - nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); > nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; > } > > -- > 2.4.10 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev