Hello, You have added some #defines but you don't use them everywhere. For instance: cs->buf[cs->cdw++] = PKT3(0x24 /* PKT3_DRAW_INDIRECT */, 1, rctx->b.predicate_drawing); instead of simply: cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDIRECT, 1, rctx->b.predicate_drawing);
There are 5 instances like that. Regards. - Benjamin 2014-11-08 23:52 GMT+01:00 Glenn Kennard <glenn.kenn...@gmail.com>: > Requires evergreen/cayman, and updated radeon kernel module. > > Signed-off-by: Glenn Kennard <glenn.kenn...@gmail.com> > --- > See also kernel side patch sent to dri-de...@lists.freedesktop.org > > docs/GL3.txt | 4 +- > docs/relnotes/10.4.html | 1 + > src/gallium/drivers/r600/evergreend.h | 7 ++- > src/gallium/drivers/r600/r600_pipe.c | 6 ++- > src/gallium/drivers/r600/r600_state_common.c | 80 > ++++++++++++++++++++++------ > 5 files changed, 77 insertions(+), 21 deletions(-) > > diff --git a/docs/GL3.txt b/docs/GL3.txt > index 2854431..06c52f9 100644 > --- a/docs/GL3.txt > +++ b/docs/GL3.txt > @@ -95,7 +95,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, > radeonsi, llvmpipe, soft > GL 4.0, GLSL 4.00: > > GL_ARB_draw_buffers_blend DONE (i965, nv50, > nvc0, r600, radeonsi, llvmpipe, softpipe) > - GL_ARB_draw_indirect DONE (i965, nvc0, > radeonsi, llvmpipe, softpipe) > + GL_ARB_draw_indirect DONE (i965, nvc0, > r600, radeonsi, llvmpipe, softpipe) > GL_ARB_gpu_shader5 DONE (i965, nvc0) > - 'precise' qualifier DONE > - Dynamically uniform sampler array indices DONE (r600) > @@ -159,7 +159,7 @@ GL 4.3, GLSL 4.30: > GL_ARB_framebuffer_no_attachments not started > GL_ARB_internalformat_query2 not started > GL_ARB_invalidate_subdata DONE (all drivers) > - GL_ARB_multi_draw_indirect DONE (i965, nvc0, > radeonsi, llvmpipe, softpipe) > + GL_ARB_multi_draw_indirect DONE (i965, nvc0, > r600, radeonsi, llvmpipe, softpipe) > GL_ARB_program_interface_query not started > GL_ARB_robust_buffer_access_behavior not started > GL_ARB_shader_image_size not started > diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html > index d0fbd3b..9c2a491 100644 > --- a/docs/relnotes/10.4.html > +++ b/docs/relnotes/10.4.html > @@ -49,6 +49,7 @@ Note: some of the new features are only available with > certain drivers. 
> <li>GL_ARB_texture_view on nv50, nvc0</li> > <li>GL_ARB_clip_control on llvmpipe, softpipe, r300, r600, radeonsi</li> > <li>GL_KHR_context_flush_control on all drivers</li> > +<li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li> > </ul> > > > diff --git a/src/gallium/drivers/r600/evergreend.h > b/src/gallium/drivers/r600/evergreend.h > index 4989996..b8880c8 100644 > --- a/src/gallium/drivers/r600/evergreend.h > +++ b/src/gallium/drivers/r600/evergreend.h > @@ -64,6 +64,8 @@ > #define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 > > #define PKT3_NOP 0x10 > +#define PKT3_SET_BASE 0x11 > +#define PKT3_INDEX_BUFFER_SIZE 0x13 > #define PKT3_DEALLOC_STATE 0x14 > #define PKT3_DISPATCH_DIRECT 0x15 > #define PKT3_DISPATCH_INDIRECT 0x16 > @@ -72,12 +74,15 @@ > #define PKT3_REG_RMW 0x21 > #define PKT3_COND_EXEC 0x22 > #define PKT3_PRED_EXEC 0x23 > -#define PKT3_START_3D_CMDBUF 0x24 > +#define PKT3_DRAW_INDIRECT 0x24 > +#define PKT3_DRAW_INDEX_INDIRECT 0x25 > +#define PKT3_INDEX_BASE 0x26 > #define PKT3_DRAW_INDEX_2 0x27 > #define PKT3_CONTEXT_CONTROL 0x28 > #define PKT3_DRAW_INDEX_IMMD_BE 0x29 > #define PKT3_INDEX_TYPE 0x2A > #define PKT3_DRAW_INDEX 0x2B > +#define PKT3_DRAW_INDIRECT_MULTI 0x2C > #define PKT3_DRAW_INDEX_AUTO 0x2D > #define PKT3_DRAW_INDEX_IMMD 0x2E > #define PKT3_NUM_INSTANCES 0x2F > diff --git a/src/gallium/drivers/r600/r600_pipe.c > b/src/gallium/drivers/r600/r600_pipe.c > index 0b571e4..829deaf 100644 > --- a/src/gallium/drivers/r600/r600_pipe.c > +++ b/src/gallium/drivers/r600/r600_pipe.c > @@ -313,6 +313,11 @@ static int r600_get_param(struct pipe_screen* > pscreen, enum pipe_cap param) > return family >= CHIP_CEDAR ? 1 : 0; > case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > return family >= CHIP_CEDAR ? 4 : 0; > + case PIPE_CAP_DRAW_INDIRECT: > + /* needs kernel command checking support to work */ > + if (family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= > 41) > + return 1; > + return 0; > > /* Unsupported features. 
*/ > case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: > @@ -322,7 +327,6 @@ static int r600_get_param(struct pipe_screen* pscreen, > enum pipe_cap param) > case PIPE_CAP_VERTEX_COLOR_CLAMPED: > case PIPE_CAP_USER_VERTEX_BUFFERS: > case PIPE_CAP_TEXTURE_GATHER_OFFSETS: > - case PIPE_CAP_DRAW_INDIRECT: > case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: > case PIPE_CAP_SAMPLER_VIEW_TARGET: > return 0; > diff --git a/src/gallium/drivers/r600/r600_state_common.c > b/src/gallium/drivers/r600/r600_state_common.c > index c3f21cb..649bf24 100644 > --- a/src/gallium/drivers/r600/r600_state_common.c > +++ b/src/gallium/drivers/r600/r600_state_common.c > @@ -1362,7 +1362,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > unsigned i; > struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; > > - if (!info.count && (info.indexed || > !info.count_from_stream_output)) { > + if (!info.indirect && !info.count && (info.indexed || > !info.count_from_stream_output)) { > return; > } > > @@ -1391,7 +1391,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > ib.offset = rctx->index_buffer.offset + info.start * > ib.index_size; > > /* Translate 8-bit indices to 16-bit. */ > - if (ib.index_size == 1) { > + if (unlikely(ib.index_size == 1)) { > struct pipe_resource *out_buffer = NULL; > unsigned out_offset; > void *ptr; > @@ -1414,7 +1414,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > * and the indices are emitted via PKT3_DRAW_INDEX_IMMD. > * Note: Instanced rendering in combination with immediate > indices hangs. 
*/ > if (ib.user_buffer && (R600_BIG_ENDIAN || > info.instance_count > 1 || > - info.count*ib.index_size > 20)) { > + info.count*ib.index_size > 20 || > + info.indirect)) { > u_upload_data(rctx->b.uploader, 0, info.count * > ib.index_size, > ib.user_buffer, &ib.offset, > &ib.buffer); > ib.user_buffer = NULL; > @@ -1521,6 +1522,21 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > /* Draw packets. */ > cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, > rctx->b.predicate_drawing); > cs->buf[cs->cdw++] = info.instance_count; > + > + if (unlikely(info.indirect)) { > + uint64_t va = r600_resource(info.indirect)->gpu_address; > + assert(rctx->b.chip_class >= EVERGREEN); > + cs->buf[cs->cdw++] = PKT3(0x11 /* PKT3_SET_BASE */, 2, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = 1; // 1 means DX11 > Draw_Index_Indirect Patch Table Base > + cs->buf[cs->cdw++] = va; > + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; > + > + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, > &rctx->b.rings.gfx, > + (struct > r600_resource*)info.indirect, > + > RADEON_USAGE_READ, RADEON_PRIO_MIN); > + } > + > if (info.indexed) { > cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, > rctx->b.predicate_drawing); > cs->buf[cs->cdw++] = ib.index_size == 4 ? 
> @@ -1537,18 +1553,40 @@ static void r600_draw_vbo(struct pipe_context > *ctx, const struct pipe_draw_info > cs->cdw += size_dw; > } else { > uint64_t va = > r600_resource(ib.buffer)->gpu_address + ib.offset; > - cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, > rctx->b.predicate_drawing); > - cs->buf[cs->cdw++] = va; > - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; > - cs->buf[cs->cdw++] = info.count; > - cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA; > - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, > rctx->b.predicate_drawing); > - cs->buf[cs->cdw++] = > r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, > - (struct > r600_resource*)ib.buffer, > - > RADEON_USAGE_READ, RADEON_PRIO_MIN); > + > + if (likely(!info.indirect)) { > + cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, > 3, rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = va; > + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; > + cs->buf[cs->cdw++] = info.count; > + cs->buf[cs->cdw++] = > V_0287F0_DI_SRC_SEL_DMA; > + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = > r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, > + > (struct r600_resource*)ib.buffer, > + > RADEON_USAGE_READ, RADEON_PRIO_MIN); > + } > + else { > + uint32_t max_size = (ib.buffer->width0 - > ib.offset) / ib.index_size; > + > + cs->buf[cs->cdw++] = PKT3(0x26 /* > PKT3_INDEX_BASE */, 1, rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = va; > + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; > + > + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = > r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, > + > (struct r600_resource*)ib.buffer, > + > RADEON_USAGE_READ, RADEON_PRIO_MIN); > + > + cs->buf[cs->cdw++] = PKT3(0x13 /* > PKT3_INDEX_BUFFER_SIZE */, 0, rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = max_size; > + > + cs->buf[cs->cdw++] = PKT3(0x25 /* > PKT3_DRAW_INDEX_INDIRECT */, 1, rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = info.indirect_offset; > + 
cs->buf[cs->cdw++] = > V_0287F0_DI_SRC_SEL_DMA; > + } > } > } else { > - if (info.count_from_stream_output) { > + if (unlikely(info.count_from_stream_output)) { > struct r600_so_target *t = (struct > r600_so_target*)info.count_from_stream_output; > uint64_t va = t->buf_filled_size->gpu_address + > t->buf_filled_size_offset; > > @@ -1567,10 +1605,18 @@ static void r600_draw_vbo(struct pipe_context > *ctx, const struct pipe_draw_info > > RADEON_PRIO_MIN); > } > > - cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, > rctx->b.predicate_drawing); > - cs->buf[cs->cdw++] = info.count; > - cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX | > - (info.count_from_stream_output ? > S_0287F0_USE_OPAQUE(1) : 0); > + if (likely(!info.indirect)) { > + cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = info.count; > + cs->buf[cs->cdw++] = > V_0287F0_DI_SRC_SEL_AUTO_INDEX | > + > (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0); > + } > + else { > + cs->buf[cs->cdw++] = PKT3(0x24 /* > PKT3_DRAW_INDIRECT */, 1, rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = info.indirect_offset; > + cs->buf[cs->cdw++] = > V_0287F0_DI_SRC_SEL_AUTO_INDEX | > + > (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0); > + } > } > > if (rctx->screen->b.trace_bo) { > -- > 1.9.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev