Please bump the size of vgt_state to account for the new SQ_VTX_BASE_VTX_LOC register write. The size is set by r600_init_atom in r600_state.c and evergreen_state.c.
Please bump R600_MAX_DRAW_CS_DWORDS. It's an upper bound of how many dwords draw_vbo can emit. I don't understand what get_vfetch_type is good for. Could you please explain it in the code? Also, I don't understand what constant buffer fetches have to do with VertexID. Marek On Thu, Feb 5, 2015 at 11:28 PM, Glenn Kennard <glenn.kenn...@gmail.com> wrote: > Requires Evergreen/Cayman and radeon kernel module > 2.41.0 or newer. > > Signed-off-by: Glenn Kennard <glenn.kenn...@gmail.com> > --- > Changes since v2: > * Fix failing arb_draw_indirect-vertexid piglit test cases. > * Ensure start_instance, base_vertex, index_offset are reset when > switching back to direct draws. > * Juggled some header defines to avoid use of magic numbers. > > docs/GL3.txt | 4 +- > docs/relnotes/10.5.0.html | 1 + > src/gallium/drivers/r600/evergreend.h | 1 - > src/gallium/drivers/r600/r600_pipe.c | 4 +- > src/gallium/drivers/r600/r600_pipe.h | 1 + > src/gallium/drivers/r600/r600_shader.c | 14 ++- > src/gallium/drivers/r600/r600_state_common.c | 128 > ++++++++++++++++++++++----- > src/gallium/drivers/r600/r600d.h | 8 +- > 8 files changed, 130 insertions(+), 31 deletions(-) > > diff --git a/docs/GL3.txt b/docs/GL3.txt > index 23f5561..ef4f0ae 100644 > --- a/docs/GL3.txt > +++ b/docs/GL3.txt > @@ -95,7 +95,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, > radeonsi, llvmpipe, soft > GL 4.0, GLSL 4.00: > > GL_ARB_draw_buffers_blend DONE (i965, nv50, > nvc0, r600, radeonsi, llvmpipe, softpipe) > - GL_ARB_draw_indirect DONE (i965, nvc0, > radeonsi, llvmpipe, softpipe) > + GL_ARB_draw_indirect DONE (i965, nvc0, > r600, radeonsi, llvmpipe, softpipe) > GL_ARB_gpu_shader5 DONE (i965, nvc0) > - 'precise' qualifier DONE > - Dynamically uniform sampler array indices DONE (r600) > @@ -159,7 +159,7 @@ GL 4.3, GLSL 4.30: > GL_ARB_framebuffer_no_attachments not started > GL_ARB_internalformat_query2 not started > GL_ARB_invalidate_subdata DONE (all drivers) > - GL_ARB_multi_draw_indirect DONE 
(i965, nvc0, > radeonsi, llvmpipe, softpipe) > + GL_ARB_multi_draw_indirect DONE (i965, nvc0, > r600, radeonsi, llvmpipe, softpipe) > GL_ARB_program_interface_query not started > GL_ARB_robust_buffer_access_behavior not started > GL_ARB_shader_image_size not started > diff --git a/docs/relnotes/10.5.0.html b/docs/relnotes/10.5.0.html > index 4f921ea..47686c0 100644 > --- a/docs/relnotes/10.5.0.html > +++ b/docs/relnotes/10.5.0.html > @@ -49,6 +49,7 @@ Note: some of the new features are only available with > certain drivers. > <li>GL_EXT_packed_float on freedreno</li> > <li>GL_EXT_texture_shared_exponent on freedreno</li> > <li>GL_EXT_texture_snorm on freedreno</li> > +<li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li> > </ul> > > > diff --git a/src/gallium/drivers/r600/evergreend.h > b/src/gallium/drivers/r600/evergreend.h > index 4989996..cd4ff46 100644 > --- a/src/gallium/drivers/r600/evergreend.h > +++ b/src/gallium/drivers/r600/evergreend.h > @@ -72,7 +72,6 @@ > #define PKT3_REG_RMW 0x21 > #define PKT3_COND_EXEC 0x22 > #define PKT3_PRED_EXEC 0x23 > -#define PKT3_START_3D_CMDBUF 0x24 > #define PKT3_DRAW_INDEX_2 0x27 > #define PKT3_CONTEXT_CONTROL 0x28 > #define PKT3_DRAW_INDEX_IMMD_BE 0x29 > diff --git a/src/gallium/drivers/r600/r600_pipe.c > b/src/gallium/drivers/r600/r600_pipe.c > index b6f7859..3127e23 100644 > --- a/src/gallium/drivers/r600/r600_pipe.c > +++ b/src/gallium/drivers/r600/r600_pipe.c > @@ -313,6 +313,9 @@ static int r600_get_param(struct pipe_screen* pscreen, > enum pipe_cap param) > return family >= CHIP_CEDAR ? 1 : 0; > case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > return family >= CHIP_CEDAR ? 4 : 0; > + case PIPE_CAP_DRAW_INDIRECT: > + /* kernel command checker support is also required */ > + return family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= > 41; > > /* Unsupported features. 
*/ > case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: > @@ -322,7 +325,6 @@ static int r600_get_param(struct pipe_screen* pscreen, > enum pipe_cap param) > case PIPE_CAP_VERTEX_COLOR_CLAMPED: > case PIPE_CAP_USER_VERTEX_BUFFERS: > case PIPE_CAP_TEXTURE_GATHER_OFFSETS: > - case PIPE_CAP_DRAW_INDIRECT: > case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: > case PIPE_CAP_SAMPLER_VIEW_TARGET: > case PIPE_CAP_VERTEXID_NOBASE: > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index e110efe..1db43c4 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -145,6 +145,7 @@ struct r600_vgt_state { > uint32_t vgt_multi_prim_ib_reset_en; > uint32_t vgt_multi_prim_ib_reset_indx; > uint32_t vgt_indx_offset; > + bool last_draw_was_indirect; > }; > > struct r600_blend_color { > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 16e820e..19c84bb 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -291,6 +291,7 @@ struct r600_shader_ctx { > uint32_t nliterals; > uint32_t max_driver_temp_used; > boolean use_llvm; > + boolean has_vertexid; > /* needed for evergreen interpolation */ > struct eg_interp eg_interpolators[6]; // indexed by > Persp/Linear * 3 + sample/center/centroid > /* evergreen/cayman also store sample mask in face register */ > @@ -749,8 +750,10 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) > return r; > } > break; > - } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) > + } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) { > + ctx->has_vertexid = true; > break; > + } > else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID) > break; > default: > @@ -1060,6 +1063,11 @@ static void tgsi_src(struct r600_shader_ctx *ctx, > } > } > > +static int get_vfetch_type(struct r600_shader_ctx *ctx) { > + // TODO: Only set VERTEX if src depends on VERTEXID > + return 
ctx->has_vertexid ? 0 : 2; /* > SQ_VTX_FETCH_VERTEX_DATA / VTX_FETCH_NO_INDEX_OFFSET */; > +} > + > static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, > unsigned int cb_idx, unsigned cb_rel, > unsigned int offset, unsigned ar_chan, > unsigned int dst_reg) > @@ -1095,7 +1103,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx > *ctx, > > memset(&vtx, 0, sizeof(vtx)); > vtx.buffer_id = cb_idx; > - vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ > + vtx.fetch_type = get_vfetch_type(ctx); > vtx.src_gpr = ar_reg; > vtx.src_sel_x = ar_chan; > vtx.mega_fetch_count = 16; > @@ -4990,7 +4998,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx > *ctx, boolean src_requires_l > memset(&vtx, 0, sizeof(vtx)); > vtx.op = FETCH_OP_VFETCH; > vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; > - vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ > + vtx.fetch_type = get_vfetch_type(ctx); > vtx.src_gpr = src_gpr; > vtx.mega_fetch_count = 16; > vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + > inst->Dst[0].Register.Index; > diff --git a/src/gallium/drivers/r600/r600_state_common.c > b/src/gallium/drivers/r600/r600_state_common.c > index b498d00..a08124b 100644 > --- a/src/gallium/drivers/r600/r600_state_common.c > +++ b/src/gallium/drivers/r600/r600_state_common.c > @@ -196,6 +196,10 @@ void r600_emit_vgt_state(struct r600_context *rctx, > struct r600_atom *atom) > r600_write_context_reg_seq(cs, R_028408_VGT_INDX_OFFSET, 2); > radeon_emit(cs, a->vgt_indx_offset); /* R_028408_VGT_INDX_OFFSET */ > radeon_emit(cs, a->vgt_multi_prim_ib_reset_indx); /* > R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX */ > + if (a->last_draw_was_indirect) { > + a->last_draw_was_indirect = false; > + r600_write_ctl_const(cs, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); > + } > } > > static void r600_set_clip_state(struct pipe_context *ctx, > @@ -1353,7 +1357,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > unsigned i; > struct radeon_winsys_cs *cs = 
rctx->b.rings.gfx.cs; > > - if (!info.count && (info.indexed || !info.count_from_stream_output)) { > + if (!info.indirect && !info.count && (info.indexed || > !info.count_from_stream_output)) { > return; > } > > @@ -1379,19 +1383,44 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > pipe_resource_reference(&ib.buffer, > rctx->index_buffer.buffer); > ib.user_buffer = rctx->index_buffer.user_buffer; > ib.index_size = rctx->index_buffer.index_size; > - ib.offset = rctx->index_buffer.offset + info.start * > ib.index_size; > + ib.offset = rctx->index_buffer.offset; > + if (!info.indirect) { > + ib.offset += info.start * ib.index_size; > + } > > /* Translate 8-bit indices to 16-bit. */ > - if (ib.index_size == 1) { > + if (unlikely(ib.index_size == 1)) { > struct pipe_resource *out_buffer = NULL; > unsigned out_offset; > void *ptr; > + unsigned start, count; > + > + if (likely(!info.indirect)) { > + start = 0; > + count = info.count; > + } > + else { > + /* Have to get start/count from indirect > buffer, slow path ahead... 
*/ > + struct r600_resource *indirect_resource = > (struct r600_resource *)info.indirect; > + unsigned *data = > r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, > + PIPE_TRANSFER_READ); > + if (data) { > + data += info.indirect_offset / > sizeof(unsigned); > + start = data[2] * ib.index_size; > + count = data[0]; > + > rctx->b.ws->buffer_unmap(indirect_resource->cs_buf); > + } > + else { > + start = 0; > + count = 0; > + } > + } > > - u_upload_alloc(rctx->b.uploader, 0, info.count * 2, > + u_upload_alloc(rctx->b.uploader, start, count * 2, > &out_offset, &out_buffer, &ptr); > > util_shorten_ubyte_elts_to_userptr( > - &rctx->b.b, &ib, 0, > ib.offset, info.count, ptr); > + &rctx->b.b, &ib, 0, ib.offset > + start, count, ptr); > > pipe_resource_reference(&ib.buffer, NULL); > ib.user_buffer = NULL; > @@ -1403,9 +1432,11 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > /* Upload the index buffer. > * The upload is skipped for small index counts on > little-endian machines > * and the indices are emitted via PKT3_DRAW_INDEX_IMMD. > + * Indirect draws never use immediate indices. > * Note: Instanced rendering in combination with immediate > indices hangs. */ > - if (ib.user_buffer && (R600_BIG_ENDIAN || info.instance_count > > 1 || > - info.count*ib.index_size > 20)) { > + if (ib.user_buffer && (R600_BIG_ENDIAN || info.indirect || > + info.instance_count > 1 || > + info.count*ib.index_size > > 20)) { > u_upload_data(rctx->b.uploader, 0, info.count * > ib.index_size, > ib.user_buffer, &ib.offset, &ib.buffer); > ib.user_buffer = NULL; > @@ -1417,7 +1448,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > /* Set the index offset and primitive restart. 
*/ > if (rctx->vgt_state.vgt_multi_prim_ib_reset_en != > info.primitive_restart || > rctx->vgt_state.vgt_multi_prim_ib_reset_indx != > info.restart_index || > - rctx->vgt_state.vgt_indx_offset != info.index_bias) { > + rctx->vgt_state.vgt_indx_offset != info.index_bias || > + (rctx->vgt_state.last_draw_was_indirect && !info.indirect)) { > rctx->vgt_state.vgt_multi_prim_ib_reset_en = > info.primitive_restart; > rctx->vgt_state.vgt_multi_prim_ib_reset_indx = > info.restart_index; > rctx->vgt_state.vgt_indx_offset = info.index_bias; > @@ -1485,7 +1517,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > } > > /* Update start instance. */ > - if (rctx->last_start_instance != info.start_instance) { > + if (!info.indirect && rctx->last_start_instance != > info.start_instance) { > r600_write_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, > info.start_instance); > rctx->last_start_instance = info.start_instance; > } > @@ -1510,8 +1542,30 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > } > > /* Draw packets. 
*/ > - cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, > rctx->b.predicate_drawing); > - cs->buf[cs->cdw++] = info.instance_count; > + if (!info.indirect) { > + cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = info.instance_count; > + } > + > + if (unlikely(info.indirect)) { > + uint64_t va = r600_resource(info.indirect)->gpu_address; > + assert(rctx->b.chip_class >= EVERGREEN); > + > + // Invalidate so non-indirect draw calls reset this state > + rctx->vgt_state.last_draw_was_indirect = true; > + rctx->last_start_instance = -1; > + > + cs->buf[cs->cdw++] = PKT3(EG_PKT3_SET_BASE, 2, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE; > + cs->buf[cs->cdw++] = va; > + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; > + > + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, > &rctx->b.rings.gfx, > + (struct > r600_resource*)info.indirect, > + RADEON_USAGE_READ, > RADEON_PRIO_MIN); > + } > + > if (info.indexed) { > cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, > rctx->b.predicate_drawing); > cs->buf[cs->cdw++] = ib.index_size == 4 ? 
> @@ -1528,18 +1582,40 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > cs->cdw += size_dw; > } else { > uint64_t va = r600_resource(ib.buffer)->gpu_address + > ib.offset; > - cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, > rctx->b.predicate_drawing); > - cs->buf[cs->cdw++] = va; > - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; > - cs->buf[cs->cdw++] = info.count; > - cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA; > - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, > rctx->b.predicate_drawing); > - cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, > &rctx->b.rings.gfx, > - (struct > r600_resource*)ib.buffer, > - > RADEON_USAGE_READ, RADEON_PRIO_MIN); > + > + if (likely(!info.indirect)) { > + cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = va; > + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; > + cs->buf[cs->cdw++] = info.count; > + cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA; > + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = > r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, > + > (struct r600_resource*)ib.buffer, > + > RADEON_USAGE_READ, RADEON_PRIO_MIN); > + } > + else { > + uint32_t max_size = (ib.buffer->width0 - > ib.offset) / ib.index_size; > + > + cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BASE, > 1, rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = va; > + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; > + > + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = > r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, > + > (struct r600_resource*)ib.buffer, > + > RADEON_USAGE_READ, RADEON_PRIO_MIN); > + > + cs->buf[cs->cdw++] = > PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = max_size; > + > + cs->buf[cs->cdw++] = > PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = info.indirect_offset; > + cs->buf[cs->cdw++] = 
V_0287F0_DI_SRC_SEL_DMA; > + } > } > } else { > - if (info.count_from_stream_output) { > + if (unlikely(info.count_from_stream_output)) { > struct r600_so_target *t = (struct > r600_so_target*)info.count_from_stream_output; > uint64_t va = t->buf_filled_size->gpu_address + > t->buf_filled_size_offset; > > @@ -1558,8 +1634,14 @@ static void r600_draw_vbo(struct pipe_context *ctx, > const struct pipe_draw_info > > RADEON_PRIO_MIN); > } > > - cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, > rctx->b.predicate_drawing); > - cs->buf[cs->cdw++] = info.count; > + if (likely(!info.indirect)) { > + cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = info.count; > + } > + else { > + cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDIRECT, 1, > rctx->b.predicate_drawing); > + cs->buf[cs->cdw++] = info.indirect_offset; > + } > cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX | > (info.count_from_stream_output ? > S_0287F0_USE_OPAQUE(1) : 0); > } > diff --git a/src/gallium/drivers/r600/r600d.h > b/src/gallium/drivers/r600/r600d.h > index 6a5b964..bce8b4e 100644 > --- a/src/gallium/drivers/r600/r600d.h > +++ b/src/gallium/drivers/r600/r600d.h > @@ -52,12 +52,18 @@ > > > #define PKT3_NOP 0x10 > +#define EG_PKT3_SET_BASE 0x11 /* >= evergreen */ > +#define EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE 1 /* DX11 > Draw_Index_Indirect Patch Table Base */ > +#define EG_PKT3_INDEX_BUFFER_SIZE 0x13 > #define PKT3_INDIRECT_BUFFER_END 0x17 > #define PKT3_SET_PREDICATION 0x20 > #define PKT3_REG_RMW 0x21 > #define PKT3_COND_EXEC 0x22 > #define PKT3_PRED_EXEC 0x23 > -#define PKT3_START_3D_CMDBUF 0x24 > +#define PKT3_START_3D_CMDBUF 0x24 /* removed on evergreen > */ > +#define EG_PKT3_DRAW_INDIRECT 0x24 /* >= evergreen */ > +#define EG_PKT3_DRAW_INDEX_INDIRECT 0x25 > +#define EG_PKT3_INDEX_BASE 0x26 > #define PKT3_DRAW_INDEX_2 0x27 > #define PKT3_CONTEXT_CONTROL 0x28 > #define PKT3_DRAW_INDEX_IMMD_BE 0x29 > -- > 1.9.1 > > 
_______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev