From: Dave Airlie <airl...@redhat.com> This adds texture buffer object (TBO) support to r600g; with GLSL 1.40 enabled, we now get 3.1 core profiles advertised for r600g.
This code is evergreen only so far, but I don't think there is much to make it work on r600/700/cayman other than testing. a) buffer txq is broken like cube map txq, this sucks, fix it the exact same way. b) buffer fetches are done with a vertex clause. c) vertex swizzling offsets are different than texture swizzles, but we still need to use the combiner, so make it configurable. d) add implementation of UCMP. TODO: r600/700/cayman testing. Signed-off-by: Dave Airlie <airl...@redhat.com> --- src/gallium/drivers/r600/evergreen_state.c | 55 ++++++++++++++++++++ src/gallium/drivers/r600/r600_asm.c | 2 +- src/gallium/drivers/r600/r600_asm.h | 2 + src/gallium/drivers/r600/r600_pipe.c | 4 +- src/gallium/drivers/r600/r600_pipe.h | 10 +++- src/gallium/drivers/r600/r600_shader.c | 75 ++++++++++++++++++++++++++++ src/gallium/drivers/r600/r600_shader.h | 1 + src/gallium/drivers/r600/r600_state_common.c | 58 +++++++++++++++++---- src/gallium/drivers/r600/r600_texture.c | 16 ++++-- 9 files changed, 204 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 996c1b4..49564e7 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -969,6 +969,58 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, return ss; } +static struct pipe_sampler_view * +texture_buffer_sampler_view(struct r600_pipe_sampler_view *view, + unsigned width0, unsigned height0) + +{ + struct pipe_context *ctx = view->base.context; + struct r600_texture *tmp = (struct r600_texture*)view->base.texture; + uint64_t va; + int stride = util_format_get_blocksize(view->base.format); + unsigned format, num_format, format_comp, endian; + unsigned swizzle_res; + unsigned char swizzle[4]; + const struct util_format_description *desc; + + swizzle[0] = view->base.swizzle_r; + swizzle[1] = view->base.swizzle_g; + swizzle[2] = view->base.swizzle_b; + swizzle[3] = 
view->base.swizzle_a; + + r600_vertex_data_type(view->base.format, + &format, &num_format, &format_comp, + &endian); + + desc = util_format_description(view->base.format); + + swizzle_res = r600_get_swizzle_combined(desc->swizzle, swizzle, TRUE); + + va = r600_resource_va(ctx->screen, view->base.texture); + view->tex_resource = &tmp->resource; + + view->skip_mip_address_reloc = true; + view->tex_resource_words[0] = va; + view->tex_resource_words[1] = width0 - 1; + view->tex_resource_words[2] = S_030008_BASE_ADDRESS_HI(va >> 32UL) | + S_030008_STRIDE(stride) | + S_030008_DATA_FORMAT(format) | + S_030008_NUM_FORMAT_ALL(num_format) | + S_030008_FORMAT_COMP_ALL(format_comp) | + S_030008_SRF_MODE_ALL(1) | + S_030008_ENDIAN_SWAP(endian); + view->tex_resource_words[3] = swizzle_res; + /* + * in theory dword 4 is for number of elements, for use with resinfo, + * but it seems to utterly fail to work, the amd gpu shader analyser + * uses a const buffer to store the element sizes for buffer txq + */ + view->tex_resource_words[4] = 0; + view->tex_resource_words[5] = view->tex_resource_words[6] = 0; + view->tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER); + return &view->base; +} + struct pipe_sampler_view * evergreen_create_sampler_view_custom(struct pipe_context *ctx, struct pipe_resource *texture, @@ -997,6 +1049,9 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, view->base.reference.count = 1; view->base.context = ctx; + if (texture->target == PIPE_BUFFER) + return texture_buffer_sampler_view(view, width0, height0); + swizzle[0] = state->swizzle_r; swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 268137f..4bb22bd 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2607,7 +2607,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc) fprintf(stderr, 
"--------------------------------------\n"); } -static void r600_vertex_data_type(enum pipe_format pformat, +void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, unsigned *num_format, unsigned *format_comp, unsigned *endian) { diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 5727a7c..182f403 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -250,4 +250,6 @@ void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint3 void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1); void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1); +void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, + unsigned *num_format, unsigned *format_comp, unsigned *endian); #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 19147d9..0b94bd3 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -424,7 +424,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 256; case PIPE_CAP_GLSL_FEATURE_LEVEL: - return 130; + return family >= CHIP_CEDAR ? 140 : 130; case PIPE_CAP_TEXTURE_MULTISAMPLE: return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO; @@ -438,6 +438,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) /* Supported on Evergreen. */ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return family >= CHIP_CEDAR ? 1 : 0; /* Unsupported features. */ @@ -449,7 +450,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: case PIPE_CAP_USER_VERTEX_BUFFERS: - case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return 0; /* Stream output. 
*/ diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 50f181d..8b25277 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -38,12 +38,13 @@ #define R600_NUM_ATOMS 36 #define R600_MAX_USER_CONST_BUFFERS 13 -#define R600_MAX_DRIVER_CONST_BUFFERS 2 +#define R600_MAX_DRIVER_CONST_BUFFERS 3 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) /* start driver buffers after user buffers */ #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) +#define R600_BUFFER_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) #define R600_MAX_CONST_BUFFER_SIZE 4096 @@ -316,6 +317,7 @@ struct r600_samplerview_state { uint32_t compressed_depthtex_mask; /* which textures are depth */ uint32_t compressed_colortex_mask; boolean dirty_txq_constants; + boolean dirty_buffer_txq_constants; }; struct r600_sampler_states { @@ -333,6 +335,8 @@ struct r600_textures_info { /* cube array txq workaround */ uint32_t *txq_constants; + /* buffer txq workaround */ + uint32_t *buffer_txq_constants; }; struct r600_fence { @@ -663,6 +667,10 @@ struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, const struct pipe_surface *templ, unsigned width, unsigned height); +unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, + const unsigned char *swizzle_view, + boolean vtx); + /* r600_state_common.c */ void r600_init_common_state_functions(struct r600_context *rctx); void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index feb7001..60667e7 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -3819,6 +3819,71 @@ static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, return 
ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; } +static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading) +{ + struct r600_bytecode_vtx vtx; + struct r600_bytecode_alu alu; + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int src_gpr, r, i; + + src_gpr = tgsi_tex_get_src_gpr(ctx, 0); + if (src_requires_loading) { + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 3) + alu.last = 1; + alu.dst.write = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + src_gpr = ctx->temp_reg; + } + + memset(&vtx, 0, sizeof(vtx)); + vtx.inst = 0; + vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + R600_MAX_CONST_BUFFERS;; + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = src_gpr; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; + vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ + vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ + vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ + vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; /* SEL_W */ + vtx.use_const_fields = 1; + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ + + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) + return r; + return 0; +} + +static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r; + int id = tgsi_tex_get_src_gpr(ctx, 1); + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + + alu.src[0].sel = 512 + (id / 4); + alu.src[0].kc_bank = R600_BUFFER_TXQ_CONST_BUFFER; + alu.src[0].chan = id % 4; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; +} + static int tgsi_tex(struct r600_shader_ctx *ctx) { static float one_point_five = 1.5f; @@ -3857,6 +3922,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_gpr = tgsi_tex_get_src_gpr(ctx, 0); + if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { + if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { + ctx->shader->has_txq_buffer = true; + return r600_do_buffer_txq(ctx); + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) + return do_vtx_fetch_inst(ctx, src_requires_loading); + return -1; /* can only TXF/TXQ from buffers */ + } + if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { /* get offset values */ if (inst->Texture.NumOffsets) { diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index b58a58a..88f71ad 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -61,6 +61,7 @@ struct r600_shader { boolean vs_out_misc_write; boolean vs_out_point_size; boolean has_txq_cube_array_z_comp; + boolean has_txq_buffer; }; struct r600_shader_key { diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index b20f655..9e51f7d 100644 --- 
a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -591,19 +591,20 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, struct r600_texture *rtex = (struct r600_texture*)rviews[i]->base.texture; - if (rtex->is_depth && !rtex->is_flushing_texture) { - dst->views.compressed_depthtex_mask |= 1 << i; - } else { - dst->views.compressed_depthtex_mask &= ~(1 << i); - } + if (rviews[i]->base.texture->target != PIPE_BUFFER) { + if (rtex->is_depth && !rtex->is_flushing_texture) { + dst->views.compressed_depthtex_mask |= 1 << i; + } else { + dst->views.compressed_depthtex_mask &= ~(1 << i); + } - /* Track compressed colorbuffers. */ - if (rtex->cmask_size && rtex->fmask_size) { - dst->views.compressed_colortex_mask |= 1 << i; - } else { - dst->views.compressed_colortex_mask &= ~(1 << i); + /* Track compressed colorbuffers. */ + if (rtex->cmask_size && rtex->fmask_size) { + dst->views.compressed_colortex_mask |= 1 << i; + } else { + dst->views.compressed_colortex_mask &= ~(1 << i); + } } - /* Changing from array to non-arrays textures and vice versa requires * updating TEX_ARRAY_OVERRIDE in sampler states on R6xx-R7xx. 
*/ if (rctx->chip_class <= R700 && @@ -628,6 +629,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, dst->views.compressed_depthtex_mask &= dst->views.enabled_mask; dst->views.compressed_colortex_mask &= dst->views.enabled_mask; dst->views.dirty_txq_constants = TRUE; + dst->views.dirty_buffer_txq_constants = TRUE; r600_sampler_views_dirty(rctx, &dst->views); if (dirty_sampler_states_mask) { @@ -1026,6 +1028,35 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask rctx->sample_mask.atom.dirty = true; } +static void r600_setup_txq_buffer_constants(struct r600_context *rctx, int shader_type) +{ + struct r600_textures_info *samplers = &rctx->samplers[shader_type]; + int bits; + uint32_t array_size; + struct pipe_constant_buffer cb; + int i; + + if (!samplers->views.dirty_buffer_txq_constants) + return; + + samplers->views.dirty_buffer_txq_constants = FALSE; + + bits = util_last_bit(samplers->views.enabled_mask); + array_size = bits * sizeof(uint32_t) * 4; + samplers->buffer_txq_constants = realloc(samplers->buffer_txq_constants, array_size); + memset(samplers->buffer_txq_constants, 0, array_size); + for (i = 0; i < bits; i++) + if (samplers->views.enabled_mask & (1 << i)) + samplers->buffer_txq_constants[i] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + + cb.buffer = NULL; + cb.user_buffer = samplers->buffer_txq_constants; + cb.buffer_offset = 0; + cb.buffer_size = array_size; + rctx->context.set_constant_buffer(&rctx->context, shader_type, R600_BUFFER_TXQ_CONST_BUFFER, &cb); + pipe_resource_reference(&cb.buffer, NULL); +} + static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int shader_type) { struct r600_textures_info *samplers = &rctx->samplers[shader_type]; @@ -1098,6 +1129,11 @@ static bool r600_update_derived_state(struct r600_context *rctx) if (rctx->vs_shader && 
rctx->vs_shader->current->shader.has_txq_cube_array_z_comp) r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_VERTEX); + if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_buffer) + r600_setup_txq_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); + if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_buffer) + r600_setup_txq_buffer_constants(rctx, PIPE_SHADER_VERTEX); + if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) { if (!r600_adjust_gprs(rctx)) { /* discard rendering */ diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 56e9b64..5736190 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -862,18 +862,26 @@ void r600_init_surface_functions(struct r600_context *r600) r600->context.surface_destroy = r600_surface_destroy; } -static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, - const unsigned char *swizzle_view) +unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, + const unsigned char *swizzle_view, + boolean vtx) { unsigned i; unsigned char swizzle[4]; unsigned result = 0; - const uint32_t swizzle_shift[4] = { + const uint32_t tex_swizzle_shift[4] = { 16, 19, 22, 25, }; + const uint32_t vtx_swizzle_shift[4] = { + 3, 6, 9, 12, + }; const uint32_t swizzle_bit[4] = { 0, 1, 2, 3, }; + const uint32_t *swizzle_shift = tex_swizzle_shift; + + if (vtx) + swizzle_shift = vtx_swizzle_shift; if (swizzle_view) { util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle); @@ -927,7 +935,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, }; desc = util_format_description(format); - word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view); + word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE); /* Colorspace (return non-RGB formats directly). 
*/ switch (desc->colorspace) { -- 1.8.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev