FMASK is bound as a separate texture. For every texture, there can be an FMASK. Therefore a separate array of resource slots has to be added.
This adds a new mechanism for emitting resource descriptors. Its features are: - resource descriptors are stored in an ordinary buffer (not in a CS, i.e. the command stream) - descriptors of disabled resources are set to zero - fine-grained resource updates (it can update one resource slot while not touching the other slots) - updates are done with the WRITE_DATA packet - it implements the si_atom interface for packet emission - only used for FMASK textures right now The primary motivation for this is that FMASK textures naturally need fine-grained resource updates and I also need to query in the shader whether a resource is NULL. --- src/gallium/drivers/radeonsi/Makefile.sources | 1 + src/gallium/drivers/radeonsi/r600_hw_context.c | 3 + src/gallium/drivers/radeonsi/r600_resource.h | 1 + src/gallium/drivers/radeonsi/r600_texture.c | 1 + src/gallium/drivers/radeonsi/radeonsi_pipe.c | 9 +- src/gallium/drivers/radeonsi/radeonsi_pipe.h | 6 +- src/gallium/drivers/radeonsi/radeonsi_pm4.c | 7 + src/gallium/drivers/radeonsi/radeonsi_pm4.h | 2 + src/gallium/drivers/radeonsi/si_descriptors.c | 188 +++++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_state.c | 58 +++++++- src/gallium/drivers/radeonsi/si_state.h | 36 +++++ 11 files changed, 305 insertions(+), 7 deletions(-) create mode 100644 src/gallium/drivers/radeonsi/si_descriptors.c diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index b3ffa72..68c8282 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -10,6 +10,7 @@ C_SOURCES := \ r600_translate.c \ radeonsi_pm4.c \ radeonsi_compute.c \ + si_descriptors.c \ si_state.c \ si_state_streamout.c \ si_state_draw.c \ diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c index 7ed7496..b595477 100644 --- a/src/gallium/drivers/radeonsi/r600_hw_context.c +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c @@ -289,6 +289,9 @@ void
si_context_flush(struct r600_context *ctx, unsigned flags) * next draw command */ si_pm4_reset_emitted(ctx); + + si_sampler_views_begin_new_cs(ctx, &ctx->fmask_sampler_views[PIPE_SHADER_VERTEX]); + si_sampler_views_begin_new_cs(ctx, &ctx->fmask_sampler_views[PIPE_SHADER_FRAGMENT]); } void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value) diff --git a/src/gallium/drivers/radeonsi/r600_resource.h b/src/gallium/drivers/radeonsi/r600_resource.h index e5dd36a..ab5c7b7 100644 --- a/src/gallium/drivers/radeonsi/r600_resource.h +++ b/src/gallium/drivers/radeonsi/r600_resource.h @@ -44,6 +44,7 @@ struct r600_fmask_info { unsigned offset; unsigned size; unsigned alignment; + unsigned pitch; unsigned bank_height; unsigned slice_tile_max; unsigned tile_mode_index; diff --git a/src/gallium/drivers/radeonsi/r600_texture.c b/src/gallium/drivers/radeonsi/r600_texture.c index cd3d1aa..b613564 100644 --- a/src/gallium/drivers/radeonsi/r600_texture.c +++ b/src/gallium/drivers/radeonsi/r600_texture.c @@ -463,6 +463,7 @@ static void r600_texture_get_fmask_info(struct r600_screen *rscreen, out->slice_tile_max -= 1; out->tile_mode_index = fmask.tiling_index[0]; + out->pitch = fmask.level[0].nblk_x; out->bank_height = fmask.bankh; out->alignment = MAX2(256, fmask.bo_alignment); out->size = fmask.bo_size; diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c index ad955e3..3112124 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c @@ -178,6 +178,9 @@ static void r600_destroy_context(struct pipe_context *context) { struct r600_context *rctx = (struct r600_context *)context; + si_release_sampler_views(&rctx->fmask_sampler_views[PIPE_SHADER_VERTEX]); + si_release_sampler_views(&rctx->fmask_sampler_views[PIPE_SHADER_FRAGMENT]); + si_resource_reference(&rctx->border_color_table, NULL); if (rctx->dummy_pixel_shader) { @@ -233,12 
+236,16 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->context.create_video_buffer = vl_video_buffer_create; } + rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL); + + si_init_sampler_views(rctx, &rctx->fmask_sampler_views[PIPE_SHADER_VERTEX]); + si_init_sampler_views(rctx, &rctx->fmask_sampler_views[PIPE_SHADER_FRAGMENT]); + switch (rctx->chip_class) { case SI: case CIK: si_init_state_functions(rctx); LIST_INITHEAD(&rctx->active_query_list); - rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL); rctx->max_db = 8; si_init_config(rctx); break; diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index 5fa9bdc..fd4ca53 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -83,6 +83,7 @@ struct si_pipe_sampler_view { struct pipe_sampler_view base; struct si_resource *resource; uint32_t state[8]; + uint32_t fmask_state[8]; }; struct si_pipe_sampler_state { @@ -94,9 +95,6 @@ struct si_cs_shader_state { struct si_pipe_compute *program; }; -/* needed for blitter save */ -#define NUM_TEX_UNITS 16 - struct r600_textures_info { struct si_pipe_sampler_view *views[NUM_TEX_UNITS]; struct si_pipe_sampler_state *samplers[NUM_TEX_UNITS]; @@ -149,6 +147,8 @@ struct r600_context { struct si_atom *atoms[SI_MAX_ATOMS]; unsigned num_atoms; + struct si_sampler_views fmask_sampler_views[PIPE_SHADER_TYPES]; + struct si_vertex_element *vertex_elements; struct pipe_framebuffer_state framebuffer; unsigned fb_log_samples; diff --git a/src/gallium/drivers/radeonsi/radeonsi_pm4.c b/src/gallium/drivers/radeonsi/radeonsi_pm4.c index bbc62d3..d404d41 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pm4.c +++ b/src/gallium/drivers/radeonsi/radeonsi_pm4.c @@ -91,6 +91,13 @@ void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val) si_pm4_cmd_end(state, false); } +void si_pm4_set_reg_pointer(struct si_pm4_state *state, 
unsigned reg, + uint64_t va) +{ + si_pm4_set_reg(state, reg, va); + si_pm4_set_reg(state, reg + 4, va >> 32); +} + void si_pm4_add_bo(struct si_pm4_state *state, struct si_resource *bo, enum radeon_bo_usage usage) diff --git a/src/gallium/drivers/radeonsi/radeonsi_pm4.h b/src/gallium/drivers/radeonsi/radeonsi_pm4.h index 68aa36a..a5e91f9 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pm4.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pm4.h @@ -70,6 +70,8 @@ void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw); void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate); void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val); +void si_pm4_set_reg_pointer(struct si_pm4_state *state, unsigned reg, + uint64_t va); void si_pm4_add_bo(struct si_pm4_state *state, struct si_resource *bo, enum radeon_bo_usage usage); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c new file mode 100644 index 0000000..84453f1 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -0,0 +1,188 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Marek Olšák <marek.ol...@amd.com> + */ + +#include "radeonsi_pipe.h" +#include "radeonsi_resource.h" +#include "r600_hw_context_priv.h" + +#include "util/u_memory.h" + + +static void si_init_descriptors(struct r600_context *rctx, + struct si_descriptors *desc, + unsigned element_dw_size, + unsigned num_elements, + void (*emit_func)(struct r600_context *ctx, struct si_atom *state)) +{ + void *map; + + desc->atom.emit = emit_func; + desc->element_dw_size = element_dw_size; + desc->num_elements = num_elements; + desc->buffer = (struct si_resource*) + pipe_buffer_create(rctx->context.screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_STATIC, + num_elements * element_dw_size * 4); + + map = rctx->ws->buffer_map(desc->buffer->cs_buf, NULL, PIPE_TRANSFER_WRITE); + memset(map, 0, desc->buffer->b.b.width0); + + r600_context_bo_reloc(rctx, desc->buffer, RADEON_USAGE_READWRITE); + si_add_atom(rctx, &desc->atom); +} + +static void si_release_descriptors(struct si_descriptors *desc) +{ + pipe_resource_reference((struct pipe_resource**)&desc->buffer, NULL); +} + +static void si_update_descriptors(struct si_descriptors *desc) +{ + if (desc->dirty_mask) { + desc->atom.num_dw = (4 + desc->element_dw_size) * + util_bitcount(desc->dirty_mask); + desc->atom.dirty = true; + } +} + +static void si_emit_descriptors(struct r600_context *rctx, + struct si_descriptors *desc, + const uint32_t **descriptors) +{ + struct radeon_winsys_cs *cs = rctx->cs; + uint64_t va_base; + int packet_start; + 
int packet_size = 0; + int last_index = desc->num_elements; + unsigned dirty_mask = desc->dirty_mask; + + va_base = r600_resource_va(rctx->context.screen, &desc->buffer->b.b); + + while (dirty_mask) { + int i = u_bit_scan(&dirty_mask); + + assert(i < desc->num_elements); + + if (last_index+1 == i && packet_size) { + /* Append new data at the end of the last packet. */ + packet_size += desc->element_dw_size; + cs->buf[packet_start] = PKT3(PKT3_WRITE_DATA, packet_size, 0); + } else { + /* Start a new packet. */ + uint64_t va = va_base + i * desc->element_dw_size * 4; + + packet_start = cs->cdw; + packet_size = 2 + desc->element_dw_size; + + cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, packet_size, 0); + cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) | + PKT3_WRITE_DATA_WR_CONFIRM | + PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME); + cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL; + } + + memcpy(cs->buf+cs->cdw, descriptors[i], desc->element_dw_size * 4); + cs->cdw += desc->element_dw_size; + + last_index = i; + } + desc->dirty_mask = 0; +} + +/* SAMPLER VIEWS */ + +static void si_emit_sampler_views(struct r600_context *rctx, struct si_atom *atom) +{ + struct si_sampler_views *views = (struct si_sampler_views*)atom; + + si_emit_descriptors(rctx, &views->desc, views->desc_data); +} + +void si_init_sampler_views(struct r600_context *rctx, struct si_sampler_views *views) +{ + si_init_descriptors(rctx, &views->desc, 8, 16, + si_emit_sampler_views); +} + +void si_release_sampler_views(struct si_sampler_views *views) +{ + int i; + + for (i = 0; i < Elements(views->views); i++) { + pipe_sampler_view_reference(&views->views[i], NULL); + } + si_release_descriptors(&views->desc); +} + +void si_update_sampler_views(struct si_sampler_views *views) +{ + si_update_descriptors(&views->desc); +} + +void si_sampler_views_begin_new_cs(struct r600_context *rctx, struct si_sampler_views *views) +{ + 
unsigned mask = views->desc.enabled_mask; + + /* Add relocations to the CS. */ + while (mask) { + int i = u_bit_scan(&mask); + struct si_pipe_sampler_view *rview = + (struct si_pipe_sampler_view*)views->views[i]; + + r600_context_bo_reloc(rctx, rview->resource, RADEON_USAGE_READ); + } + + r600_context_bo_reloc(rctx, views->desc.buffer, RADEON_USAGE_READWRITE); +} + +void si_set_fmask_sampler_view(struct r600_context *rctx, unsigned shader, + unsigned slot, struct pipe_sampler_view *view) +{ + static const uint32_t null_desc[8]; + struct si_sampler_views *views = &rctx->fmask_sampler_views[shader]; + + if (views->views[slot] == view) + return; + + if (view) { + struct si_pipe_sampler_view *rview = + (struct si_pipe_sampler_view*)view; + + r600_context_bo_reloc(rctx, rview->resource, RADEON_USAGE_READ); + + pipe_sampler_view_reference(&views->views[slot], view); + views->desc_data[slot] = rview->fmask_state; + views->desc.enabled_mask |= 1 << slot; + } else { + pipe_sampler_view_reference(&views->views[slot], NULL); + views->desc_data[slot] = null_desc; + views->desc.enabled_mask &= ~(1 << slot); + } + + views->desc.dirty_mask |= 1 << slot; + si_update_sampler_views(views); +} diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 6965745..1cc0813 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2701,6 +2701,44 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx view->state[6] = 0; view->state[7] = 0; + /* Initialize the sampler view for FMASK. 
*/ + if (tmp->fmask.size) { + uint64_t va = r600_resource_va(ctx->screen, texture) + tmp->fmask.offset; + uint32_t fmask_format; + + switch (texture->nr_samples) { + case 2: + fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; + break; + case 4: + fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; + break; + case 8: + fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; + break; + default: + assert(0); + } + + view->fmask_state[0] = va >> 8; + view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | + S_008F14_DATA_FORMAT(fmask_format) | + S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); + view->fmask_state[2] = S_008F18_WIDTH(width - 1) | + S_008F18_HEIGHT(height - 1); + view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | + S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | + S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | + S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | + S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) | + S_008F1C_TYPE(si_tex_dim(texture->target, 0)); + view->fmask_state[4] = S_008F20_PITCH(tmp->fmask.pitch - 1); + view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) | + S_008F24_LAST_ARRAY(state->u.tex.last_layer); + view->fmask_state[6] = 0; + view->fmask_state[7] = 0; + } + return &view->base; } @@ -2775,7 +2813,7 @@ static void *si_create_sampler_state(struct pipe_context *ctx, } static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx, - unsigned count, + unsigned shader, unsigned count, struct pipe_sampler_view **views, struct r600_textures_info *samplers, unsigned user_data_reg) @@ -2812,6 +2850,9 @@ static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx, } else { samplers->compressed_colortex_mask &= ~(1 << i); } + + si_set_fmask_sampler_view(rctx, shader, i, + rtex->fmask.size ? 
views[i] : NULL); } else { samplers->depth_texture_mask &= ~(1 << i); samplers->compressed_colortex_mask &= ~(1 << i); @@ -2827,6 +2868,7 @@ static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx, pipe_sampler_view_reference((struct pipe_sampler_view **)&samplers->views[i], NULL); samplers->depth_texture_mask &= ~(1 << i); samplers->compressed_colortex_mask &= ~(1 << i); + si_set_fmask_sampler_view(rctx, shader, i, NULL); } } @@ -2843,7 +2885,7 @@ static void si_set_vs_sampler_views(struct pipe_context *ctx, unsigned count, struct r600_context *rctx = (struct r600_context *)ctx; struct si_pm4_state *pm4; - pm4 = si_set_sampler_views(rctx, count, views, &rctx->vs_samplers, + pm4 = si_set_sampler_views(rctx, PIPE_SHADER_VERTEX, count, views, &rctx->vs_samplers, R_00B130_SPI_SHADER_USER_DATA_VS_0); si_pm4_set_state(rctx, vs_sampler_views, pm4); } @@ -2854,7 +2896,7 @@ static void si_set_ps_sampler_views(struct pipe_context *ctx, unsigned count, struct r600_context *rctx = (struct r600_context *)ctx; struct si_pm4_state *pm4; - pm4 = si_set_sampler_views(rctx, count, views, &rctx->ps_samplers, + pm4 = si_set_sampler_views(rctx, PIPE_SHADER_FRAGMENT, count, views, &rctx->ps_samplers, R_00B030_SPI_SHADER_USER_DATA_PS_0); si_pm4_set_state(rctx, ps_sampler_views, pm4); } @@ -3292,5 +3334,15 @@ void si_init_config(struct r600_context *rctx) } } + si_pm4_set_reg_pointer(pm4, + R_00B130_SPI_SHADER_USER_DATA_VS_0 + SI_SGPR_FMASK_RESOURCE * 4, + r600_resource_va(rctx->context.screen, + &rctx->fmask_sampler_views[PIPE_SHADER_VERTEX].desc.buffer->b.b)); + + si_pm4_set_reg_pointer(pm4, + R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_FMASK_RESOURCE * 4, + r600_resource_va(rctx->context.screen, + &rctx->fmask_sampler_views[PIPE_SHADER_FRAGMENT].desc.buffer->b.b)); + si_pm4_set_state(rctx, init, pm4); } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 4aabdef..9a89d8f 100644 --- 
a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -116,6 +116,34 @@ union si_state { struct si_pm4_state *array[0]; }; +#define NUM_TEX_UNITS 16 + +/* This represents resource descriptors in memory, such as buffer resources, + * image resources, and sampler states. + */ +struct si_descriptors { + struct si_atom atom; + + /* The size of one resource descriptor. */ + unsigned element_dw_size; + /* The maximum number of resource descriptors. */ + unsigned num_elements; + + /* The buffer where resource descriptors are stored. */ + struct si_resource *buffer; + + /* The i-th bit is set if that element is dirty (changed but not emitted). */ + unsigned dirty_mask; + /* The i-th bit is set if that element is enabled (non-NULL resource). */ + unsigned enabled_mask; +}; + +struct si_sampler_views { + struct si_descriptors desc; + struct pipe_sampler_view *views[NUM_TEX_UNITS]; + const uint32_t *desc_data[NUM_TEX_UNITS]; +}; + #define si_pm4_block_idx(member) \ (offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *)) @@ -146,6 +174,14 @@ union si_state { } \ } while(0) +/* si_descriptors.c */ +void si_init_sampler_views(struct r600_context *rctx, struct si_sampler_views *views); +void si_release_sampler_views(struct si_sampler_views *views); +void si_update_sampler_views(struct si_sampler_views *views); +void si_sampler_views_begin_new_cs(struct r600_context *rctx, struct si_sampler_views *views); +void si_set_fmask_sampler_view(struct r600_context *rctx, unsigned shader, + unsigned slot, struct pipe_sampler_view *view); + /* si_state.c */ struct si_pipe_shader_selector; -- 1.8.1.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev