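From: Marek Olšák <marek.ol...@amd.com>

Store the offset of each bound buffer in si_buffer_resources, so that si_rebind_buffer() can rewrite the descriptors from the stored offsets instead of recovering them from the old descriptor contents and the old GPU virtual address. This removes the old_va parameter of si_rebind_buffer() and the si_desc_reset_buffer_offset() helper. This is a prerequisite for the next commit.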
Cc: 19.1 <mesa-sta...@lists.freedesktop.org> --- src/gallium/drivers/radeonsi/si_buffer.c | 7 +-- src/gallium/drivers/radeonsi/si_descriptors.c | 54 ++++++++----------- src/gallium/drivers/radeonsi/si_state.h | 4 +- 3 files changed, 25 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 4936eb5a5b1..76705937b65 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -280,56 +280,53 @@ si_invalidate_buffer(struct si_context *sctx, /* In AMD_pinned_memory, the user pointer association only gets * broken when the buffer is explicitly re-allocated. */ if (buf->b.is_user_ptr) return false; /* Check if mapping this buffer would cause waiting for the GPU. */ if (si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) || !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) { - uint64_t old_va = buf->gpu_address; - /* Reallocate the buffer in the same pipe_resource. */ si_alloc_resource(sctx->screen, buf); - si_rebind_buffer(sctx, &buf->b.b, old_va); + si_rebind_buffer(sctx, &buf->b.b); } else { util_range_set_empty(&buf->valid_buffer_range); } return true; } /* Replace the storage of dst with src. 
*/ void si_replace_buffer_storage(struct pipe_context *ctx, struct pipe_resource *dst, struct pipe_resource *src) { struct si_context *sctx = (struct si_context*)ctx; struct si_resource *sdst = si_resource(dst); struct si_resource *ssrc = si_resource(src); - uint64_t old_gpu_address = sdst->gpu_address; pb_reference(&sdst->buf, ssrc->buf); sdst->gpu_address = ssrc->gpu_address; sdst->b.b.bind = ssrc->b.b.bind; sdst->b.max_forced_staging_uploads = ssrc->b.max_forced_staging_uploads; sdst->max_forced_staging_uploads = ssrc->max_forced_staging_uploads; sdst->flags = ssrc->flags; assert(sdst->vram_usage == ssrc->vram_usage); assert(sdst->gart_usage == ssrc->gart_usage); assert(sdst->bo_size == ssrc->bo_size); assert(sdst->bo_alignment == ssrc->bo_alignment); assert(sdst->domains == ssrc->domains); - si_rebind_buffer(sctx, dst, old_gpu_address); + si_rebind_buffer(sctx, dst); } static void si_invalidate_resource(struct pipe_context *ctx, struct pipe_resource *resource) { struct si_context *sctx = (struct si_context*)ctx; struct si_resource *buf = si_resource(resource); /* We currently only do anyting here for buffers */ if (resource->target == PIPE_BUFFER) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f795c33cf26..744fc9a15d7 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -992,34 +992,36 @@ static void si_bind_sampler_states(struct pipe_context *ctx, static void si_init_buffer_resources(struct si_buffer_resources *buffers, struct si_descriptors *descs, unsigned num_buffers, short shader_userdata_rel_index, enum radeon_bo_priority priority, enum radeon_bo_priority priority_constbuf) { buffers->priority = priority; buffers->priority_constbuf = priority_constbuf; buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); + buffers->offsets = CALLOC(num_buffers, sizeof(buffers->offsets[0])); si_init_descriptors(descs, 
shader_userdata_rel_index, 4, num_buffers); } static void si_release_buffer_resources(struct si_buffer_resources *buffers, struct si_descriptors *descs) { int i; for (i = 0; i < descs->num_elements; i++) { pipe_resource_reference(&buffers->buffers[i], NULL); } FREE(buffers->buffers); + FREE(buffers->offsets); } static void si_buffer_resources_begin_new_cs(struct si_context *sctx, struct si_buffer_resources *buffers) { unsigned mask = buffers->enabled_mask; /* Add buffers to the CS. */ while (mask) { int i = u_bit_scan(&mask); @@ -1212,53 +1214,54 @@ static void si_set_constant_buffer(struct si_context *sctx, /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. */ if (sctx->chip_class == CIK && (!input || (!input->buffer && !input->user_buffer))) input = &sctx->null_const_buf; if (input && (input->buffer || input->user_buffer)) { struct pipe_resource *buffer = NULL; uint64_t va; + unsigned buffer_offset; /* Upload the user buffer if needed. */ if (input->user_buffer) { - unsigned buffer_offset; - si_upload_const_buffer(sctx, (struct si_resource**)&buffer, input->user_buffer, input->buffer_size, &buffer_offset); if (!buffer) { /* Just unbind on failure. */ si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL); return; } - va = si_resource(buffer)->gpu_address + buffer_offset; } else { pipe_resource_reference(&buffer, input->buffer); - va = si_resource(buffer)->gpu_address + input->buffer_offset; + buffer_offset = input->buffer_offset; } + va = si_resource(buffer)->gpu_address + buffer_offset; + /* Set the descriptor. 
*/ uint32_t *desc = descs->list + slot*4; desc[0] = va; desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0); desc[2] = input->buffer_size; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); buffers->buffers[slot] = buffer; + buffers->offsets[slot] = buffer_offset; radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ, buffers->priority_constbuf, true); buffers->enabled_mask |= 1u << slot; } else { /* Clear the descriptor. */ memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4); buffers->enabled_mask &= ~(1u << slot); } @@ -1329,20 +1332,21 @@ static void si_set_shader_buffer(struct si_context *sctx, S_008F04_STRIDE(0); desc[2] = sbuffer->buffer_size; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); + buffers->offsets[slot] = sbuffer->buffer_offset; radeon_add_to_gfx_buffer_list_check_mem(sctx, buf, writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true); if (writable) buffers->writable_mask |= 1u << slot; else buffers->writable_mask &= ~(1u << slot); buffers->enabled_mask |= 1u << slot; @@ -1498,34 +1502,20 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, buffers->enabled_mask |= 1u << slot; } else { /* Clear the descriptor. 
*/ memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4); buffers->enabled_mask &= ~(1u << slot); } sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; } -static void si_desc_reset_buffer_offset(uint32_t *desc, uint64_t old_buf_va, - struct pipe_resource *new_buf) -{ - /* Retrieve the buffer offset from the descriptor. */ - uint64_t old_desc_va = si_desc_extract_buffer_address(desc); - - assert(old_buf_va <= old_desc_va); - uint64_t offset_within_buffer = old_desc_va - old_buf_va; - - /* Update the descriptor. */ - si_set_buf_desc_address(si_resource(new_buf), offset_within_buffer, - desc); -} - /* INTERNAL CONST BUFFERS */ static void si_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state) { struct si_context *sctx = (struct si_context *)ctx; struct pipe_constant_buffer cb = {}; unsigned stipple[32]; int i; @@ -1596,48 +1586,46 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx) } /* BUFFER DISCARD/INVALIDATION */ /** Reset descriptors of buffer resources after \p buf has been invalidated. */ static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, unsigned descriptors_idx, unsigned slot_mask, struct pipe_resource *buf, - uint64_t old_va, enum radeon_bo_priority priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; unsigned mask = buffers->enabled_mask & slot_mask; while (mask) { unsigned i = u_bit_scan(&mask); if (buffers->buffers[i] == buf) { - si_desc_reset_buffer_offset(descs->list + i*4, - old_va, buf); + si_set_buf_desc_address(si_resource(buf), buffers->offsets[i], + descs->list + i*4); sctx->descriptors_dirty |= 1u << descriptors_idx; radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buf), buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true); } } } /* Update all resource bindings where the buffer is bound, including * all resource descriptors. 
This is invalidate_buffer without * the invalidation. */ -void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, - uint64_t old_va) +void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) { struct si_resource *buffer = si_resource(buf); unsigned i, shader; unsigned num_elems = sctx->vertex_elements ? sctx->vertex_elements->count : 0; /* We changed the buffer, now we need to bind it where the old one * was bound. This consists of 2 things: * 1) Updating the resource descriptor and dirtying it. * 2) Adding a relocation to the CS, so that it's usable. @@ -1663,22 +1651,22 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, /* Streamout buffers. (other internal buffers can't be invalidated) */ if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) { struct si_buffer_resources *buffers = &sctx->rw_buffers; struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; if (buffers->buffers[i] != buf) continue; - si_desc_reset_buffer_offset(descs->list + i*4, - old_va, buf); + si_set_buf_desc_address(si_resource(buf), buffers->offsets[i], + descs->list + i*4); sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; radeon_add_to_gfx_buffer_list_check_mem(sctx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER, true); /* Update the streamout state. */ if (sctx->streamout.begin_emitted) si_emit_streamout_end(sctx); @@ -1687,49 +1675,49 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, si_streamout_buffers_dirty(sctx); } } /* Constant and shader buffers. 
*/ if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), - buf, old_va, + buf, sctx->const_and_shader_buffers[shader].priority_constbuf); } if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS), - buf, old_va, + buf, sctx->const_and_shader_buffers[shader].priority); } if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { /* Texture buffers - update bindings. */ for (shader = 0; shader < SI_NUM_SHADERS; shader++) { struct si_samplers *samplers = &sctx->samplers[shader]; struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); unsigned mask = samplers->enabled_mask; while (mask) { unsigned i = u_bit_scan(&mask); if (samplers->views[i]->texture == buf) { unsigned desc_slot = si_get_sampler_slot(i); - si_desc_reset_buffer_offset(descs->list + - desc_slot * 16 + 4, - old_va, buf); + si_set_buf_desc_address(si_resource(buf), + samplers->views[i]->u.buf.offset, + descs->list + desc_slot * 16 + 4); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_gfx_buffer_list_check_mem(sctx, buffer, RADEON_USAGE_READ, RADEON_PRIO_SAMPLER_BUFFER, true); } } } @@ -1745,23 +1733,23 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, while (mask) { unsigned i = u_bit_scan(&mask); if (images->views[i].resource == buf) { unsigned desc_slot = si_get_image_slot(i); if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(&images->views[i]); - si_desc_reset_buffer_offset( - descs->list + desc_slot * 8 + 4, - old_va, buf); + 
si_set_buf_desc_address(si_resource(buf), + images->views[i].u.buf.offset, + descs->list + desc_slot * 8 + 4); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_gfx_buffer_list_check_mem( sctx, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } } } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 6df24f9648a..6d74d774b6d 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -402,20 +402,21 @@ struct si_descriptors { short shader_userdata_offset; /* The size of one descriptor. */ ubyte element_dw_size; /* If there is only one slot enabled, bind it directly instead of * uploading descriptors. -1 if disabled. */ signed char slot_index_to_bind_directly; }; struct si_buffer_resources { struct pipe_resource **buffers; /* this has num_buffers elements */ + unsigned *offsets; /* this has num_buffers elements */ enum radeon_bo_priority priority:6; enum radeon_bo_priority priority_constbuf:6; /* The i-th bit is set if that element is enabled (non-NULL resource). 
*/ unsigned enabled_mask; unsigned writable_mask; }; #define si_pm4_state_changed(sctx, member) \ @@ -480,22 +481,21 @@ void si_set_rw_shader_buffer(struct si_context *sctx, uint slot, void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx, uint64_t new_active_mask); void si_set_active_descriptors_for_shader(struct si_context *sctx, struct si_shader_selector *sel); bool si_bindless_descriptor_can_reclaim_slab(void *priv, struct pb_slab_entry *entry); struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index); void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab); -void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, - uint64_t old_va); +void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf); /* si_state.c */ void si_init_state_compute_functions(struct si_context *sctx); void si_init_state_functions(struct si_context *sctx); void si_init_screen_state_functions(struct si_screen *sscreen); void si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, enum pipe_format format, unsigned offset, unsigned size, uint32_t *state); void -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev