From: Marek Olšák <marek.ol...@amd.com> MSAA is supported using sample shading. Layered rendering and all texture targets are also supported. --- docs/features.txt | 2 +- docs/relnotes/18.1.0.html | 1 + src/gallium/drivers/radeonsi/si_blit.c | 8 +++ src/gallium/drivers/radeonsi/si_descriptors.c | 87 +++++++++++++++++++++-- src/gallium/drivers/radeonsi/si_get.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.h | 9 +++ src/gallium/drivers/radeonsi/si_shader.c | 4 +- src/gallium/drivers/radeonsi/si_shader.h | 3 + src/gallium/drivers/radeonsi/si_shader_internal.h | 1 + src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 59 +++++++++++++++ src/gallium/drivers/radeonsi/si_state.c | 19 +++-- src/gallium/drivers/radeonsi/si_state.h | 8 +++ src/gallium/drivers/radeonsi/si_state_binning.c | 2 +- src/gallium/drivers/radeonsi/si_state_shaders.c | 18 +++++ 14 files changed, 205 insertions(+), 18 deletions(-)
diff --git a/docs/features.txt b/docs/features.txt index 5eae34bf0df..d579d245eb4 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -262,21 +262,21 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, r600, radeonsi Additional functionality not covered above: glMemoryBarrierByRegion DONE glGetTexLevelParameter[fi]v - needs updates DONE glGetBooleani_v - restrict to GLES enums gl_HelperInvocation support DONE (i965, r600) GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+ GL_EXT_color_buffer_float DONE (all drivers) - GL_KHR_blend_equation_advanced DONE (i965, nvc0) + GL_KHR_blend_equation_advanced DONE (i965, nvc0, radeonsi) GL_KHR_debug DONE (all drivers) GL_KHR_robustness DONE (i965, nvc0, radeonsi) GL_KHR_texture_compression_astc_ldr DONE (freedreno, i965/gen9+) GL_OES_copy_image DONE (all drivers) GL_OES_draw_buffers_indexed DONE (all drivers that support GL_ARB_draw_buffers_blend) GL_OES_draw_elements_base_vertex DONE (all drivers) GL_OES_geometry_shader DONE (i965/hsw+, nvc0, radeonsi) GL_OES_gpu_shader5 DONE (all drivers that support GL_ARB_gpu_shader5) GL_OES_primitive_bounding_box DONE (i965/gen7+, nvc0, radeonsi) GL_OES_sample_shading DONE (i965, nvc0, r600, radeonsi) diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html index 3e119078731..a89861d2bda 100644 --- a/docs/relnotes/18.1.0.html +++ b/docs/relnotes/18.1.0.html @@ -43,20 +43,21 @@ TBD. Note: some of the new features are only available with certain drivers. 
</p> <ul> <li>OpenGL 3.1 with ARB_compatibility on nv50, nvc0, r600, radeonsi, softpipe, llvmpipe, svga</li> <li>GL_ARB_bindless_texture on nvc0/maxwell+</li> <li>GL_EXT_semaphore on radeonsi</li> <li>GL_EXT_semaphore_fd on radeonsi</li> <li>GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was already supported)</li> <li>GL_EXT_shader_framebuffer_fetch_non_coherent on i965</li> +<li>GL_KHR_blend_equation_advanced on radeonsi</li> <li>Disk shader cache support for i965 enabled by default</li> </ul> <h2>Bug fixes</h2> <ul> TBD </ul> <h2>Changes</h2> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index f1c4f6d1e72..d9d489825f8 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -799,20 +799,28 @@ void si_decompress_textures(struct si_context *sctx, unsigned shader_mask) si_decompress_resident_textures(sctx); if (sctx->uses_bindless_images) si_decompress_resident_images(sctx); } else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) { if (sctx->cs_shader_state.program->uses_bindless_samplers) si_decompress_resident_textures(sctx); if (sctx->cs_shader_state.program->uses_bindless_images) si_decompress_resident_images(sctx); } + if (sctx->ps_uses_fbfetch) { + struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0]; + si_decompress_color_texture(sctx, + (struct r600_texture*)cb0->texture, + cb0->u.tex.first_layer, + cb0->u.tex.last_layer); + } + si_check_render_feedback(sctx); } /* Helper for decompressing a portion of a color or depth resource before * blitting if any decompression is needed. * The driver doesn't decompress resources automatically while u_blitter is * rendering. 
*/ static void si_decompress_subresource(struct pipe_context *ctx, struct pipe_resource *tex, unsigned planes, unsigned level, diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 07d1420d8ee..38befa445d2 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -680,21 +680,21 @@ si_mark_image_range_valid(const struct pipe_image_view *view) assert(res && res->b.b.target == PIPE_BUFFER); util_range_add(&res->valid_buffer_range, view->u.buf.offset, view->u.buf.offset + view->u.buf.size); } static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_image_view *view, bool skip_decompress, - uint32_t *desc) + uint32_t *desc, uint32_t *fmask_desc) { struct si_screen *screen = ctx->screen; struct r600_resource *res; res = (struct r600_resource *)view->resource; if (res->b.b.target == PIPE_BUFFER) { if (view->access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(view); @@ -708,27 +708,28 @@ static void si_set_shader_image_desc(struct si_context *ctx, struct r600_texture *tex = (struct r600_texture *)res; unsigned level = view->u.tex.level; unsigned width, height, depth, hw_level; bool uses_dcc = vi_dcc_enabled(tex, level); unsigned access = view->access; /* Clear the write flag when writes can't occur. * Note that DCC_DECOMPRESS for MSAA doesn't work in some cases, * so we don't wanna trigger it. */ - if (tex->is_depth || tex->resource.b.b.nr_samples >= 2) { + if (tex->is_depth || + (!fmask_desc && tex->fmask.size != 0)) { assert(!"Z/S and MSAA image stores are not supported"); access &= ~PIPE_IMAGE_ACCESS_WRITE; } assert(!tex->is_depth); - assert(tex->fmask.size == 0); + assert(fmask_desc || tex->fmask.size == 0); if (uses_dcc && !skip_decompress && (view->access & PIPE_IMAGE_ACCESS_WRITE || !vi_dcc_formats_compatible(res->b.b.format, view->format))) { /* If DCC can't be disabled, at least decompress it. 
* The decompression is relatively cheap if the surface * has been decompressed already. */ if (!si_texture_disable_dcc(&ctx->b, tex)) ctx->b.decompress_dcc(&ctx->b.b, tex); @@ -755,21 +756,21 @@ static void si_set_shader_image_desc(struct si_context *ctx, hw_level = 0; } si_make_texture_descriptor(screen, tex, false, res->b.b.target, view->format, swizzle, hw_level, hw_level, view->u.tex.first_layer, view->u.tex.last_layer, width, height, depth, - desc, NULL); + desc, fmask_desc); si_set_mutable_tex_desc_fields(screen, tex, &tex->surface.u.legacy.level[level], level, level, util_format_get_blockwidth(view->format), false, desc); } } static void si_set_shader_image(struct si_context *ctx, unsigned shader, @@ -785,21 +786,21 @@ static void si_set_shader_image(struct si_context *ctx, if (!view || !view->resource) { si_disable_shader_image(ctx, shader, slot); return; } res = (struct r600_resource *)view->resource; if (&images->views[slot] != view) util_copy_image_view(&images->views[slot], view); - si_set_shader_image_desc(ctx, view, skip_decompress, desc); + si_set_shader_image_desc(ctx, view, skip_decompress, desc, NULL); if (res->b.b.target == PIPE_BUFFER) { images->needs_color_decompress_mask &= ~(1 << slot); res->bind_history |= PIPE_BIND_SHADER_IMAGE; } else { struct r600_texture *tex = (struct r600_texture *)res; unsigned level = view->u.tex.level; if (color_needs_decompression(tex)) { images->needs_color_decompress_mask |= 1 << slot; @@ -863,20 +864,91 @@ si_images_update_needs_color_decompress_mask(struct si_images *images) if (color_needs_decompression(rtex)) { images->needs_color_decompress_mask |= 1 << i; } else { images->needs_color_decompress_mask &= ~(1 << i); } } } } +void si_update_ps_colorbuf0_slot(struct si_context *sctx) +{ + struct si_buffer_resources *buffers = &sctx->rw_buffers; + struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; + unsigned slot = SI_PS_IMAGE_COLORBUF0; + struct pipe_surface *surf = NULL; + + /* 
si_texture_disable_dcc can get us here again. */ + if (sctx->blitter->running) + return; + + /* See whether FBFETCH is used and color buffer 0 is set. */ + if (sctx->ps_shader.cso && + sctx->ps_shader.cso->info.opcode_count[TGSI_OPCODE_FBFETCH] && + sctx->framebuffer.state.nr_cbufs && + sctx->framebuffer.state.cbufs[0]) + surf = sctx->framebuffer.state.cbufs[0]; + + /* Return if FBFETCH transitions from disabled to disabled. */ + if (!buffers->buffers[slot] && !surf) + return; + + sctx->ps_uses_fbfetch = surf != NULL; + si_update_ps_iter_samples(sctx); + + if (surf) { + struct r600_texture *tex = (struct r600_texture*)surf->texture; + struct pipe_image_view view; + + assert(tex); + assert(!tex->is_depth); + + /* Disable DCC, because the texture is used as both a sampler + * and color buffer. + */ + si_texture_disable_dcc(&sctx->b, tex); + + if (tex->resource.b.b.nr_samples <= 1 && tex->cmask_buffer) { + /* Disable CMASK. */ + assert(tex->cmask_buffer != &tex->resource); + si_eliminate_fast_color_clear(&sctx->b, tex); + si_texture_discard_cmask(sctx->screen, tex); + } + + view.resource = surf->texture; + view.format = surf->format; + view.access = PIPE_IMAGE_ACCESS_READ; + view.u.tex.first_layer = surf->u.tex.first_layer; + view.u.tex.last_layer = surf->u.tex.last_layer; + view.u.tex.level = surf->u.tex.level; + + /* Set the descriptor. */ + uint32_t *desc = descs->list + slot*4; + memset(desc, 0, 16 * 4); + si_set_shader_image_desc(sctx, &view, true, desc, desc + 8); + + pipe_resource_reference(&buffers->buffers[slot], &tex->resource.b.b); + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + &tex->resource, RADEON_USAGE_READ, + RADEON_PRIO_SHADER_RW_IMAGE); + buffers->enabled_mask |= 1u << slot; + } else { + /* Clear the descriptor. 
*/ + memset(descs->list + slot*4, 0, 8*4); + pipe_resource_reference(&buffers->buffers[slot], NULL); + buffers->enabled_mask &= ~(1u << slot); + } + + sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; +} + /* SAMPLER STATES */ static void si_bind_sampler_states(struct pipe_context *ctx, enum pipe_shader_type shader, unsigned start, unsigned count, void **states) { struct si_context *sctx = (struct si_context *)ctx; struct si_samplers *samplers = &sctx->samplers[shader]; struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader); struct si_sampler_state **sstates = (struct si_sampler_state**)states; @@ -1848,21 +1920,21 @@ static void si_update_bindless_image_descriptor(struct si_context *sctx, unsigned desc_slot_offset = img_handle->desc_slot * 16; struct pipe_image_view *view = &img_handle->view; uint32_t desc_list[8]; if (view->resource->target == PIPE_BUFFER) return; memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list)); si_set_shader_image_desc(sctx, view, true, - desc->list + desc_slot_offset); + desc->list + desc_slot_offset, NULL); if (memcmp(desc_list, desc->list + desc_slot_offset, sizeof(desc_list))) { img_handle->desc_dirty = true; sctx->bindless_descriptors_dirty = true; } } static void si_update_all_resident_texture_descriptors(struct si_context *sctx) { @@ -1914,20 +1986,21 @@ void si_update_all_texture_descriptors(struct si_context *sctx) continue; si_set_sampler_view(sctx, shader, i, samplers->views[i], true); } si_update_shader_needs_decompress_mask(sctx, shader); } si_update_all_resident_texture_descriptors(sctx); + si_update_ps_colorbuf0_slot(sctx); } /* SHADER USER DATA */ static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shader) { sctx->shader_pointers_dirty |= u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, SI_NUM_SHADER_DESCS); @@ -2453,21 +2526,21 @@ static uint64_t si_create_image_handle(struct pipe_context *ctx, if (!view || !view->resource) return 0; 
img_handle = CALLOC_STRUCT(si_image_handle); if (!img_handle) return 0; memset(desc_list, 0, sizeof(desc_list)); si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor); - si_set_shader_image_desc(sctx, view, false, &desc_list[0]); + si_set_shader_image_desc(sctx, view, false, &desc_list[0], NULL); img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list)); if (!img_handle->desc_slot) { FREE(img_handle); return 0; } handle = img_handle->desc_slot; diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index b4ca5bea943..327d8a28c38 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -183,20 +183,21 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MEMOBJ: case PIPE_CAP_LOAD_CONSTBUF: case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_TGSI_CLOCK: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS: case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: case PIPE_CAP_TGSI_VOTE: + case PIPE_CAP_TGSI_FS_FBFETCH: return 1; case PIPE_CAP_TGSI_BALLOT: return HAVE_LLVM >= 0x0500; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: return !SI_BIG_ENDIAN && sscreen->info.has_userptr; case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return (sscreen->info.drm_major == 2 && @@ -259,21 +260,20 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) /* Unsupported features. 
*/ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: case PIPE_CAP_MAX_WINDOW_RECTANGLES: - case PIPE_CAP_TGSI_FS_FBFETCH: case PIPE_CAP_TGSI_MUL_ZERO_WINS: case PIPE_CAP_UMA: case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: case PIPE_CAP_POST_DEPTH_COVERAGE: case PIPE_CAP_TILE_RASTER_ORDER: case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: case PIPE_CAP_CONTEXT_PRIORITY_MASK: return 0; case PIPE_CAP_FENCE_SIGNAL: diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index dbb04ed7e45..e3d45ef6c3b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -525,20 +525,21 @@ struct si_context { unsigned num_vs_blit_sgprs; uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD]; /* Vertex and index buffers. */ bool vertex_buffers_dirty; bool vertex_buffer_pointer_dirty; struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS]; /* MSAA config state. */ int ps_iter_samples; + bool ps_uses_fbfetch; bool smoothing_enabled; /* DB render state. 
*/ unsigned ps_db_shader_control; unsigned dbcb_copy_sample; bool dbcb_depth_copy_enabled:1; bool dbcb_stencil_copy_enabled:1; bool db_flush_depth_inplace:1; bool db_flush_stencil_inplace:1; bool db_depth_clear:1; @@ -924,11 +925,19 @@ si_htile_enabled(struct r600_texture *tex, unsigned level) return tex->htile_offset && level == 0; } static inline bool vi_tc_compat_htile_enabled(struct r600_texture *tex, unsigned level) { assert(!tex->tc_compatible_htile || tex->htile_offset); return tex->tc_compatible_htile && level == 0; } +static inline unsigned si_get_ps_iter_samples(struct si_context *sctx) +{ + if (sctx->ps_uses_fbfetch) + return sctx->framebuffer.nr_samples; + + return sctx->ps_iter_samples; +} + #endif diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 08b071e810b..ae98e102eae 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1919,21 +1919,21 @@ void si_llvm_load_input_fs( static void declare_input_fs( struct si_shader_context *ctx, unsigned input_index, const struct tgsi_full_declaration *decl, LLVMValueRef out[4]) { si_llvm_load_input_fs(ctx, input_index, out); } -static LLVMValueRef get_sample_id(struct si_shader_context *ctx) +LLVMValueRef si_get_sample_id(struct si_shader_context *ctx) { return si_unpack_param(ctx, SI_PARAM_ANCILLARY, 8, 4); } static LLVMValueRef get_base_vertex(struct ac_shader_abi *abi) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); /* For non-indexed draws, the base vertex set by the driver * (for direct draws) or the CP (for indirect draws) is the @@ -2142,21 +2142,21 @@ void si_load_system_value(struct si_shader_context *ctx, }; value = lp_build_gather_values(&ctx->gallivm, pos, 4); break; } case TGSI_SEMANTIC_FACE: value = ctx->abi.front_face; break; case TGSI_SEMANTIC_SAMPLEID: - value = get_sample_id(ctx); + value = si_get_sample_id(ctx); break; case TGSI_SEMANTIC_SAMPLEPOS: { LLVMValueRef pos[4] = { 
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT), LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT), LLVMConstReal(ctx->f32, 0), LLVMConstReal(ctx->f32, 0) }; pos[0] = lp_build_emit_llvm_unary(&ctx->bld_base, diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index f58978989d4..f598b762e1d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -534,20 +534,23 @@ struct si_shader_key { struct { /* One byte for every input: SI_FIX_FETCH_* enums. */ uint8_t vs_fix_fetch[SI_MAX_ATTRIBS]; union { uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */ /* When PS needs PrimID and GS is disabled. */ unsigned vs_export_prim_id:1; struct { unsigned interpolate_at_sample_force_center:1; + unsigned fbfetch_msaa; + unsigned fbfetch_is_1D; + unsigned fbfetch_layered; } ps; } u; } mono; /* Optimization flags for asynchronous compilation only. */ struct { /* For HW VS (it can be VS, TES, GS) */ uint64_t kill_outputs; /* "get_unique_index" bits */ unsigned clip_disable:1; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 1bd52722413..941c6fc736d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -293,20 +293,21 @@ void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base, #define NOOP_WAITCNT 0xf7f #define LGKM_CNT 0x07f #define VM_CNT 0xf70 LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx, const struct tgsi_ind_register *ind, unsigned addr_mul, int rel_index); LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx, const struct tgsi_ind_register *ind, int rel_index, unsigned num); +LLVMValueRef si_get_sample_id(struct si_shader_context *ctx); void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base); void si_shader_context_init_mem(struct si_shader_context *ctx); LLVMValueRef 
si_load_sampler_desc(struct si_shader_context *ctx, LLVMValueRef list, LLVMValueRef index, enum ac_descriptor_type type); LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, LLVMValueRef list, LLVMValueRef index, enum ac_descriptor_type desc_type, bool dcc_off); diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index f5fa18fd38a..b90edddf2ef 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -1866,20 +1866,77 @@ static void si_llvm_emit_txqs( samples = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->i32, 16, 0), ""); samples = LLVMBuildAnd(ctx->ac.builder, samples, LLVMConstInt(ctx->i32, 0xf, 0), ""); samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1, samples, ""); emit_data->output[emit_data->chan] = samples; } +static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct ac_image_args args = {}; + LLVMValueRef ptr, image, fmask, addr_vec; + + /* Ignore src0, because KHR_blend_equation_advanced disallows multiple render + * targets. + */ + + /* Load the image descriptor. */ + STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0); + ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); + ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr, + ac_array_in_const32_addr_space(ctx->v8i32), ""); + image = ac_build_load_to_sgpr(&ctx->ac, ptr, + LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0)); + + LLVMValueRef addr[4]; + unsigned chan = 0; + + addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16); + + if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D) + addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16); + + /* Get the current render target layer index. 
*/ + if (ctx->shader->key.mono.u.ps.fbfetch_layered) + addr[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11); + + if (ctx->shader->key.mono.u.ps.fbfetch_msaa) + addr[chan++] = si_get_sample_id(ctx); + + while (chan < 4) + addr[chan++] = LLVMGetUndef(ctx->i32); + + if (ctx->shader->key.mono.u.ps.fbfetch_msaa) { + fmask = ac_build_load_to_sgpr(&ctx->ac, ptr, + LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0)); + + ac_apply_fmask_to_sample(&ctx->ac, fmask, addr, false); + } + + addr_vec = ac_build_gather_values(&ctx->ac, addr, ARRAY_SIZE(addr)); + + args.opcode = ac_image_load; + args.resource = image; + args.addr = addr_vec; + args.dmask = 0xf; + args.da = ctx->shader->key.mono.u.ps.fbfetch_layered; + + emit_data->output[emit_data->chan] = + ac_build_image_opcode(&ctx->ac, &args); +} + static const struct lp_build_tgsi_action tex_action = { .fetch_args = tex_fetch_args, .emit = build_tex_intrinsic, }; /** * Setup actions for TGSI memory opcode, including texture opcodes. 
*/ void si_shader_context_init_mem(struct si_shader_context *ctx) { @@ -1898,20 +1955,22 @@ void si_shader_context_init_mem(struct si_shader_context *ctx) bld_base->op_actions[TGSI_OPCODE_TXF_LZ] = tex_action; bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action; bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action; bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action; bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = txq_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXQ].emit = txq_emit; bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action; bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action; bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs; + bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_llvm_emit_fbfetch; + bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args; bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit; bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args; bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit; bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args; bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit; tmpl.fetch_args = atomic_fetch_args; tmpl.emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b92ec03a054..16ad1d4f823 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2891,20 +2891,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (!surf->depth_initialized) { si_init_depth_surface(sctx, surf); } if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level)) sctx->framebuffer.DB_has_shader_readable_metadata = true; si_context_add_resource_size(ctx, surf->base.texture); } + si_update_ps_colorbuf0_slot(sctx); si_update_poly_offset_state(sctx); si_mark_atom_dirty(sctx, &sctx->cb_render_state); si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); if (sctx->screen->dpbb_allowed) 
si_mark_atom_dirty(sctx, &sctx->dpbb_state); if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) si_mark_atom_dirty(sctx, &sctx->msaa_config); @@ -3353,40 +3354,41 @@ static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) if (setup_samples > 1) { /* distance from the pixel center, indexed by log2(nr_samples) */ static unsigned max_dist[] = { 0, /* unused */ 4, /* 2x MSAA */ 6, /* 4x MSAA */ 7, /* 8x MSAA */ 8, /* 16x MSAA */ }; unsigned log_samples = util_logbase2(setup_samples); + unsigned ps_iter_samples = si_get_ps_iter_samples(sctx); unsigned log_ps_iter_samples = - util_logbase2(util_next_power_of_two(sctx->ps_iter_samples)); + util_logbase2(util_next_power_of_two(ps_iter_samples)); radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2); radeon_emit(cs, sc_line_cntl | S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */ radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) | S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */ if (sctx->framebuffer.nr_samples > 1) { radeon_set_context_reg(cs, R_028804_DB_EQAA, S_028804_MAX_ANCHOR_SAMPLES(log_samples) | S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) | S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, - S_028A4C_PS_ITER_SAMPLE(sctx->ps_iter_samples > 1) | + S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) | sc_mode_cntl_1); } else if (sctx->smoothing_enabled) { radeon_set_context_reg(cs, R_028804_DB_EQAA, S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) | S_028804_OVERRASTERIZATION_AMOUNT(log_samples)); radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, sc_mode_cntl_1); } } else { @@ -3401,34 +3403,39 @@ static void si_emit_msaa_config(struct si_context *sctx, struct 
r600_atom *atom) sc_mode_cntl_1); } /* GFX9: Flush DFSM when the AA mode changes. */ if (sctx->screen->dfsm_allowed) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); } } +void si_update_ps_iter_samples(struct si_context *sctx) +{ + if (sctx->framebuffer.nr_samples > 1) + si_mark_atom_dirty(sctx, &sctx->msaa_config); + if (sctx->screen->dpbb_allowed) + si_mark_atom_dirty(sctx, &sctx->dpbb_state); +} + static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) { struct si_context *sctx = (struct si_context *)ctx; if (sctx->ps_iter_samples == min_samples) return; sctx->ps_iter_samples = min_samples; sctx->do_update_shaders = true; - if (sctx->framebuffer.nr_samples > 1) - si_mark_atom_dirty(sctx, &sctx->msaa_config); - if (sctx->screen->dpbb_allowed) - si_mark_atom_dirty(sctx, &sctx->dpbb_state); + si_update_ps_iter_samples(sctx); } /* * Samplers */ /** * Build the sampler view descriptor for a buffer texture. * @param state 256-bit descriptor; only the high 128 bits are filled in */ diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 02659a7a4f3..37887853388 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -210,20 +210,26 @@ enum { SI_VS_STREAMOUT_BUF1, SI_VS_STREAMOUT_BUF2, SI_VS_STREAMOUT_BUF3, SI_HS_CONST_DEFAULT_TESS_LEVELS, SI_VS_CONST_INSTANCE_DIVISORS, SI_VS_CONST_CLIP_PLANES, SI_PS_CONST_POLY_STIPPLE, SI_PS_CONST_SAMPLE_POSITIONS, + /* Image descriptor of color buffer 0 for KHR_blend_equation_advanced. 
*/ + SI_PS_IMAGE_COLORBUF0, + SI_PS_IMAGE_COLORBUF0_HI, + SI_PS_IMAGE_COLORBUF0_FMASK, + SI_PS_IMAGE_COLORBUF0_FMASK_HI, + SI_NUM_RW_BUFFERS, }; /* Indices into sctx->descriptors, laid out so that gfx and compute pipelines * are contiguous: * * 0 - rw buffers * 1 - vertex const and shader buffers * 2 - vertex samplers and images * 3 - fragment const and shader buffer @@ -317,20 +323,21 @@ struct si_buffer_resources { si_pm4_block_idx(member)); \ } while(0) /* si_descriptors.c */ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct r600_texture *tex, const struct legacy_surf_level *base_level_info, unsigned base_level, unsigned first_level, unsigned block_width, bool is_stencil, uint32_t *state); +void si_update_ps_colorbuf0_slot(struct si_context *sctx); void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot, struct pipe_constant_buffer *cbuf); void si_get_shader_buffers(struct si_context *sctx, enum pipe_shader_type shader, uint start_slot, uint count, struct pipe_shader_buffer *sbuf); void si_set_ring_buffer(struct pipe_context *ctx, uint slot, struct pipe_resource *buffer, unsigned stride, unsigned num_records, bool add_tid, bool swizzle, @@ -388,20 +395,21 @@ si_make_texture_descriptor(struct si_screen *screen, unsigned width, unsigned height, unsigned depth, uint32_t *state, uint32_t *fmask_state); struct pipe_sampler_view * si_create_sampler_view_custom(struct pipe_context *ctx, struct pipe_resource *texture, const struct pipe_sampler_view *state, unsigned width0, unsigned height0, unsigned force_level); void si_update_fb_dirtiness_after_rendering(struct si_context *sctx); +void si_update_ps_iter_samples(struct si_context *sctx); /* si_state_binning.c */ void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state); /* si_state_shaders.c */ bool si_update_shaders(struct si_context *sctx); void si_init_shader_functions(struct si_context *sctx); bool si_init_shader_cache(struct si_screen *sscreen); void 
si_destroy_shader_cache(struct si_screen *sscreen); void si_get_active_slot_masks(const struct tgsi_shader_info *info, diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c index 686701d718f..7138f7a03aa 100644 --- a/src/gallium/drivers/radeonsi/si_state_binning.c +++ b/src/gallium/drivers/radeonsi/si_state_binning.c @@ -75,21 +75,21 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx, if (!(cb_target_enabled_4bit & (0xf << (i * 4)))) continue; struct r600_texture *rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture; sum += rtex->surface.bpe; } /* Multiply the sum by some function of the number of samples. */ if (nr_samples >= 2) { - if (sctx->ps_iter_samples >= 2) + if (si_get_ps_iter_samples(sctx) >= 2) sum *= nr_samples; else sum *= 2; } static const si_bin_size_subtable table[] = { { /* One RB / SE */ { /* One shader engine */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 8fe4c04ae79..d7742eafb04 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1453,20 +1453,37 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, sel->info.uses_linear_center + sel->info.uses_linear_centroid + sel->info.uses_linear_sample > 1; if (sel->info.opcode_count[TGSI_OPCODE_INTERP_SAMPLE]) key->mono.u.ps.interpolate_at_sample_force_center = 1; } } key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx); + + /* ps_uses_fbfetch is true only if the color buffer is bound. */ + if (sctx->ps_uses_fbfetch) { + struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0]; + struct pipe_resource *tex = cb0->texture; + + /* 1D textures are allocated and used as 2D on GFX9. 
*/ + key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1; + key->mono.u.ps.fbfetch_is_1D = sctx->b.chip_class != GFX9 && + (tex->target == PIPE_TEXTURE_1D || + tex->target == PIPE_TEXTURE_1D_ARRAY); + key->mono.u.ps.fbfetch_layered = tex->target == PIPE_TEXTURE_1D_ARRAY || + tex->target == PIPE_TEXTURE_2D_ARRAY || + tex->target == PIPE_TEXTURE_CUBE || + tex->target == PIPE_TEXTURE_CUBE_ARRAY || + tex->target == PIPE_TEXTURE_3D; + } break; } default: assert(0); } if (unlikely(sctx->screen->debug_flags & DBG(NO_OPT_VARIANT))) memset(&key->opt, 0, sizeof(key->opt)); } @@ -2419,20 +2436,21 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) si_mark_atom_dirty(sctx, &sctx->cb_render_state); if (sctx->screen->has_out_of_order_rast && (!old_sel || old_sel->info.writes_memory != sel->info.writes_memory || old_sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] != sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])) si_mark_atom_dirty(sctx, &sctx->msaa_config); } si_set_active_descriptors_for_shader(sctx, sel); + si_update_ps_colorbuf0_slot(sctx); } static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) { if (shader->is_optimized) { util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority, &shader->ready); } util_queue_fence_destroy(&shader->ready); -- 2.15.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev