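Use the 16-bit UINT16_ABGR SPI shader Z export format instead of a 32-bit one (32_GR/32_ABGR) if Z isn't written, i.e. when the fragment shader exports only stencil and/or the sample mask. Ported from RadeonSI.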
No CTS regressions on Polaris. Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> --- src/amd/common/ac_nir_to_llvm.c | 65 ++++++++++++++++++++++++++++++++++------- src/amd/vulkan/radv_pipeline.c | 29 ++++++++++++++---- 2 files changed, 78 insertions(+), 16 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 663b27d265..5916619e97 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -6166,6 +6166,26 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx, return true; } +static unsigned +si_get_spi_shader_z_format(bool writes_z, bool writes_stencil, + bool writes_samplemask) +{ + if (writes_z) { + /* Z needs 32 bits. */ + if (writes_samplemask) + return V_028710_SPI_SHADER_32_ABGR; + else if (writes_stencil) + return V_028710_SPI_SHADER_32_GR; + else + return V_028710_SPI_SHADER_32_R; + } else if (writes_stencil || writes_samplemask) { + /* Both stencil and sample mask need only 16 bits. */ + return V_028710_SPI_SHADER_UINT16_ABGR; + } else { + return V_028710_SPI_SHADER_ZERO; + } +} + static void si_export_mrt_z(struct nir_to_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil, @@ -6184,19 +6204,42 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx, args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */ args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */ - if (depth) { - args.out[0] = depth; - args.enabled_channels |= 0x1; - } + unsigned format = si_get_spi_shader_z_format(depth != NULL, + stencil != NULL, + samplemask != NULL); + + if (format == V_028710_SPI_SHADER_UINT16_ABGR) { + assert(!depth); + args.compr = 1; /* COMPR flag */ + + if (stencil) { + /* Stencil should be in X[23:16]. 
*/ + stencil = ac_to_integer(&ctx->ac, stencil); + stencil = LLVMBuildShl(ctx->builder, stencil, + LLVMConstInt(ctx->ac.i32, 16, 0), ""); + args.out[0] = ac_to_float(&ctx->ac, stencil); + args.enabled_channels |= 0x3; + } + if (samplemask) { + /* SampleMask should be in Y[15:0]. */ + args.out[1] = samplemask; + args.enabled_channels |= 0xc; + } + } else { + if (depth) { + args.out[0] = depth; + args.enabled_channels |= 0x1; + } - if (stencil) { - args.out[1] = stencil; - args.enabled_channels |= 0x2; - } + if (stencil) { + args.out[1] = stencil; + args.enabled_channels |= 0x2; + } - if (samplemask) { - args.out[2] = samplemask; - args.enabled_channels |= 0x4; + if (samplemask) { + args.out[2] = samplemask; + args.enabled_channels |= 0x4; + } } /* SI (except OLAND and HAINAN) has a bug that it only looks diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 0146d6935e..baaf5c4c77 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2013,6 +2013,25 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, } } +static unsigned +si_get_spi_shader_z_format(bool writes_z, bool writes_stencil, + bool writes_samplemask) +{ + if (writes_z) { + /* Z needs 32 bits. */ + if (writes_samplemask) + return V_028710_SPI_SHADER_32_ABGR; + else if (writes_stencil) + return V_028710_SPI_SHADER_32_GR; + else + return V_028710_SPI_SHADER_32_R; + } else if (writes_stencil || writes_samplemask) { + /* Both stencil and sample mask need only 16 bits. */ + return V_028710_SPI_SHADER_UINT16_ABGR; + } else { + return V_028710_SPI_SHADER_ZERO; + } +} static VkResult radv_pipeline_init(struct radv_pipeline *pipeline, @@ -2108,11 +2127,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline, if (pipeline->device->physical_device->has_rbplus) pipeline->graphics.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); - pipeline->graphics.shader_z_format = - ps->info.fs.writes_sample_mask ? 
V_028710_SPI_SHADER_32_ABGR : - ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR : - ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R : - V_028710_SPI_SHADER_ZERO; + unsigned shader_z_format = + si_get_spi_shader_z_format(ps->info.fs.writes_z, + ps->info.fs.writes_stencil, + ps->info.fs.writes_sample_mask); + pipeline->graphics.shader_z_format = shader_z_format; calculate_vgt_gs_mode(pipeline); calculate_vs_outinfo(pipeline); -- 2.15.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev