On Tue, Dec 12, 2017 at 6:08 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > Use 16_ABGR instead of 32_ABGR if Z isn't written. > > Ported from RadeonSI. > > No CTS regressions on Polaris. > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/amd/common/ac_nir_to_llvm.c | 65 > ++++++++++++++++++++++++++++++++++------- > src/amd/vulkan/radv_pipeline.c | 29 ++++++++++++++---- > 2 files changed, 78 insertions(+), 16 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 663b27d265..5916619e97 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -6166,6 +6166,26 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx, > return true; > } > > +static unsigned > +si_get_spi_shader_z_format(bool writes_z, bool writes_stencil, > + bool writes_samplemask) > +{ > + if (writes_z) { > + /* Z needs 32 bits. */ > + if (writes_samplemask) > + return V_028710_SPI_SHADER_32_ABGR; > + else if (writes_stencil) > + return V_028710_SPI_SHADER_32_GR; > + else > + return V_028710_SPI_SHADER_32_R; > + } else if (writes_stencil || writes_samplemask) { > + /* Both stencil and sample mask need only 16 bits. */ > + return V_028710_SPI_SHADER_UINT16_ABGR; > + } else { > + return V_028710_SPI_SHADER_ZERO; > + } > +}
I'm not a fan of having this function duplicated in two places (ac_nir_to_llvm.c and radv_pipeline.c). Can we export the format from the compiler to radv, or the other way around, so that it is only computed in one place? Otherwise, Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> > + > static void > si_export_mrt_z(struct nir_to_llvm_context *ctx, > LLVMValueRef depth, LLVMValueRef stencil, > @@ -6184,19 +6204,42 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx, > args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */ > args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */ > > - if (depth) { > - args.out[0] = depth; > - args.enabled_channels |= 0x1; > - } > + unsigned format = si_get_spi_shader_z_format(depth != NULL, > + stencil != NULL, > + samplemask != NULL); > + > + if (format == V_028710_SPI_SHADER_UINT16_ABGR) { > + assert(!depth); > + args.compr = 1; /* COMPR flag */ > + > + if (stencil) { > + /* Stencil should be in X[23:16]. */ > + stencil = ac_to_integer(&ctx->ac, stencil); > + stencil = LLVMBuildShl(ctx->builder, stencil, > + LLVMConstInt(ctx->ac.i32, 16, > 0), ""); > + args.out[0] = ac_to_float(&ctx->ac, stencil); > + args.enabled_channels |= 0x3; > + } > + if (samplemask) { > + /* SampleMask should be in Y[15:0]. 
*/ > + args.out[1] = samplemask; > + args.enabled_channels |= 0xc; > + } > + } else { > + if (depth) { > + args.out[0] = depth; > + args.enabled_channels |= 0x1; > + } > > - if (stencil) { > - args.out[1] = stencil; > - args.enabled_channels |= 0x2; > - } > + if (stencil) { > + args.out[1] = stencil; > + args.enabled_channels |= 0x2; > + } > > - if (samplemask) { > - args.out[2] = samplemask; > - args.enabled_channels |= 0x4; > + if (samplemask) { > + args.out[2] = samplemask; > + args.enabled_channels |= 0x4; > + } > } > > /* SI (except OLAND and HAINAN) has a bug that it only looks > diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c > index 0146d6935e..baaf5c4c77 100644 > --- a/src/amd/vulkan/radv_pipeline.c > +++ b/src/amd/vulkan/radv_pipeline.c > @@ -2013,6 +2013,25 @@ radv_pipeline_stage_to_user_data_0(struct > radv_pipeline *pipeline, > } > } > > +static unsigned > +si_get_spi_shader_z_format(bool writes_z, bool writes_stencil, > + bool writes_samplemask) > +{ > + if (writes_z) { > + /* Z needs 32 bits. */ > + if (writes_samplemask) > + return V_028710_SPI_SHADER_32_ABGR; > + else if (writes_stencil) > + return V_028710_SPI_SHADER_32_GR; > + else > + return V_028710_SPI_SHADER_32_R; > + } else if (writes_stencil || writes_samplemask) { > + /* Both stencil and sample mask need only 16 bits. */ > + return V_028710_SPI_SHADER_UINT16_ABGR; > + } else { > + return V_028710_SPI_SHADER_ZERO; > + } > +} > > static VkResult > radv_pipeline_init(struct radv_pipeline *pipeline, > @@ -2108,11 +2127,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline, > if (pipeline->device->physical_device->has_rbplus) > pipeline->graphics.db_shader_control |= > S_02880C_DUAL_QUAD_DISABLE(1); > > - pipeline->graphics.shader_z_format = > - ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR : > - ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR : > - ps->info.fs.writes_z ? 
V_028710_SPI_SHADER_32_R : > - V_028710_SPI_SHADER_ZERO; > + unsigned shader_z_format = > + si_get_spi_shader_z_format(ps->info.fs.writes_z, > + ps->info.fs.writes_stencil, > + ps->info.fs.writes_sample_mask); > + pipeline->graphics.shader_z_format = shader_z_format; > > calculate_vgt_gs_mode(pipeline); > calculate_vs_outinfo(pipeline); > -- > 2.15.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev