From: Marek Olšák <marek.ol...@amd.com> Heaven and Valley write gl_SampleMask but not Z. Use SPI_SHADER_UINT16_ABGR instead of SPI_SHADER_32_ABGR as the Z export format if Z isn't written, because stencil and sample mask need only 16 bits each. --- src/gallium/drivers/radeonsi/si_shader.c | 63 ++++++++++++++++++++----- src/gallium/drivers/radeonsi/si_shader.h | 2 + src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++- 3 files changed, 56 insertions(+), 16 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 6801722..6d30d1c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2921,57 +2921,96 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) si_llvm_export_vs(bld_base, outputs, i); FREE(outputs); } struct si_ps_exports { unsigned num; LLVMValueRef args[10][9]; }; +unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil, + bool writes_samplemask) +{ + if (writes_z) { + /* Z needs 32 bits. */ + if (writes_samplemask) + return V_028710_SPI_SHADER_32_ABGR; + else if (writes_stencil) + return V_028710_SPI_SHADER_32_GR; + else + return V_028710_SPI_SHADER_32_R; + } else if (writes_stencil || writes_samplemask) { + /* Both stencil and sample mask need only 16 bits. */ + return V_028710_SPI_SHADER_UINT16_ABGR; + } else { + return V_028710_SPI_SHADER_ZERO; + } +} + static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, LLVMValueRef depth, LLVMValueRef stencil, LLVMValueRef samplemask, struct si_ps_exports *exp) { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; struct lp_build_context *uint = &bld_base->uint_bld; LLVMValueRef args[9]; unsigned mask = 0; + unsigned format = si_get_spi_shader_z_format(depth != NULL, + stencil != NULL, + samplemask != NULL); assert(depth || stencil || samplemask); args[1] = uint->one; /* whether the EXEC mask is valid */ args[2] = uint->one; /* DONE bit */ /* Specify the target we are exporting */ args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ); args[4] = uint->zero; /* COMP flag */ args[5] = base->undef; /* R, depth */ args[6] = base->undef; /* G, stencil test value[0:7], stencil op value[8:15] */ args[7] = base->undef; /* B, sample mask */ args[8] = base->undef; /* A, alpha to mask */ - if (depth) { - args[5] = depth; - mask |= 0x1; - } - - if (stencil) { 
- args[6] = stencil; - mask |= 0x2; - } + if (format == V_028710_SPI_SHADER_UINT16_ABGR) { + assert(!depth); + args[4] = uint->one; /* COMPR flag */ - if (samplemask) { - args[7] = samplemask; - mask |= 0x4; + if (stencil) { + /* Stencil should be in X[23:16]. */ + stencil = bitcast(bld_base, TGSI_TYPE_UNSIGNED, stencil); + stencil = LLVMBuildShl(base->gallivm->builder, stencil, + LLVMConstInt(ctx->i32, 16, 0), ""); + args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, stencil); + mask |= 0x3; + } + if (samplemask) { + /* SampleMask should be in Y[15:0]. */ + args[6] = samplemask; + mask |= 0xc; + } + } else { + if (depth) { + args[5] = depth; + mask |= 0x1; + } + if (stencil) { + args[6] = stencil; + mask |= 0x2; + } + if (samplemask) { + args[7] = samplemask; + mask |= 0x4; + } } /* SI (except OLAND) has a bug that it only looks * at the X writemask component. */ if (ctx->screen->b.chip_class == SI && ctx->screen->b.family != CHIP_OLAND) mask |= 0x1; /* Specify which components to enable */ args[0] = lp_build_const_int32(base->gallivm, mask); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index fc1b22d..de4705d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -511,12 +511,14 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor, FILE *f); void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, struct si_shader_config *config, uint64_t scratch_va); void si_shader_binary_read_config(struct radeon_shader_binary *binary, struct si_shader_config *conf, unsigned symbol_offset); +unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil, + bool writes_samplemask); #endif diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c index b4f19fe..816aadc 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -741,24 +741,23 @@ static void si_shader_ps(struct si_shader *shader) shader->config.spi_ps_input_addr); /* Set interpolation controls. */ spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader)); /* Set registers. */ si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control); si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, - info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR : - info->writes_stencil ? V_028710_SPI_SHADER_32_GR : - info->writes_z ? V_028710_SPI_SHADER_32_R : - V_028710_SPI_SHADER_ZERO); + si_get_spi_shader_z_format(info->writes_z, + info->writes_stencil, + info->writes_samplemask)); si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, spi_shader_col_format); si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask); va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40); si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev